Browse Source

Handle some non-English date formats

Frédéric Guillot 8 years ago
parent
commit
02ba735ba9
2 changed files with 37 additions and 1 deletion
  1. 32 0
      reader/date/parser.go
  2. 5 1
      reader/date/parser_test.go

+ 32 - 0
reader/date/parser.go

@@ -71,6 +71,8 @@ var dateFormats = []string{
 	"Mon, 2 Jan 2006 15:04:05 -0700",
 	"Mon, 2 Jan 2006 15:04:05",
 	"Mon, 2 Jan 2006 15:04",
+	"Mon, 02 Jan 2006, 15:04",
+	"Mon, 2 Jan 2006, 15:04",
 	"Mon,2 Jan 2006",
 	"Mon, 2 Jan 2006",
 	"Mon, 2 Jan 15:04:05 MST",
@@ -192,6 +194,7 @@ var dateFormats = []string{
 // Parse parses a given date string using a large
 // list of commonly found feed date formats.
 func Parse(ds string) (t time.Time, err error) {
+	ds = replaceNonEnglishWords(ds)
 	d := strings.TrimSpace(ds)
 	if d == "" {
 		return t, errors.New("date parser: empty value")
@@ -211,3 +214,32 @@ func Parse(ds string) (t time.Time, err error) {
 	err = fmt.Errorf(`date parser: failed to parse date "%s"`, ds)
 	return
 }
+
+// replaceNonEnglishWords returns ds with the German and French weekday and month abbreviations listed below rewritten to their English forms; Parse calls it on the raw input first.
+func replaceNonEnglishWords(ds string) string {
+	r := strings.NewReplacer( // arguments are (old, new) pairs; matching is case-sensitive and applies anywhere in ds
+		"Mo,", "Mon,", // German weekdays: the trailing comma is part of the matched text
+		"Di,", "Tue,",
+		"Mi,", "Wed,",
+		"Do,", "Thu,",
+		"Fr,", "Fri,",
+		"Sa,", "Sat,",
+		"So,", "Sun,",
+		"Mär ", "Mar ", // German months: the trailing space is part of the matched text
+		"Mai ", "May ",
+		"Okt ", "Oct ",
+		"Dez ", "Dec ",
+		"lun,", "Mon,", // French weekdays, lowercase only
+		"mar,", "Tue,",
+		"mer,", "Wed,",
+		"jeu,", "Thu,",
+		"ven,", "Fri,",
+		"sam,", "Sat,",
+		"dim,", "Sun,",
+		"avr ", "Apr ", // French months, lowercase only
+		"mai ", "May ",
+		"jui ", "Jun ", // NOTE(review): "jui" could abbreviate juin or juillet; confirm June is the intended mapping
+	)
+
+	return r.Replace(ds) // input without any listed abbreviation comes back unchanged
+}

+ 5 - 1
reader/date/parser_test.go

@@ -47,11 +47,15 @@ func TestParseWeirdDateFormat(t *testing.T) {
 		"Friday, December 22, 2017 - 3:09pm",
 		"Friday, December 8, 2017 - 3:07pm",
 		"Thu, 25 Feb 2016 00:00:00 Europe/Brussels",
+		"Mon, 09 Apr 2018, 16:04",
+		"Di, 23 Jan 2018 00:00:00 +0100",
+		"Do, 29 Mär 2018 00:00:00 +0200",
+		"mer, 9 avr 2018 00:00:00 +0200",
 	}
 
 	for _, date := range dates {
 		if _, err := Parse(date); err != nil {
-			t.Fatalf(`Unable to parse date: "%s"`, date)
+			t.Fatalf(`Unable to parse date: %q`, date)
 		}
 	}
 }