4
0
Эх сурвалжийг харах

Handle more invalid dates

Frédéric Guillot 5 жил өмнө
parent
commit
b1c9977711

+ 28 - 10
reader/date/parser.go

@@ -23,6 +23,7 @@ var dateFormats = []string{
 	time.RFC1123Z,
 	time.RFC1123Z,
 	time.RFC1123,
 	time.RFC1123,
 	time.ANSIC,
 	time.ANSIC,
+	"Mon, 02 Jan 2006 15:04:05 MST -07:00",
 	"Mon, January 2, 2006, 3:04 PM MST",
 	"Mon, January 2, 2006, 3:04 PM MST",
 	"Mon, January 2 2006 15:04:05 -0700",
 	"Mon, January 2 2006 15:04:05 -0700",
 	"Mon, January 02, 2006, 15:04:05 MST",
 	"Mon, January 02, 2006, 15:04:05 MST",
@@ -38,6 +39,7 @@ var dateFormats = []string{
 	"Mon Jan 02, 2006 3:04 pm",
 	"Mon Jan 02, 2006 3:04 pm",
 	"Mon, Jan 02,2006 15:04:05 MST",
 	"Mon, Jan 02,2006 15:04:05 MST",
 	"Mon Jan 02 2006 15:04:05 -0700",
 	"Mon Jan 02 2006 15:04:05 -0700",
+	"Mon, 02/01/2006",
 	"Monday, 2. January 2006 - 15:04",
 	"Monday, 2. January 2006 - 15:04",
 	"Monday 02 January 2006",
 	"Monday 02 January 2006",
 	"Monday, January 2, 2006 15:04:05 MST",
 	"Monday, January 2, 2006 15:04:05 MST",
@@ -206,11 +208,15 @@ var dateFormats = []string{
 	"01/02/2006",
 	"01/02/2006",
 	"01-02-2006",
 	"01-02-2006",
 	"Jan. 2006",
 	"Jan. 2006",
+	"Jan. 2, 2006, 03:04 p.m.",
+	"2006-01-02 15:04:05 -07:00",
+	"2 January, 2006",
 }
 }
 
 
 var invalidTimezoneReplacer = strings.NewReplacer(
 var invalidTimezoneReplacer = strings.NewReplacer(
 	"Europe/Brussels", "CET",
 	"Europe/Brussels", "CET",
 	"GMT+0000 (Coordinated Universal Time)", "GMT",
 	"GMT+0000 (Coordinated Universal Time)", "GMT",
+	"GMT-", "GMT -",
 )
 )
 
 
 var invalidLocalizedDateReplacer = strings.NewReplacer(
 var invalidLocalizedDateReplacer = strings.NewReplacer(
@@ -246,22 +252,30 @@ var invalidLocalizedDateReplacer = strings.NewReplacer(
 	"Vendredi,", "Friday,",
 	"Vendredi,", "Friday,",
 	"Samedi,", "Saturday,",
 	"Samedi,", "Saturday,",
 	"Dimanche,", "Sunday,",
 	"Dimanche,", "Sunday,",
-	"avr ", "Apr ",
-	"mai ", "May ",
-	"jui ", "Jun ",
-	"juin ", "June ",
 	"jan.", "January ",
 	"jan.", "January ",
 	"feb.", "February ",
 	"feb.", "February ",
 	"mars.", "March ",
 	"mars.", "March ",
 	"avril.", "April ",
 	"avril.", "April ",
 	"mai.", "May ",
 	"mai.", "May ",
 	"juin.", "June ",
 	"juin.", "June ",
-	"juil.", "july",
-	"août.", "august",
-	"sept.", "september",
-	"oct.", "october",
-	"nov.", "november",
-	"dec.", "december",
+	"juil.", "July",
+	"août.", "August",
+	"sept.", "September",
+	"oct.", "October",
+	"nov.", "November",
+	"dec.", "December",
+	"janvier ", "January ",
+	"février ", "February ",
+	"mars ", "March ",
+	"avril ", "April ",
+	"mai ", "May ",
+	"juin ", "June ",
+	"juillet ", "July",
+	"août ", "August",
+	"septembre ", "September",
+	"octobre ", "October",
+	"november ", "November",
+	"décembre ", "December",
 	"Janvier", "January",
 	"Janvier", "January",
 	"Février", "February",
 	"Février", "February",
 	"Mars", "March",
 	"Mars", "March",
@@ -274,6 +288,10 @@ var invalidLocalizedDateReplacer = strings.NewReplacer(
 	"Octobre", "October",
 	"Octobre", "October",
 	"Novembre", "November",
 	"Novembre", "November",
 	"Décembre", "December",
 	"Décembre", "December",
+	"avr ", "Apr ",
+	"mai ", "May ",
+	"jui ", "Jun ",
+	"juin ", "June ",
 )
 )
 
 
 // Parse parses a given date string using a large
 // Parse parses a given date string using a large

+ 6 - 0
reader/date/parser_test.go

@@ -143,6 +143,12 @@ func TestParseWeirdDateFormat(t *testing.T) {
 		"Mon, 16th Nov 2020 13:16:28 GMT",
 		"Mon, 16th Nov 2020 13:16:28 GMT",
 		"Nov. 2020",
 		"Nov. 2020",
 		"ven., 03 juil. 2020 15:09:58 +0000",
 		"ven., 03 juil. 2020 15:09:58 +0000",
+		"Fri, 26/06/2020",
+		"Thu, 29 Oct 2020 07:36:03 GMT-07:00",
+		"jeu., 02 avril 2020 00:00:00 +0200",
+		"Jan. 4, 2016, 12:37 p.m.",
+		"2018-10-23 04:07:42 +00:00",
+		"5 August, 2019",
 	}
 	}
 
 
 	for _, date := range dates {
 	for _, date := range dates {

+ 1 - 1
reader/rdf/rdf.go

@@ -95,7 +95,7 @@ func (r *rdfItem) entryDate() time.Time {
 	if r.DublinCoreDate != "" {
 	if r.DublinCoreDate != "" {
 		result, err := date.Parse(r.DublinCoreDate)
 		result, err := date.Parse(r.DublinCoreDate)
 		if err != nil {
 		if err != nil {
-			logger.Error("rdf: %v", err)
+			logger.Error("rdf: %v (entry link = %s)", err, r.Link)
 			return time.Now()
 			return time.Now()
 		}
 		}