Przeglądaj źródła

Update date parser to parse more invalid date formats

Frédéric Guillot 2 lata temu
rodzic
commit
e3eaaea15a

+ 1 - 1
internal/reader/atom/atom_03.go

@@ -126,7 +126,7 @@ func (a *atom03Entry) entryDate() time.Time {
 	if dateText != "" {
 		result, err := date.Parse(dateText)
 		if err != nil {
-			slog.Warn("Unable to parse date from Atom 0.3 feed",
+			slog.Debug("Unable to parse date from Atom 0.3 feed",
 				slog.String("date", dateText),
 				slog.String("id", a.ID),
 				slog.Any("error", err),

+ 1 - 1
internal/reader/atom/atom_10.go

@@ -144,7 +144,7 @@ func (a *atom10Entry) entryDate() time.Time {
 	if dateText != "" {
 		result, err := date.Parse(dateText)
 		if err != nil {
-			slog.Warn("Unable to parse date from Atom 0.3 feed",
+			slog.Debug("Unable to parse date from Atom 0.3 feed",
 				slog.String("date", dateText),
 				slog.String("id", a.ID),
 				slog.Any("error", err),

+ 5 - 1
internal/reader/date/parser.go

@@ -219,6 +219,10 @@ var dateFormats = []string{
 	"Mon, 2rd Jan 2006 15:04:05 MST",
 	"Mon, 2nd Jan 2006 15:04:05 MST",
 	"Mon, 2st Jan 2006 15:04:05 MST",
+	"Mon, Jan 02 2006 03:04:05 PM",
+	"Monday, January 2, 2006 - 15:04",
+	"01/02/06 15:04:05",
+	"02.01.06",
 }
 
 var invalidTimezoneReplacer = strings.NewReplacer(
@@ -309,6 +313,7 @@ var invalidLocalizedDateReplacer = strings.NewReplacer(
 // Parse parses a given date string using a large
 // list of commonly found feed date formats.
 func Parse(rawInput string) (t time.Time, err error) {
+	rawInput = strings.TrimSpace(rawInput)
 	timestamp, err := strconv.ParseInt(rawInput, 10, 64)
 	if err == nil {
 		return time.Unix(timestamp, 0), nil
@@ -316,7 +321,6 @@ func Parse(rawInput string) (t time.Time, err error) {
 
 	processedInput := invalidLocalizedDateReplacer.Replace(rawInput)
 	processedInput = invalidTimezoneReplacer.Replace(processedInput)
-	processedInput = strings.TrimSpace(processedInput)
 	if processedInput == "" {
 		return t, errors.New(`date parser: empty value`)
 	}

+ 5 - 1
internal/reader/date/parser_test.go

@@ -214,11 +214,15 @@ func TestParseWeirdDateFormat(t *testing.T) {
 		"Jun 23, 2023 19:00 GMT",
 		"09/15/2014 4:20 pm PST",
 		"Fri, 23rd Jun 2023 09:32:20 GMT",
+		"Sat, Oct 28 2023 08:28:28 PM",
+		"Monday, October 6, 2023 - 16:29\n",
+		"10/30/23 21:55:58",
+		"30.10.23",
 	}
 
 	for _, date := range dates {
 		if _, err := Parse(date); err != nil {
-			t.Errorf(`Unable to parse date: %q`, date)
+			t.Errorf(`Unable to parse date: %q (%v)`, date, err)
 		}
 	}
 }

+ 1 - 1
internal/reader/json/json.go

@@ -110,7 +110,7 @@ func (j *jsonItem) GetDate() time.Time {
 		if value != "" {
 			d, err := date.Parse(value)
 			if err != nil {
-				slog.Warn("Unable to parse date from JSON feed",
+				slog.Debug("Unable to parse date from JSON feed",
 					slog.String("date", value),
 					slog.String("url", j.URL),
 					slog.Any("error", err),

+ 1 - 1
internal/reader/rdf/rdf.go

@@ -100,7 +100,7 @@ func (r *rdfItem) entryDate() time.Time {
 	if r.DublinCoreDate != "" {
 		result, err := date.Parse(r.DublinCoreDate)
 		if err != nil {
-			slog.Warn("Unable to parse date from RDF feed",
+			slog.Debug("Unable to parse date from RDF feed",
 				slog.String("date", r.DublinCoreDate),
 				slog.String("link", r.Link),
 				slog.Any("error", err),

+ 1 - 1
internal/reader/rss/rss.go

@@ -235,7 +235,7 @@ func (r *rssItem) entryDate() time.Time {
 	if value != "" {
 		result, err := date.Parse(value)
 		if err != nil {
-			slog.Warn("Unable to parse date from RSS feed",
+			slog.Debug("Unable to parse date from RSS feed",
 				slog.String("date", value),
 				slog.String("guid", r.GUID.Data),
 				slog.Any("error", err),