Quellcode durchsuchen

fix(filter): skip invalid rules instead of exiting the loop

Frédéric Guillot vor 9 Monaten
Ursprung
Commit
bc6ab44ff2

+ 31 - 29
internal/reader/filter/filter.go

@@ -17,30 +17,31 @@ import (
 
 func IsBlockedEntry(feed *model.Feed, entry *model.Entry, user *model.User) bool {
 	if user.BlockFilterEntryRules != "" {
-		rules := strings.SplitSeq(user.BlockFilterEntryRules, "\n")
-		for rule := range rules {
+		for rule := range strings.SplitSeq(user.BlockFilterEntryRules, "\n") {
 			match := false
+
 			parts := strings.SplitN(rule, "=", 2)
 			if len(parts) != 2 {
-				return false
+				continue
 			}
-			part, pattern := parts[0], parts[1]
 
-			switch part {
+			ruleKey, ruleValue := parts[0], parts[1]
+
+			switch ruleKey {
 			case "EntryDate":
-				match = isDateMatchingPattern(pattern, entry.Date)
+				match = isDateMatchingPattern(ruleValue, entry.Date)
 			case "EntryTitle":
-				match, _ = regexp.MatchString(pattern, entry.Title)
+				match, _ = regexp.MatchString(ruleValue, entry.Title)
 			case "EntryURL":
-				match, _ = regexp.MatchString(pattern, entry.URL)
+				match, _ = regexp.MatchString(ruleValue, entry.URL)
 			case "EntryCommentsURL":
-				match, _ = regexp.MatchString(pattern, entry.CommentsURL)
+				match, _ = regexp.MatchString(ruleValue, entry.CommentsURL)
 			case "EntryContent":
-				match, _ = regexp.MatchString(pattern, entry.Content)
+				match, _ = regexp.MatchString(ruleValue, entry.Content)
 			case "EntryAuthor":
-				match, _ = regexp.MatchString(pattern, entry.Author)
+				match, _ = regexp.MatchString(ruleValue, entry.Author)
 			case "EntryTag":
-				match = containsRegexPattern(pattern, entry.Tags)
+				match = containsRegexPattern(ruleValue, entry.Tags)
 			}
 
 			if match {
@@ -87,30 +88,31 @@ func IsBlockedEntry(feed *model.Feed, entry *model.Entry, user *model.User) bool
 
 func IsAllowedEntry(feed *model.Feed, entry *model.Entry, user *model.User) bool {
 	if user.KeepFilterEntryRules != "" {
-		rules := strings.SplitSeq(user.KeepFilterEntryRules, "\n")
-		for rule := range rules {
+		for rule := range strings.SplitSeq(user.KeepFilterEntryRules, "\n") {
 			match := false
+
 			parts := strings.SplitN(rule, "=", 2)
 			if len(parts) != 2 {
-				return false
+				continue
 			}
-			part, pattern := parts[0], parts[1]
 
-			switch part {
+			ruleKey, ruleValue := parts[0], parts[1]
+
+			switch ruleKey {
 			case "EntryDate":
-				match = isDateMatchingPattern(pattern, entry.Date)
+				match = isDateMatchingPattern(ruleValue, entry.Date)
 			case "EntryTitle":
-				match, _ = regexp.MatchString(pattern, entry.Title)
+				match, _ = regexp.MatchString(ruleValue, entry.Title)
 			case "EntryURL":
-				match, _ = regexp.MatchString(pattern, entry.URL)
+				match, _ = regexp.MatchString(ruleValue, entry.URL)
 			case "EntryCommentsURL":
-				match, _ = regexp.MatchString(pattern, entry.CommentsURL)
+				match, _ = regexp.MatchString(ruleValue, entry.CommentsURL)
 			case "EntryContent":
-				match, _ = regexp.MatchString(pattern, entry.Content)
+				match, _ = regexp.MatchString(ruleValue, entry.Content)
 			case "EntryAuthor":
-				match, _ = regexp.MatchString(pattern, entry.Author)
+				match, _ = regexp.MatchString(ruleValue, entry.Author)
 			case "EntryTag":
-				match = containsRegexPattern(pattern, entry.Tags)
+				match = containsRegexPattern(ruleValue, entry.Tags)
 			}
 
 			if match {
@@ -164,23 +166,23 @@ func isDateMatchingPattern(pattern string, entryDate time.Time) bool {
 		return false
 	}
 
-	operator, dateStr := parts[0], parts[1]
+	ruleType, inputDate := parts[0], parts[1]
 
-	switch operator {
+	switch ruleType {
 	case "before":
-		targetDate, err := time.Parse("2006-01-02", dateStr)
+		targetDate, err := time.Parse("2006-01-02", inputDate)
 		if err != nil {
 			return false
 		}
 		return entryDate.Before(targetDate)
 	case "after":
-		targetDate, err := time.Parse("2006-01-02", dateStr)
+		targetDate, err := time.Parse("2006-01-02", inputDate)
 		if err != nil {
 			return false
 		}
 		return entryDate.After(targetDate)
 	case "between":
-		dates := strings.Split(dateStr, ",")
+		dates := strings.Split(inputDate, ",")
 		if len(dates) != 2 {
 			return false
 		}

+ 36 - 24
internal/reader/filter/filter_test.go

@@ -18,6 +18,7 @@ func TestBlockingEntries(t *testing.T) {
 		expected bool
 	}{
 		{&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{URL: "https://example.com"}, &model.User{}, true},
+		{&model.Feed{ID: 1, BlocklistRules: "[a-z"}, &model.Entry{URL: "https://example.com"}, &model.User{}, false}, // invalid regex
 		{&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{URL: "https://different.com"}, &model.User{}, false},
 		{&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Some Example"}, &model.User{}, true},
 		{&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Something different"}, &model.User{}, false},
@@ -28,15 +29,17 @@ func TestBlockingEntries(t *testing.T) {
 		{&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Something different", Author: "Example"}, &model.User{}, true},
 		{&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Something different", Author: "Something different"}, &model.User{}, false},
 		{&model.Feed{ID: 1}, &model.Entry{Title: "No rule defined"}, &model.User{}, false},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{URL: "https://example.com", Title: "Some Example"}, &model.User{BlockFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{URL: "https://different.com", Title: "Some Test"}, &model.User{BlockFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{URL: "https://different.com", Title: "Some Example"}, &model.User{BlockFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, false},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{CommentsURL: "https://example.com", Content: "Some Example"}, &model.User{BlockFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{CommentsURL: "https://different.com", Content: "Some Test"}, &model.User{BlockFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{CommentsURL: "https://different.com", Content: "Some Example"}, &model.User{BlockFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, false},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Author: "Example", Tags: []string{"example", "something else"}}, &model.User{BlockFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{BlockFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)example"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{BlockFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, false},
+		{&model.Feed{ID: 1}, &model.Entry{URL: "https://example.com", Title: "Some Example"}, &model.User{BlockFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{URL: "https://different.com", Title: "Some Test"}, &model.User{BlockFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{URL: "https://different.com", Title: "Some Example"}, &model.User{BlockFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, false},
+		{&model.Feed{ID: 1}, &model.Entry{CommentsURL: "https://example.com", Content: "Some Example"}, &model.User{BlockFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{CommentsURL: "https://different.com", Content: "Some Test"}, &model.User{BlockFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{CommentsURL: "https://different.com", Content: "Some Example"}, &model.User{BlockFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, false},
+		{&model.Feed{ID: 1}, &model.Entry{Author: "Example", Tags: []string{"example", "something else"}}, &model.User{BlockFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{BlockFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)example"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{BlockFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, false},
+		{&model.Feed{ID: 1}, &model.Entry{Author: "Different", Tags: []string{"example", "test"}}, &model.User{BlockFilterEntryRules: "EntryAuthor\nEntryTag=(?i)Test"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 3, 14, 0, 0, 0, 0, time.UTC)}, &model.User{BlockFilterEntryRules: "EntryDate=before:2024-03-15"}, true},
 	}
 
 	for _, tc := range scenarios {
@@ -55,6 +58,7 @@ func TestAllowEntries(t *testing.T) {
 		expected bool
 	}{
 		{&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "https://example.com"}, &model.User{}, true},
+		{&model.Feed{ID: 1, KeeplistRules: "[a-z"}, &model.Entry{Title: "https://example.com"}, &model.User{}, false}, // invalid regex
 		{&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "https://different.com"}, &model.User{}, false},
 		{&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Some Example"}, &model.User{}, true},
 		{&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Something different"}, &model.User{}, false},
@@ -65,21 +69,29 @@ func TestAllowEntries(t *testing.T) {
 		{&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Something more", Tags: []string{"something different", "something else"}}, &model.User{}, false},
 		{&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Something different", Author: "Example"}, &model.User{}, true},
 		{&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Something different", Author: "Something different"}, &model.User{}, false},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{URL: "https://example.com", Title: "Some Example"}, &model.User{KeepFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{URL: "https://different.com", Title: "Some Test"}, &model.User{KeepFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{URL: "https://different.com", Title: "Some Example"}, &model.User{KeepFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, false},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{CommentsURL: "https://example.com", Content: "Some Example"}, &model.User{KeepFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{CommentsURL: "https://different.com", Content: "Some Test"}, &model.User{KeepFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{CommentsURL: "https://different.com", Content: "Some Example"}, &model.User{KeepFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, false},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Author: "Example", Tags: []string{"example", "something else"}}, &model.User{KeepFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{KeepFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)example"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{KeepFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, false},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Now().Add(24 * time.Hour)}, &model.User{KeepFilterEntryRules: "EntryDate=future"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Now().Add(-24 * time.Hour)}, &model.User{KeepFilterEntryRules: "EntryDate=future"}, false},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Date(2024, 3, 14, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=before:2024-03-15"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Date(2024, 3, 16, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=after:2024-03-15"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Date(2024, 3, 10, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:2024-03-01,2024-03-15"}, true},
-		{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Date(2024, 2, 28, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:2024-03-01,2024-03-15"}, false},
+		{&model.Feed{ID: 1}, &model.Entry{URL: "https://example.com", Title: "Some Example"}, &model.User{KeepFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{URL: "https://different.com", Title: "Some Test"}, &model.User{KeepFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{URL: "https://different.com", Title: "Some Example"}, &model.User{KeepFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, false},
+		{&model.Feed{ID: 1}, &model.Entry{CommentsURL: "https://example.com", Content: "Some Example"}, &model.User{KeepFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{CommentsURL: "https://different.com", Content: "Some Test"}, &model.User{KeepFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{CommentsURL: "https://different.com", Content: "Some Example"}, &model.User{KeepFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, false},
+		{&model.Feed{ID: 1}, &model.Entry{Author: "Example", Tags: []string{"example", "something else"}}, &model.User{KeepFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{KeepFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)example"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{KeepFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, false},
+		{&model.Feed{ID: 1}, &model.Entry{Author: "Different", Tags: []string{"example", "some test"}}, &model.User{KeepFilterEntryRules: "EntryAuthor\nEntryTag=(?i)Test"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{Date: time.Now().Add(24 * time.Hour)}, &model.User{KeepFilterEntryRules: "EntryDate=future"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{Date: time.Now().Add(-24 * time.Hour)}, &model.User{KeepFilterEntryRules: "EntryDate=future"}, false},
+		{&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 3, 14, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=before:2024-03-15"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 3, 14, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=before:invalid-date"}, false}, // invalid date format
+		{&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 3, 16, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=after:2024-03-15"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 3, 16, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=after:invalid-date"}, false}, // invalid date format
+		{&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 3, 10, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:2024-03-01,2024-03-15"}, true},
+		{&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 2, 28, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:2024-03-01,2024-03-15"}, false},
+		{&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 2, 28, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:invalid-date,2024-03-15"}, false}, // invalid date format
+		{&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 2, 28, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:2024-03-15,invalid-date"}, false}, // invalid date format
+		{&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 2, 28, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:2024-03-15"}, false},              // missing second date in range
+		{&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 2, 28, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=abcd"}, false},                            // no colon in rule value
+		{&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 2, 28, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=unknown:2024-03-15"}, false},              // unknown rule type
 	}
 
 	for _, tc := range scenarios {

+ 1 - 1
internal/reader/processor/reading_time.go

@@ -38,7 +38,7 @@ func fetchWatchTime(websiteURL, query string, isoDate bool) (int, error) {
 
 	duration, exists := doc.FindMatcher(goquery.Single(query)).Attr("content")
 	if !exists {
-		return 0, errors.New("duration has not found")
+		return 0, errors.New("duration not found")
 	}
 
 	ret := 0

+ 2 - 2
internal/reader/processor/utils.go

@@ -27,7 +27,7 @@ func parseISO8601(from string) (time.Duration, error) {
 	if iso8601Regex.MatchString(from) {
 		match = iso8601Regex.FindStringSubmatch(from)
 	} else {
-		return 0, errors.New("youtube: could not parse duration string")
+		return 0, errors.New("processor: could not parse duration string")
 	}
 
 	for i, name := range iso8601Regex.SubexpNames() {
@@ -49,7 +49,7 @@ func parseISO8601(from string) (time.Duration, error) {
 		case "second":
 			d += time.Duration(val) * time.Second
 		default:
-			return 0, fmt.Errorf("youtube: unknown field %s", name)
+			return 0, fmt.Errorf("processor: unknown field %s", name)
 		}
 	}