Browse Source

Slightly improve internal/reader/scraper/scraper.go

- make findContentUsingCustomRules more idiomatic,
  since in Go a function returning an error might
  return garbage in its other return values. Moreover,
  ignoring errors is bad practice.
- getPredefinedScraperRules now runs in constant time,
  instead of iterating over a map with around 50 entries in it.
jvoisin 2 năm trước cách đây
mục cha
commit
c2d2f31438

+ 6 - 9
internal/reader/scraper/scraper.go

@@ -78,10 +78,9 @@ func findContentUsingCustomRules(page io.Reader, rules string) (string, error) {
 
 	contents := ""
 	document.Find(rules).Each(func(i int, s *goquery.Selection) {
-		var content string
-
-		content, _ = goquery.OuterHtml(s)
-		contents += content
+		if content, err := goquery.OuterHtml(s); err == nil {
+			contents += content
+		}
 	})
 
 	return contents, nil
@@ -89,13 +88,11 @@ func findContentUsingCustomRules(page io.Reader, rules string) (string, error) {
 
 func getPredefinedScraperRules(websiteURL string) string {
 	urlDomain := urllib.Domain(websiteURL)
+	urlDomain = strings.TrimPrefix(urlDomain, "www.")
 
-	for domain, rules := range predefinedRules {
-		if strings.Contains(urlDomain, domain) {
-			return rules
-		}
+	if rules, ok := predefinedRules[urlDomain]; ok {
+		return rules
 	}
-
 	return ""
 }
 

+ 4 - 0
internal/reader/scraper/scraper_test.go

@@ -19,6 +19,10 @@ func TestGetPredefinedRules(t *testing.T) {
 		t.Error("Unable to find rule for linux.com")
 	}
 
+	if getPredefinedScraperRules("https://linux.com/") == "" {
+		t.Error("Unable to find rule for linux.com")
+	}
+
 	if getPredefinedScraperRules("https://example.org/") != "" {
 		t.Error("A rule not defined should not return anything")
 	}