Browse Source

refactor: use a better construct than `doc.Find(…).First()`

As mentioned in goquery's documentation (https://pkg.go.dev/github.com/PuerkitoBio/goquery#Single):

> By default, Selection.Find and other functions that accept a selector string
> to select nodes will use all matches corresponding to that selector. By using
> the Matcher returned by Single, at most the first match will be selected.
>
> The one using Single is optimized to be potentially much faster on large documents.
Julien Voisin 1 year ago
parent
commit
1b0b8b9c42

+ 1 - 1
internal/mediaproxy/rewriter.go

@@ -87,7 +87,7 @@ func genericProxyRewriter(router *mux.Router, proxifyFunction urlProxyRewriter,
 		}
 	}
 
-	output, err := doc.Find("body").First().Html()
+	output, err := doc.FindMatcher(goquery.Single("body")).Html()
 	if err != nil {
 		return htmlDocument
 	}

+ 1 - 1
internal/reader/processor/nebula.go

@@ -48,7 +48,7 @@ func fetchNebulaWatchTime(websiteURL string) (int, error) {
 		return 0, docErr
 	}
 
-	durs, exists := doc.Find(`meta[property="video:duration"]`).First().Attr("content")
+	durs, exists := doc.FindMatcher(goquery.Single(`meta[property="video:duration"]`)).Attr("content")
 	// durs contains video watch time in seconds
 	if !exists {
 		return 0, errors.New("duration has not found")

+ 1 - 1
internal/reader/processor/odysee.go

@@ -48,7 +48,7 @@ func fetchOdyseeWatchTime(websiteURL string) (int, error) {
 		return 0, docErr
 	}
 
-	durs, exists := doc.Find(`meta[property="og:video:duration"]`).First().Attr("content")
+	durs, exists := doc.FindMatcher(goquery.Single(`meta[property="og:video:duration"]`)).Attr("content")
 	// durs contains video watch time in seconds
 	if !exists {
 		return 0, errors.New("duration has not found")

+ 1 - 1
internal/reader/processor/youtube.go

@@ -60,7 +60,7 @@ func fetchYouTubeWatchTimeFromWebsite(websiteURL string) (int, error) {
 		return 0, docErr
 	}
 
-	durs, exists := doc.Find(`meta[itemprop="duration"]`).First().Attr("content")
+	durs, exists := doc.FindMatcher(goquery.Single(`meta[itemprop="duration"]`)).Attr("content")
 	if !exists {
 		return 0, errors.New("duration has not found")
 	}

+ 1 - 1
internal/reader/readability/readability.go

@@ -77,7 +77,7 @@ func ExtractContent(page io.Reader) (baseURL string, extractedContent string, er
 		return "", "", err
 	}
 
-	if hrefValue, exists := document.Find("head base").First().Attr("href"); exists {
+	if hrefValue, exists := document.FindMatcher(goquery.Single("head base")).Attr("href"); exists {
 		hrefValue = strings.TrimSpace(hrefValue)
 		if urllib.IsAbsoluteURL(hrefValue) {
 			baseURL = hrefValue

+ 11 - 11
internal/reader/rewrite/rewrite_functions.go

@@ -44,7 +44,7 @@ func addImageTitle(entryURL, entryContent string) string {
 			img.ReplaceWithHtml(`<figure><img src="` + srcAttr + `" alt="` + altAttr + `"/><figcaption><p>` + html.EscapeString(titleAttr) + `</p></figcaption></figure>`)
 		})
 
-		output, _ := doc.Find("body").First().Html()
+		output, _ := doc.FindMatcher(goquery.Single("body")).Html()
 		return output
 	}
 
@@ -76,7 +76,7 @@ func addMailtoSubject(entryURL, entryContent string) string {
 			a.AppendHtml(" [" + html.EscapeString(subject) + "]")
 		})
 
-		output, _ := doc.Find("body").First().Html()
+		output, _ := doc.FindMatcher(goquery.Single("body")).Html()
 		return output
 	}
 
@@ -160,7 +160,7 @@ func addDynamicImage(entryURL, entryContent string) string {
 	}
 
 	if changed {
-		output, _ := doc.Find("body").First().Html()
+		output, _ := doc.FindMatcher(goquery.Single("body")).Html()
 		return output
 	}
 
@@ -197,7 +197,7 @@ func addDynamicIframe(entryURL, entryContent string) string {
 	})
 
 	if changed {
-		output, _ := doc.Find("body").First().Html()
+		output, _ := doc.FindMatcher(goquery.Single("body")).Html()
 		return output
 	}
 
@@ -217,7 +217,7 @@ func fixMediumImages(entryURL, entryContent string) string {
 		}
 	})
 
-	output, _ := doc.Find("body").First().Html()
+	output, _ := doc.FindMatcher(goquery.Single("body")).Html()
 	return output
 }
 
@@ -239,7 +239,7 @@ func useNoScriptImages(entryURL, entryContent string) string {
 		}
 	})
 
-	output, _ := doc.Find("body").First().Html()
+	output, _ := doc.FindMatcher(goquery.Single("body")).Html()
 	return output
 }
 
@@ -317,7 +317,7 @@ func removeCustom(entryContent string, selector string) string {
 
 	doc.Find(selector).Remove()
 
-	output, _ := doc.Find("body").First().Html()
+	output, _ := doc.FindMatcher(goquery.Single("body")).Html()
 	return output
 }
 
@@ -344,7 +344,7 @@ func applyFuncOnTextContent(entryContent string, selector string, repl func(stri
 
 	doc.Find(selector).Each(treatChildren)
 
-	output, _ := doc.Find("body").First().Html()
+	output, _ := doc.FindMatcher(goquery.Single("body")).Html()
 	return output
 }
 
@@ -401,7 +401,7 @@ func addHackerNewsLinksUsing(entryContent, app string) string {
 			}
 		})
 
-		output, _ := doc.Find("body").First().Html()
+		output, _ := doc.FindMatcher(goquery.Single("body")).Html()
 		return output
 	}
 
@@ -420,7 +420,7 @@ func removeTables(entryContent string) string {
 
 	for _, selector := range selectors {
 		for {
-			loopElement = doc.Find(selector).First()
+			loopElement = doc.FindMatcher(goquery.Single(selector))
 
 			if loopElement.Length() == 0 {
 				break
@@ -436,6 +436,6 @@ func removeTables(entryContent string) string {
 		}
 	}
 
-	output, _ := doc.Find("body").First().Html()
+	output, _ := doc.FindMatcher(goquery.Single("body")).Html()
 	return output
 }

+ 1 - 1
internal/reader/scraper/scraper.go

@@ -75,7 +75,7 @@ func findContentUsingCustomRules(page io.Reader, rules string) (baseURL string,
 		return "", "", err
 	}
 
-	if hrefValue, exists := document.Find("head base").First().Attr("href"); exists {
+	if hrefValue, exists := document.FindMatcher(goquery.Single("head base")).Attr("href"); exists {
 		hrefValue = strings.TrimSpace(hrefValue)
 		if urllib.IsAbsoluteURL(hrefValue) {
 			baseURL = hrefValue

+ 1 - 1
internal/reader/subscription/finder.go

@@ -146,7 +146,7 @@ func (f *SubscriptionFinder) FindSubscriptionsFromWebPage(websiteURL, contentTyp
 		return nil, locale.NewLocalizedErrorWrapper(err, "error.unable_to_parse_html_document", err)
 	}
 
-	if hrefValue, exists := doc.Find("head base").First().Attr("href"); exists {
+	if hrefValue, exists := doc.FindMatcher(goquery.Single("head base")).Attr("href"); exists {
 		hrefValue = strings.TrimSpace(hrefValue)
 		if urllib.IsAbsoluteURL(hrefValue) {
 			websiteURL = hrefValue