Răsfoiți Sursa

Minor simplifications of the rewriter

- Inline some one-line functions
- Transform a free-standing function into a method
- Massively simplify `removeClickbait`
- Use a proper constant instead of a magic number in `applyFuncOnTextContent`
jvoisin 2 ani în urmă
părinte
comite
c29ca0e313

+ 3 - 19
internal/reader/rewrite/rewrite_functions.go

@@ -14,6 +14,8 @@ import (
 
 	"miniflux.app/v2/internal/config"
 
+	nethtml "golang.org/x/net/html"
+
 	"github.com/PuerkitoBio/goquery"
 	"github.com/yuin/goldmark"
 	goldmarkhtml "github.com/yuin/goldmark/renderer/html"
@@ -301,10 +303,6 @@ func replaceTextLinks(input string) string {
 	return textLinkRegex.ReplaceAllString(input, `<a href="${1}">${1}</a>`)
 }
 
-func replaceLineFeeds(input string) string {
-	return strings.ReplaceAll(input, "\n", "<br>")
-}
-
 func replaceCustom(entryContent string, searchTerm string, replaceTerm string) string {
 	re, err := regexp.Compile(searchTerm)
 	if err == nil {
@@ -334,7 +332,7 @@ func addCastopodEpisode(entryURL, entryContent string) string {
 func applyFuncOnTextContent(entryContent string, selector string, repl func(string) string) string {
 	var treatChildren func(i int, s *goquery.Selection)
 	treatChildren = func(i int, s *goquery.Selection) {
-		if s.Nodes[0].Type == 1 {
+		if s.Nodes[0].Type == nethtml.TextNode {
 			s.ReplaceWithHtml(repl(s.Nodes[0].Data))
 		} else {
 			s.Contents().Each(treatChildren)
@@ -457,17 +455,3 @@ func removeTables(entryContent string) string {
 	output, _ := doc.Find("body").First().Html()
 	return output
 }
-
-func removeClickbait(entryTitle string) string {
-	titleWords := []string{}
-	for _, word := range strings.Fields(entryTitle) {
-		runes := []rune(word)
-		if len(runes) > 1 {
-			// keep first rune as is to keep the first capital letter
-			titleWords = append(titleWords, string([]rune{runes[0]})+strings.ToLower(string(runes[1:])))
-		} else {
-			titleWords = append(titleWords, word)
-		}
-	}
-	return strings.Join(titleWords, " ")
-}

+ 48 - 49
internal/reader/rewrite/rewriter.go

@@ -11,6 +11,9 @@ import (
 
 	"miniflux.app/v2/internal/model"
 	"miniflux.app/v2/internal/urllib"
+
+	"golang.org/x/text/cases"
+	"golang.org/x/text/language"
 )
 
 type rule struct {
@@ -18,50 +21,7 @@ type rule struct {
 	args []string
 }
 
-// Rewriter modify item contents with a set of rewriting rules.
-func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) {
-	rulesList := getPredefinedRewriteRules(entryURL)
-	if customRewriteRules != "" {
-		rulesList = customRewriteRules
-	}
-
-	rules := parseRules(rulesList)
-	rules = append(rules, rule{name: "add_pdf_download_link"})
-
-	slog.Debug("Rewrite rules applied",
-		slog.Any("rules", rules),
-		slog.String("entry_url", entryURL),
-	)
-
-	for _, rule := range rules {
-		applyRule(entryURL, entry, rule)
-	}
-}
-
-func parseRules(rulesText string) (rules []rule) {
-	scan := scanner.Scanner{Mode: scanner.ScanIdents | scanner.ScanStrings}
-	scan.Init(strings.NewReader(rulesText))
-
-	for {
-		switch scan.Scan() {
-		case scanner.Ident:
-			rules = append(rules, rule{name: scan.TokenText()})
-
-		case scanner.String:
-			if l := len(rules) - 1; l >= 0 {
-				text := scan.TokenText()
-				text, _ = strconv.Unquote(text)
-
-				rules[l].args = append(rules[l].args, text)
-			}
-
-		case scanner.EOF:
-			return
-		}
-	}
-}
-
-func applyRule(entryURL string, entry *model.Entry, rule rule) {
+func (rule rule) applyRule(entryURL string, entry *model.Entry) {
 	switch rule.name {
 	case "add_image_title":
 		entry.Content = addImageTitle(entryURL, entry.Content)
@@ -82,7 +42,7 @@ func applyRule(entryURL string, entry *model.Entry, rule rule) {
 	case "add_pdf_download_link":
 		entry.Content = addPDFLink(entryURL, entry.Content)
 	case "nl2br":
-		entry.Content = replaceLineFeeds(entry.Content)
+		entry.Content = strings.ReplaceAll(entry.Content, "\n", "<br>")
 	case "convert_text_link", "convert_text_links":
 		entry.Content = replaceTextLinks(entry.Content)
 	case "fix_medium_images":
@@ -122,11 +82,11 @@ func applyRule(entryURL string, entry *model.Entry, rule rule) {
 	case "add_castopod_episode":
 		entry.Content = addCastopodEpisode(entryURL, entry.Content)
 	case "base64_decode":
+		selector := "body"
 		if len(rule.args) >= 1 {
-			entry.Content = applyFuncOnTextContent(entry.Content, rule.args[0], decodeBase64Content)
-		} else {
-			entry.Content = applyFuncOnTextContent(entry.Content, "body", decodeBase64Content)
+			selector = rule.args[0]
 		}
+		entry.Content = applyFuncOnTextContent(entry.Content, selector, decodeBase64Content)
 	case "add_hn_links_using_hack":
 		entry.Content = addHackerNewsLinksUsing(entry.Content, "hack")
 	case "add_hn_links_using_opener":
@@ -136,7 +96,46 @@ func applyRule(entryURL string, entry *model.Entry, rule rule) {
 	case "remove_tables":
 		entry.Content = removeTables(entry.Content)
 	case "remove_clickbait":
-		entry.Title = removeClickbait(entry.Title)
+		entry.Title = cases.Title(language.English).String(strings.ToLower(entry.Title))
+	}
+}
+
+// Rewriter modify item contents with a set of rewriting rules.
+func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) {
+	rulesList := getPredefinedRewriteRules(entryURL)
+	if customRewriteRules != "" {
+		rulesList = customRewriteRules
+	}
+
+	rules := parseRules(rulesList)
+	rules = append(rules, rule{name: "add_pdf_download_link"})
+
+	slog.Debug("Rewrite rules applied",
+		slog.Any("rules", rules),
+		slog.String("entry_url", entryURL),
+	)
+
+	for _, rule := range rules {
+		rule.applyRule(entryURL, entry)
+	}
+}
+
+func parseRules(rulesText string) (rules []rule) {
+	scan := scanner.Scanner{Mode: scanner.ScanIdents | scanner.ScanStrings}
+	scan.Init(strings.NewReader(rulesText))
+
+	for {
+		switch scan.Scan() {
+		case scanner.Ident:
+			rules = append(rules, rule{name: scan.TokenText()})
+		case scanner.String:
+			if l := len(rules) - 1; l >= 0 {
+				text, _ := strconv.Unquote(scan.TokenText())
+				rules[l].args = append(rules[l].args, text)
+			}
+		case scanner.EOF:
+			return
+		}
 	}
 }