4
0
Эх сурвалжийг харах

Add a rewrite rule to remove clickbait titles

Romain de Laage 3 жил өмнө
parent
commit
33c4b5188c

+ 4 - 4
reader/processor/processor.go

@@ -85,7 +85,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
 			}
 		}
 
-		entry.Content = rewrite.Rewriter(url, entry.Content, feed.RewriteRules)
+		rewrite.Rewriter(url, entry, feed.RewriteRules)
 
 		// The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered.
 		entry.Content = sanitizer.Sanitize(url, entry.Content)
@@ -168,14 +168,14 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
 		return scraperErr
 	}
 
-	content = rewrite.Rewriter(url, content, entry.Feed.RewriteRules)
-	content = sanitizer.Sanitize(url, content)
-
 	if content != "" {
 		entry.Content = content
 		entry.ReadingTime = calculateReadingTime(content, user)
 	}
 
+	rewrite.Rewriter(url, entry, entry.Feed.RewriteRules)
+	entry.Content = sanitizer.Sanitize(url, entry.Content)
+
 	return nil
 }
 

+ 14 - 0
reader/rewrite/rewrite_functions.go

@@ -367,3 +367,17 @@ func removeTables(entryContent string) string {
 	output, _ := doc.Find("body").First().Html()
 	return output
 }
+
+func removeClickbait(entryTitle string) string {
+	titleWords := []string{}
+	for _, word := range strings.Fields(entryTitle) { // Fields splits on whitespace, so the rejoined title uses single spaces
+		runes := []rune(word)
+		if len(runes) > 1 {
+			// Keep the first rune untouched so an initial capital letter survives; lowercase every remaining rune of the word.
+			titleWords = append(titleWords, string([]rune{runes[0]})+strings.ToLower(string(runes[1:])))
+		} else {
+			titleWords = append(titleWords, word) // single-rune words are kept unchanged
+		}
+	}
+	return strings.Join(titleWords, " ")
+}

+ 25 - 26
reader/rewrite/rewriter.go

@@ -10,6 +10,7 @@ import (
 	"text/scanner"
 
 	"miniflux.app/logger"
+	"miniflux.app/model"
 	"miniflux.app/url"
 )
 
@@ -19,7 +20,7 @@ type rule struct {
 }
 
 // Rewriter modifies item contents with a set of rewriting rules.
-func Rewriter(entryURL, entryContent, customRewriteRules string) string {
+func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) {
 	rulesList := getPredefinedRewriteRules(entryURL)
 	if customRewriteRules != "" {
 		rulesList = customRewriteRules
@@ -31,10 +32,8 @@ func Rewriter(entryURL, entryContent, customRewriteRules string) string {
 	logger.Debug(`[Rewrite] Applying rules %v for %q`, rules, entryURL)
 
 	for _, rule := range rules {
-		entryContent = applyRule(entryURL, entryContent, rule)
+		applyRule(entryURL, entry, rule)
 	}
-
-	return entryContent
 }
 
 func parseRules(rulesText string) (rules []rule) {
@@ -60,61 +59,61 @@ func parseRules(rulesText string) (rules []rule) {
 	}
 }
 
-func applyRule(entryURL, entryContent string, rule rule) string {
+func applyRule(entryURL string, entry *model.Entry, rule rule) {
 	switch rule.name {
 	case "add_image_title":
-		entryContent = addImageTitle(entryURL, entryContent)
+		entry.Content = addImageTitle(entryURL, entry.Content)
 	case "add_mailto_subject":
-		entryContent = addMailtoSubject(entryURL, entryContent)
+		entry.Content = addMailtoSubject(entryURL, entry.Content)
 	case "add_dynamic_image":
-		entryContent = addDynamicImage(entryURL, entryContent)
+		entry.Content = addDynamicImage(entryURL, entry.Content)
 	case "add_youtube_video":
-		entryContent = addYoutubeVideo(entryURL, entryContent)
+		entry.Content = addYoutubeVideo(entryURL, entry.Content)
 	case "add_invidious_video":
-		entryContent = addInvidiousVideo(entryURL, entryContent)
+		entry.Content = addInvidiousVideo(entryURL, entry.Content)
 	case "add_youtube_video_using_invidious_player":
-		entryContent = addYoutubeVideoUsingInvidiousPlayer(entryURL, entryContent)
+		entry.Content = addYoutubeVideoUsingInvidiousPlayer(entryURL, entry.Content)
 	case "add_youtube_video_from_id":
-		entryContent = addYoutubeVideoFromId(entryContent)
+		entry.Content = addYoutubeVideoFromId(entry.Content)
 	case "add_pdf_download_link":
-		entryContent = addPDFLink(entryURL, entryContent)
+		entry.Content = addPDFLink(entryURL, entry.Content)
 	case "nl2br":
-		entryContent = replaceLineFeeds(entryContent)
+		entry.Content = replaceLineFeeds(entry.Content)
 	case "convert_text_link", "convert_text_links":
-		entryContent = replaceTextLinks(entryContent)
+		entry.Content = replaceTextLinks(entry.Content)
 	case "fix_medium_images":
-		entryContent = fixMediumImages(entryURL, entryContent)
+		entry.Content = fixMediumImages(entryURL, entry.Content)
 	case "use_noscript_figure_images":
-		entryContent = useNoScriptImages(entryURL, entryContent)
+		entry.Content = useNoScriptImages(entryURL, entry.Content)
 	case "replace":
 		// Format: replace("search-term"|"replace-term")
 		if len(rule.args) >= 2 {
-			entryContent = replaceCustom(entryContent, rule.args[0], rule.args[1])
+			entry.Content = replaceCustom(entry.Content, rule.args[0], rule.args[1])
 		} else {
 			logger.Debug("[Rewrite] Cannot find search and replace terms for replace rule %s", rule)
 		}
 	case "remove":
 		// Format: remove("#selector > .element, .another")
 		if len(rule.args) >= 1 {
-			entryContent = removeCustom(entryContent, rule.args[0])
+			entry.Content = removeCustom(entry.Content, rule.args[0])
 		} else {
 			logger.Debug("[Rewrite] Cannot find selector for remove rule %s", rule)
 		}
 	case "add_castopod_episode":
-		entryContent = addCastopodEpisode(entryURL, entryContent)
+		entry.Content = addCastopodEpisode(entryURL, entry.Content)
 	case "base64_decode":
 		if len(rule.args) >= 1 {
-			entryContent = applyFuncOnTextContent(entryContent, rule.args[0], decodeBase64Content)
+			entry.Content = applyFuncOnTextContent(entry.Content, rule.args[0], decodeBase64Content)
 		} else {
-			entryContent = applyFuncOnTextContent(entryContent, "body", decodeBase64Content)
+			entry.Content = applyFuncOnTextContent(entry.Content, "body", decodeBase64Content)
 		}
 	case "parse_markdown":
-		entryContent = parseMarkdown(entryContent)
+		entry.Content = parseMarkdown(entry.Content)
 	case "remove_tables":
-		entryContent = removeTables(entryContent)
+		entry.Content = removeTables(entry.Content)
+	case "remove_clickbait":
+		entry.Title = removeClickbait(entry.Title)
 	}
-
-	return entryContent
 }
 
 func getPredefinedRewriteRules(entryURL string) string {

+ 336 - 140
reader/rewrite/rewriter_test.go

@@ -8,6 +8,8 @@ import (
 	"reflect"
 	"strings"
 	"testing"
+
+	"miniflux.app/model"
 )
 
 func TestParseRules(t *testing.T) {
@@ -46,178 +48,301 @@ func TestReplaceTextLinks(t *testing.T) {
 }
 
 func TestRewriteWithNoMatchingRule(t *testing.T) {
-	output := Rewriter("https://example.org/article", `Some text.`, ``)
-	expected := `Some text.`
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `Some text.`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `Some text.`,
+	}
+	Rewriter("https://example.org/article", testEntry, ``)
 
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteWithYoutubeLink(t *testing.T) {
-	output := Rewriter("https://www.youtube.com/watch?v=1234", "Video Description", ``)
-	expected := `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/1234" allowfullscreen></iframe><br>Video Description`
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/1234" allowfullscreen></iframe><br>Video Description`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `Video Description`,
+	}
+	Rewriter("https://www.youtube.com/watch?v=1234", testEntry, ``)
 
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteWithInexistingCustomRule(t *testing.T) {
-	output := Rewriter("https://www.youtube.com/watch?v=1234", `Video Description`, `some rule`)
-	expected := `Video Description`
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `Video Description`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `Video Description`,
+	}
+	Rewriter("https://www.youtube.com/watch?v=1234", testEntry, `some rule`)
+
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteWithXkcdLink(t *testing.T) {
-	description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`
-	output := Rewriter("https://xkcd.com/1912/", description, ``)
-	expected := `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you."/><figcaption><p>Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.</p></figcaption></figure>`
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you."/><figcaption><p>Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.</p></figcaption></figure>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`,
+	}
+	Rewriter("https://xkcd.com/1912/", testEntry, ``)
+
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteWithXkcdLinkHtmlInjection(t *testing.T) {
-	description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="<foo>" alt="<foo>" />`
-	output := Rewriter("https://xkcd.com/1912/", description, ``)
-	expected := `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="&lt;foo&gt;"/><figcaption><p>&lt;foo&gt;</p></figcaption></figure>`
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="&lt;foo&gt;"/><figcaption><p>&lt;foo&gt;</p></figcaption></figure>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="<foo>" alt="<foo>" />`,
+	}
+	Rewriter("https://xkcd.com/1912/", testEntry, ``)
+
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteWithXkcdLinkAndImageNoTitle(t *testing.T) {
-	description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`
-	output := Rewriter("https://xkcd.com/1912/", description, ``)
-	expected := description
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`,
+	}
+	Rewriter("https://xkcd.com/1912/", testEntry, ``)
+
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) {
-	description := "test"
-	output := Rewriter("https://xkcd.com/1912/", description, ``)
-	expected := description
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `test`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `test`,
+	}
+	Rewriter("https://xkcd.com/1912/", testEntry, ``)
+
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteWithXkcdAndNoImage(t *testing.T) {
-	description := "test"
-	output := Rewriter("https://xkcd.com/1912/", description, ``)
-	expected := description
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `test`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `test`,
+	}
+	Rewriter("https://xkcd.com/1912/", testEntry, ``)
 
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteMailtoLink(t *testing.T) {
-	description := `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact</a>`
-	output := Rewriter("https://www.qwantz.com/", description, ``)
-	expected := `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact [blah blah]</a>`
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact [blah blah]</a>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact</a>`,
+	}
+	Rewriter("https://www.qwantz.com/", testEntry, ``)
+
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteWithPDFLink(t *testing.T) {
-	description := "test"
-	output := Rewriter("https://example.org/document.pdf", description, ``)
-	expected := `<a href="https://example.org/document.pdf">PDF</a><br>test`
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<a href="https://example.org/document.pdf">PDF</a><br>test`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `test`,
+	}
+	Rewriter("https://example.org/document.pdf", testEntry, ``)
 
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteWithNoLazyImage(t *testing.T) {
-	description := `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`
-	output := Rewriter("https://example.org/article", description, "add_dynamic_image")
-	expected := description
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`,
+	}
+	Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
 
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteWithLazyImage(t *testing.T) {
-	description := `<img src="" data-url="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
-	output := Rewriter("https://example.org/article", description, "add_dynamic_image")
-	expected := `<img src="https://example.org/image.jpg" data-url="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img src="https://example.org/image.jpg" data-url="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img src="" data-url="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
+	}
+	Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
 
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteWithLazyDivImage(t *testing.T) {
-	description := `<div data-url="https://example.org/image.jpg" alt="Image"></div><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
-	output := Rewriter("https://example.org/article", description, "add_dynamic_image")
-	expected := `<img src="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img src="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<div data-url="https://example.org/image.jpg" alt="Image"></div><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
+	}
+	Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
 
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteWithUnknownLazyNoScriptImage(t *testing.T) {
-	description := `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
-	output := Rewriter("https://example.org/article", description, "add_dynamic_image")
-	expected := `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"/><img src="https://example.org/fallback.jpg" alt="Fallback"/>`
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"/><img src="https://example.org/fallback.jpg" alt="Fallback"/>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
+	}
+	Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
 
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteWithLazySrcset(t *testing.T) {
-	description := `<img srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`
-	output := Rewriter("https://example.org/article", description, "add_dynamic_image")
-	expected := `<img srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`,
+	}
+	Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
 
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteWithImageAndLazySrcset(t *testing.T) {
-	description := `<img src="meow" srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`
-	output := Rewriter("https://example.org/article", description, "add_dynamic_image")
-	expected := `<img src="meow" srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img src="meow" srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img src="meow" srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`,
+	}
+	Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
 
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestNewLineRewriteRule(t *testing.T) {
-	description := "A\nB\nC"
-	output := Rewriter("https://example.org/article", description, "nl2br")
-	expected := `A<br>B<br>C`
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `A<br>B<br>C`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: "A\nB\nC",
+	}
+	Rewriter("https://example.org/article", testEntry, "nl2br")
 
-	if expected != output {
-		t.Errorf(`Not expected output: got %q instead of %q`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestConvertTextLinkRewriteRule(t *testing.T) {
-	description := "Test: http://example.org/a/b"
-	output := Rewriter("https://example.org/article", description, "convert_text_link")
-	expected := `Test: <a href="http://example.org/a/b">http://example.org/a/b</a>`
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `Test: <a href="http://example.org/a/b">http://example.org/a/b</a>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `Test: http://example.org/a/b`,
+	}
+	Rewriter("https://example.org/article", testEntry, "convert_text_link")
 
-	if expected != output {
-		t.Errorf(`Not expected output: got %q instead of %q`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestMediumImage(t *testing.T) {
-	content := `
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcset="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>`,
+	}
+	testEntry := &model.Entry{
+		Title: `A title`,
+		Content: `
 		<figure class="ht hu hv hw hx hy cy cz paragraph-image">
 			<div class="hz ia ib ic aj">
 				<div class="cy cz hs">
@@ -235,103 +360,174 @@ func TestMediumImage(t *testing.T) {
 				</div>
 			</div>
 		</figure>
-	`
-	expected := `<img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcset="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>`
-	output := Rewriter("https://example.org/article", content, "fix_medium_images")
-	output = strings.TrimSpace(output)
+		`,
+	}
+	Rewriter("https://example.org/article", testEntry, "fix_medium_images")
+	testEntry.Content = strings.TrimSpace(testEntry.Content)
 
-	if expected != output {
-		t.Errorf(`Not expected output: %s`, output)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteNoScriptImageWithoutNoScriptTag(t *testing.T) {
-	content := `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><figcaption>MDN Logo</figcaption></figure>`
-	expected := `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."/><figcaption>MDN Logo</figcaption></figure>`
-	output := Rewriter("https://example.org/article", content, "use_noscript_figure_images")
-	output = strings.TrimSpace(output)
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."/><figcaption>MDN Logo</figcaption></figure>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><figcaption>MDN Logo</figcaption></figure>`,
+	}
+	Rewriter("https://example.org/article", testEntry, "use_noscript_figure_images")
+	testEntry.Content = strings.TrimSpace(testEntry.Content)
 
-	if expected != output {
-		t.Errorf(`Not expected output: %s`, output)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteNoScriptImageWithNoScriptTag(t *testing.T) {
-	content := `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><noscript><img src="http://example.org/logo.svg"></noscript><figcaption>MDN Logo</figcaption></figure>`
-	expected := `<figure><img src="http://example.org/logo.svg"/><figcaption>MDN Logo</figcaption></figure>`
-	output := Rewriter("https://example.org/article", content, "use_noscript_figure_images")
-	output = strings.TrimSpace(output)
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<figure><img src="http://example.org/logo.svg"/><figcaption>MDN Logo</figcaption></figure>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><noscript><img src="http://example.org/logo.svg"></noscript><figcaption>MDN Logo</figcaption></figure>`,
+	}
+	Rewriter("https://example.org/article", testEntry, "use_noscript_figure_images")
+	testEntry.Content = strings.TrimSpace(testEntry.Content)
 
-	if expected != output {
-		t.Errorf(`Not expected output: %s`, output)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteReplaceCustom(t *testing.T) {
-	content := `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.svg">`
-	expected := `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.png">`
-	output := Rewriter("https://example.org/article", content, `replace("article/(.*).svg"|"article/$1.png")`)
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.png">`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.svg">`,
+	}
+	Rewriter("https://example.org/article", testEntry, `replace("article/(.*).svg"|"article/$1.png")`)
 
-	if expected != output {
-		t.Errorf(`Not expected output: %s`, output)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteRemoveCustom(t *testing.T) {
-	content := `<div>Lorem Ipsum <span class="spam">I dont want to see this</span><span class="ads keep">Super important info</span></div>`
-	expected := `<div>Lorem Ipsum <span class="ads keep">Super important info</span></div>`
-	output := Rewriter("https://example.org/article", content, `remove(".spam, .ads:not(.keep)")`)
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<div>Lorem Ipsum <span class="ads keep">Super important info</span></div>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<div>Lorem Ipsum <span class="spam">I dont want to see this</span><span class="ads keep">Super important info</span></div>`,
+	}
+	Rewriter("https://example.org/article", testEntry, `remove(".spam, .ads:not(.keep)")`)
 
-	if expected != output {
-		t.Errorf(`Not expected output: %s`, output)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteAddCastopodEpisode(t *testing.T) {
-	output := Rewriter("https://podcast.demo/@demo/episodes/test", "Episode Description", `add_castopod_episode`)
-	expected := `<iframe width="650" frameborder="0" src="https://podcast.demo/@demo/episodes/test/embed/light"></iframe><br>Episode Description`
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<iframe width="650" frameborder="0" src="https://podcast.demo/@demo/episodes/test/embed/light"></iframe><br>Episode Description`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `Episode Description`,
+	}
+	Rewriter("https://podcast.demo/@demo/episodes/test", testEntry, `add_castopod_episode`)
 
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteBase64Decode(t *testing.T) {
-	content := `VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=`
-	expected := `This is some base64 encoded content`
-	output := Rewriter("https://example.org/article", content, `base64_decode`)
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `This is some base64 encoded content`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=`,
+	}
+	Rewriter("https://example.org/article", testEntry, `base64_decode`)
 
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteBase64DecodeInHTML(t *testing.T) {
-	content := `<div>Lorem Ipsum not valid base64<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`
-	expected := `<div>Lorem Ipsum not valid base64<span class="base64">This is some base64 encoded content</span></div>`
-	output := Rewriter("https://example.org/article", content, `base64_decode`)
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<div>Lorem Ipsum not valid base64<span class="base64">This is some base64 encoded content</span></div>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<div>Lorem Ipsum not valid base64<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`,
+	}
+	Rewriter("https://example.org/article", testEntry, `base64_decode`)
 
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteBase64DecodeArgs(t *testing.T) {
-	content := `<div>Lorem Ipsum<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`
-	expected := `<div>Lorem Ipsum<span class="base64">This is some base64 encoded content</span></div>`
-	output := Rewriter("https://example.org/article", content, `base64_decode(".base64")`)
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<div>Lorem Ipsum<span class="base64">This is some base64 encoded content</span></div>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<div>Lorem Ipsum<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`,
+	}
+	Rewriter("https://example.org/article", testEntry, `base64_decode(".base64")`)
 
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
 
 func TestRewriteRemoveTables(t *testing.T) {
-	content := `<table class="container"><tbody><tr><td><p>Test</p><table class="row"><tbody><tr><td><p>Hello World!</p></td><td><p>Test</p></td></tr></tbody></table></td></tr></tbody></table>`
-	expected := `<p>Test</p><p>Hello World!</p><p>Test</p>`
-	output := Rewriter("https://example.org/article", content, `remove_tables`)
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<p>Test</p><p>Hello World!</p><p>Test</p>`,
+	}
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<table class="container"><tbody><tr><td><p>Test</p><table class="row"><tbody><tr><td><p>Hello World!</p></td><td><p>Test</p></td></tr></tbody></table></td></tr></tbody></table>`,
+	}
+	Rewriter("https://example.org/article", testEntry, `remove_tables`)
+
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
+	}
+}
+
+func TestRemoveClickbait(t *testing.T) {
+	controlEntry := &model.Entry{
+		Title:   `This Is Amazing`,
+		Content: `Some description`,
+	}
+	testEntry := &model.Entry{
+		Title:   `THIS IS AMAZING`,
+		Content: `Some description`,
+	}
+	Rewriter("https://example.org/article", testEntry, `remove_clickbait`)
 
-	if expected != output {
-		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }