Quellcode durchsuchen

Add rewrite rule to use noscript content for images rendered with JavaScript

Frédéric Guillot vor 5 Jahren
Ursprung
Commit
b50778d3eb
3 geänderte Dateien mit 49 neuen und 1 gelöschten Zeilen
  1. 25 1
      reader/rewrite/rewrite_functions.go
  2. 2 0
      reader/rewrite/rewriter.go
  3. 22 0
      reader/rewrite/rewriter_test.go

+ 25 - 1
reader/rewrite/rewrite_functions.go

@@ -147,7 +147,31 @@ func fixMediumImages(entryURL, entryContent string) string {
 
 	doc.Find("figure.paragraph-image").Each(func(i int, paragraphImage *goquery.Selection) {
 		noscriptElement := paragraphImage.Find("noscript")
-		paragraphImage.ReplaceWithHtml(noscriptElement.Text())
+		if noscriptElement.Length() > 0 {
+			paragraphImage.ReplaceWithHtml(noscriptElement.Text())
+		}
+	})
+
+	output, _ := doc.Find("body").First().Html()
+	return output
+}
+
+func useNoScriptImages(entryURL, entryContent string) string {
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
+	if err != nil {
+		return entryContent
+	}
+
+	doc.Find("figure").Each(func(i int, figureElement *goquery.Selection) {
+		imgElement := figureElement.Find("img")
+		if imgElement.Length() > 0 {
+			noscriptElement := figureElement.Find("noscript")
+			if noscriptElement.Length() > 0 {
+				figureElement.PrependHtml(noscriptElement.Text())
+				imgElement.Remove()
+				noscriptElement.Remove()
+			}
+		}
 	})
 
 	output, _ := doc.Find("body").First().Html()

+ 2 - 0
reader/rewrite/rewriter.go

@@ -45,6 +45,8 @@ func Rewriter(entryURL, entryContent, customRewriteRules string) string {
 			entryContent = replaceTextLinks(entryContent)
 		case "fix_medium_images":
 			entryContent = fixMediumImages(entryURL, entryContent)
+		case "use_noscript_figure_images":
+			entryContent = useNoScriptImages(entryURL, entryContent)
 		}
 	}
 

+ 22 - 0
reader/rewrite/rewriter_test.go

@@ -208,3 +208,25 @@ func TestMediumImage(t *testing.T) {
 		t.Errorf(`Not expected output: %s`, output)
 	}
 }
+
+func TestRewriteNoScriptImageWithoutNoScriptTag(t *testing.T) { // Checks that the "use_noscript_figure_images" rule keeps the <img> of a <figure> that has no <noscript> child.
+	content := `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><figcaption>MDN Logo</figcaption></figure>` // Input: a figure holding only an <img> and a <figcaption>.
+	expected := `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."/><figcaption>MDN Logo</figcaption></figure>` // Same markup as the input; only the <img> tag is written with a trailing "/>" (presumably how the HTML serializer emits void elements; confirm against goquery).
+	output := Rewriter("https://example.org/article", content, "use_noscript_figure_images") // The third argument names the rewrite rule to apply.
+	output = strings.TrimSpace(output) // Compare without leading or trailing whitespace.
+
+	if expected != output {
+		t.Errorf(`Not expected output: %s`, output) // The failure message prints only the actual output.
+	}
+}
+
+func TestRewriteNoScriptImageWithNoScriptTag(t *testing.T) { // Checks that the "use_noscript_figure_images" rule swaps a figure's <img> for the <img> found inside its <noscript> child.
+	content := `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><noscript><img src="http://example.org/logo.svg"></noscript><figcaption>MDN Logo</figcaption></figure>` // Input: a figure with an <img>, a <noscript> wrapping a different <img>, and a <figcaption>.
+	expected := `<figure><img src="http://example.org/logo.svg"/><figcaption>MDN Logo</figcaption></figure>` // Only the <img> from the <noscript> remains, placed first in the figure; the original <img> and the <noscript> wrapper are gone.
+	output := Rewriter("https://example.org/article", content, "use_noscript_figure_images") // The third argument names the rewrite rule to apply.
+	output = strings.TrimSpace(output) // Compare without leading or trailing whitespace.
+
+	if expected != output {
+		t.Errorf(`Not expected output: %s`, output) // The failure message prints only the actual output.
+	}
+}