Browse Source

Basic table removal rule

Jake Walker 3 years ago
parent
commit
49d2596fc6
3 changed files with 43 additions and 0 deletions
  1. 31 0
      reader/rewrite/rewrite_functions.go
  2. 2 0
      reader/rewrite/rewriter.go
  3. 10 0
      reader/rewrite/rewriter_test.go

+ 31 - 0
reader/rewrite/rewrite_functions.go

@@ -335,3 +335,34 @@ func parseMarkdown(entryContent string) string {
 
 	return sb.String()
 }
+
+func removeTables(entryContent string) string {
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
+	if err != nil {
+		return entryContent
+	}
+
+	var table *goquery.Selection
+
+	for {
+		table = doc.Find("table").First()
+
+		if table.Length() == 0 {
+			break
+		}
+
+		td := table.Find("td").First()
+
+		if td.Length() == 0 {
+			break
+		}
+
+		tdHtml, _ := td.Html()
+
+		table.Parent().AppendHtml(tdHtml)
+		table.Remove()
+	}
+
+	output, _ := doc.Find("body").First().Html()
+	return output
+}

+ 2 - 0
reader/rewrite/rewriter.go

@@ -110,6 +110,8 @@ func applyRule(entryURL, entryContent string, rule rule) string {
 		}
 	case "parse_markdown":
 		entryContent = parseMarkdown(entryContent)
+	case "remove_tables":
+		entryContent = removeTables(entryContent)
 	}
 
 	return entryContent

+ 10 - 0
reader/rewrite/rewriter_test.go

@@ -325,3 +325,13 @@ func TestRewriteBase64DecodeArgs(t *testing.T) {
 		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
 	}
 }
+
+func TestRewriteRemoveTables(t *testing.T) {
+	content := `<table class="container"><tbody><tr><td><p>Test</p><table class="row"><tbody><tr><td>Hello World!</td></tr></tbody></table></td></tr></tbody></table>`
+	expected := `<p>Test</p>Hello World!`
+	output := Rewriter("https://example.org/article", content, `remove_tables`)
+
+	if expected != output {
+		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	}
+}