Răsfoiți Sursa

feat: add `fix_ghost_cards` rewrite rule

Jake Walker 1 an în urmă
părinte
comite
6cbe8c3a9d

+ 52 - 0
internal/reader/rewrite/rewrite_functions.go

@@ -455,3 +455,55 @@ func removeTables(entryContent string) string {
 	output, _ := doc.FindMatcher(goquery.Single("body")).Html()
 	return output
 }
+
+// fixGhostCards rewrites Ghost bookmark cards found in entryContent into plain
+// links.
+//
+// A card is any element matching "figure.kg-card". It is convertible when it
+// holds a non-empty ".kg-bookmark-title" and an "a.kg-bookmark-container" with
+// a non-empty href; the link label is the title, followed by " - <author>"
+// when a ".kg-bookmark-author" exists and the title does not already end with
+// it. Cards that are not convertible are left untouched.
+//
+// A run of adjacent convertible cards becomes one <ul> with an <li> per card;
+// an isolated convertible card is replaced by a bare <a>.
+//
+// The result is the whitespace-trimmed inner HTML of the parsed <body>. If the
+// content cannot be parsed or serialized, entryContent is returned unchanged.
+func fixGhostCards(entryContent string) string {
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
+	if err != nil {
+		return entryContent
+	}
+
+	const cardSelector = "figure.kg-card"
+
+	// Text() and AttrOr() hand back decoded values, so they have to be escaped
+	// again before being spliced into markup; otherwise a title such as
+	// "a <b> c" would be parsed as HTML. The replacement set mirrors the one
+	// used by the standard library's html.EscapeString.
+	escaper := strings.NewReplacer(
+		"&", "&amp;",
+		"'", "&#39;",
+		"<", "&lt;",
+		">", "&gt;",
+		`"`, "&#34;",
+	)
+
+	// bookmarkLink builds the replacement anchor for a card, or returns ""
+	// when the card lacks a link or a title (also for an empty selection).
+	bookmarkLink := func(card *goquery.Selection) string {
+		title := card.Find(".kg-bookmark-title").First().Text()
+		href := card.Find("a.kg-bookmark-container").First().AttrOr("href", "")
+		if href == "" || title == "" {
+			return ""
+		}
+
+		label := title
+		author := card.Find(".kg-bookmark-author").First().Text()
+		if author != "" && !strings.HasSuffix(title, author) {
+			label = title + " - " + author
+		}
+
+		return `<a href="` + escaper.Replace(href) + `">` + escaper.Replace(label) + `</a>`
+	}
+
+	// currentList is the <ul> collecting the run of adjacent cards being
+	// processed, or nil when no run is open.
+	var currentList *goquery.Selection
+
+	doc.Find(cardSelector).Each(func(_ int, s *goquery.Selection) {
+		link := bookmarkLink(s)
+		if link == "" {
+			// A card we cannot convert always ends the current run, so that
+			// later, non-adjacent cards are never pulled into a stale list.
+			currentList = nil
+			return
+		}
+
+		// Look at the following element sibling before s is detached.
+		next := s.Next()
+		nextConvertible := next.Is(cardSelector) && bookmarkLink(next) != ""
+
+		// Open a list in front of the first card of a run.
+		if nextConvertible && currentList == nil {
+			currentList = s.BeforeHtml("<ul></ul>").Prev()
+		}
+
+		if currentList != nil {
+			// Move this card into the list, then drop the original figure.
+			currentList.AppendHtml("<li>" + link + "</li>")
+			s.Remove()
+		} else {
+			// Isolated card: swap it for the bare link.
+			s.ReplaceWithHtml(link)
+		}
+
+		// The run ends as soon as the next sibling is not a convertible card.
+		if !nextConvertible {
+			currentList = nil
+		}
+	})
+
+	output, err := doc.FindMatcher(goquery.Single("body")).Html()
+	if err != nil {
+		return entryContent
+	}
+	return strings.TrimSpace(output)
+}

+ 2 - 0
internal/reader/rewrite/rewriter.go

@@ -92,6 +92,8 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) {
 		entry.Content = removeTables(entry.Content)
 	case "remove_clickbait":
 		entry.Title = titlelize(entry.Title)
+	case "fix_ghost_cards":
+		entry.Content = fixGhostCards(entry.Content)
 	}
 }
 

+ 228 - 0
internal/reader/rewrite/rewriter_test.go

@@ -703,3 +703,231 @@ func TestAddImageTitle(t *testing.T) {
 		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
+
+func TestFixGhostCard(t *testing.T) {
+	testEntry := &model.Entry{
+		Title: `A title`,
+		Content: `<figure class="kg-card kg-bookmark-card">
+			<a class="kg-bookmark-container" href="https://example.org/article">
+				<div class="kg-bookmark-content">
+					<div class="kg-bookmark-title">Example Article</div>
+					<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
+					<div class="kg-bookmark-metadata">
+						<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
+						<span class="kg-bookmark-author">Example</span>
+						<span class="kg-bookmark-publisher">Test Author</span>
+					</div>
+				</div>
+				<div class="kg-bookmark-thumbnail">
+					<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
+				</div>
+			</a>
+		</figure>`,
+	}
+
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<a href="https://example.org/article">Example Article - Example</a>`,
+	}
+	Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`)
+
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
+	}
+}
+
+func TestFixGhostCardNoCard(t *testing.T) {
+	testEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<a href="https://example.org/article">Example Article - Example</a>`,
+	}
+
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<a href="https://example.org/article">Example Article - Example</a>`,
+	}
+	Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`)
+
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
+	}
+}
+
+func TestFixGhostCardInvalidCard(t *testing.T) {
+	testEntry := &model.Entry{
+		Title: `A title`,
+		Content: `<figure class="kg-card kg-bookmark-card">
+			<a href="https://example.org/article">This card does not have the required fields</a>
+		</figure>`,
+	}
+
+	controlEntry := &model.Entry{
+		Title: `A title`,
+		Content: `<figure class="kg-card kg-bookmark-card">
+			<a href="https://example.org/article">This card does not have the required fields</a>
+		</figure>`,
+	}
+	Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`)
+
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
+	}
+}
+
+func TestFixGhostCardMissingAuthor(t *testing.T) {
+	testEntry := &model.Entry{
+		Title: `A title`,
+		Content: `<figure class="kg-card kg-bookmark-card">
+			<a class="kg-bookmark-container" href="https://example.org/article">
+				<div class="kg-bookmark-content">
+					<div class="kg-bookmark-title">Example Article</div>
+					<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
+				</div>
+				<div class="kg-bookmark-thumbnail">
+					<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
+				</div>
+			</a>
+		</figure>`,
+	}
+
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<a href="https://example.org/article">Example Article</a>`,
+	}
+	Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`)
+
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
+	}
+}
+
+func TestFixGhostCardDuplicatedAuthor(t *testing.T) {
+	testEntry := &model.Entry{
+		Title: `A title`,
+		Content: `<figure class="kg-card kg-bookmark-card">
+			<a class="kg-bookmark-container" href="https://example.org/article">
+				<div class="kg-bookmark-content">
+					<div class="kg-bookmark-title">Example Article - Example</div>
+					<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
+					<div class="kg-bookmark-metadata">
+						<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
+						<span class="kg-bookmark-author">Example</span>
+						<span class="kg-bookmark-publisher">Test Author</span>
+					</div>
+				</div>
+				<div class="kg-bookmark-thumbnail">
+					<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
+				</div>
+			</a>
+		</figure>`,
+	}
+
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<a href="https://example.org/article">Example Article - Example</a>`,
+	}
+	Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`)
+
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
+	}
+}
+
+func TestFixGhostCardMultiple(t *testing.T) {
+	testEntry := &model.Entry{
+		Title: `A title`,
+		Content: `<figure class="kg-card kg-bookmark-card">
+			<a class="kg-bookmark-container" href="https://example.org/article1">
+				<div class="kg-bookmark-content">
+					<div class="kg-bookmark-title">Example Article 1 - Example</div>
+					<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
+					<div class="kg-bookmark-metadata">
+						<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
+						<span class="kg-bookmark-author">Example</span>
+						<span class="kg-bookmark-publisher">Test Author</span>
+					</div>
+				</div>
+				<div class="kg-bookmark-thumbnail">
+					<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
+				</div>
+			</a>
+		</figure>
+		<figure class="kg-card kg-bookmark-card">
+			<a class="kg-bookmark-container" href="https://example.org/article2">
+				<div class="kg-bookmark-content">
+					<div class="kg-bookmark-title">Example Article 2 - Example</div>
+					<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
+					<div class="kg-bookmark-metadata">
+						<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
+						<span class="kg-bookmark-author">Example</span>
+						<span class="kg-bookmark-publisher">Test Author</span>
+					</div>
+				</div>
+				<div class="kg-bookmark-thumbnail">
+					<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
+				</div>
+			</a>
+		</figure>`,
+	}
+
+	controlEntry := &model.Entry{
+		Title:   `A title`,
+		Content: `<ul><li><a href="https://example.org/article1">Example Article 1 - Example</a></li><li><a href="https://example.org/article2">Example Article 2 - Example</a></li></ul>`,
+	}
+	Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`)
+
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
+	}
+}
+
+func TestFixGhostCardMultipleSplit(t *testing.T) {
+	testEntry := &model.Entry{
+		Title: `A title`,
+		Content: `<figure class="kg-card kg-bookmark-card">
+			<a class="kg-bookmark-container" href="https://example.org/article1">
+				<div class="kg-bookmark-content">
+					<div class="kg-bookmark-title">Example Article 1 - Example</div>
+					<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
+					<div class="kg-bookmark-metadata">
+						<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
+						<span class="kg-bookmark-author">Example</span>
+						<span class="kg-bookmark-publisher">Test Author</span>
+					</div>
+				</div>
+				<div class="kg-bookmark-thumbnail">
+					<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
+				</div>
+			</a>
+		</figure>
+		<p>This separates the two cards</p>
+		<figure class="kg-card kg-bookmark-card">
+			<a class="kg-bookmark-container" href="https://example.org/article2">
+				<div class="kg-bookmark-content">
+					<div class="kg-bookmark-title">Example Article 2 - Example</div>
+					<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
+					<div class="kg-bookmark-metadata">
+						<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
+						<span class="kg-bookmark-author">Example</span>
+						<span class="kg-bookmark-publisher">Test Author</span>
+					</div>
+				</div>
+				<div class="kg-bookmark-thumbnail">
+					<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
+				</div>
+			</a>
+		</figure>`,
+	}
+
+	controlEntry := &model.Entry{
+		Title: `A title`,
+		Content: `<a href="https://example.org/article1">Example Article 1 - Example</a>
+		<p>This separates the two cards</p>
+		<a href="https://example.org/article2">Example Article 2 - Example</a>`,
+	}
+	Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`)
+
+	if !reflect.DeepEqual(testEntry, controlEntry) {
+		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
+	}
+}