rewriter.go 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
  4. import (
  5. "log/slog"
  6. "strconv"
  7. "strings"
  8. "text/scanner"
  9. "miniflux.app/v2/internal/model"
  10. "miniflux.app/v2/internal/urllib"
  11. )
  // rule is one parsed rewrite instruction: a rule identifier together with the
  // string arguments that followed it in the rules text.
  type rule struct {
  	// name is the rule identifier exactly as it was written in the rules text.
  	name string

  	// args holds the rule's string arguments in the order they appeared.
  	// It is nil for rules written without arguments.
  	args []string
  }
  16. // Rewriter modify item contents with a set of rewriting rules.
  17. func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) {
  18. rulesList := getPredefinedRewriteRules(entryURL)
  19. if customRewriteRules != "" {
  20. rulesList = customRewriteRules
  21. }
  22. rules := parseRules(rulesList)
  23. rules = append(rules, rule{name: "add_pdf_download_link"})
  24. slog.Debug("Rewrite rules applied",
  25. slog.Any("rules", rules),
  26. slog.String("entry_url", entryURL),
  27. )
  28. for _, rule := range rules {
  29. applyRule(entryURL, entry, rule)
  30. }
  31. }
  32. func parseRules(rulesText string) (rules []rule) {
  33. scan := scanner.Scanner{Mode: scanner.ScanIdents | scanner.ScanStrings}
  34. scan.Init(strings.NewReader(rulesText))
  35. for {
  36. switch scan.Scan() {
  37. case scanner.Ident:
  38. rules = append(rules, rule{name: scan.TokenText()})
  39. case scanner.String:
  40. if l := len(rules) - 1; l >= 0 {
  41. text := scan.TokenText()
  42. text, _ = strconv.Unquote(text)
  43. rules[l].args = append(rules[l].args, text)
  44. }
  45. case scanner.EOF:
  46. return
  47. }
  48. }
  49. }
  50. func applyRule(entryURL string, entry *model.Entry, rule rule) {
  51. switch rule.name {
  52. case "add_image_title":
  53. entry.Content = addImageTitle(entryURL, entry.Content)
  54. case "add_mailto_subject":
  55. entry.Content = addMailtoSubject(entryURL, entry.Content)
  56. case "add_dynamic_image":
  57. entry.Content = addDynamicImage(entryURL, entry.Content)
  58. case "add_youtube_video":
  59. entry.Content = addYoutubeVideo(entryURL, entry.Content)
  60. case "add_invidious_video":
  61. entry.Content = addInvidiousVideo(entryURL, entry.Content)
  62. case "add_youtube_video_using_invidious_player":
  63. entry.Content = addYoutubeVideoUsingInvidiousPlayer(entryURL, entry.Content)
  64. case "add_youtube_video_from_id":
  65. entry.Content = addYoutubeVideoFromId(entry.Content)
  66. case "add_pdf_download_link":
  67. entry.Content = addPDFLink(entryURL, entry.Content)
  68. case "nl2br":
  69. entry.Content = replaceLineFeeds(entry.Content)
  70. case "convert_text_link", "convert_text_links":
  71. entry.Content = replaceTextLinks(entry.Content)
  72. case "fix_medium_images":
  73. entry.Content = fixMediumImages(entryURL, entry.Content)
  74. case "use_noscript_figure_images":
  75. entry.Content = useNoScriptImages(entryURL, entry.Content)
  76. case "replace":
  77. // Format: replace("search-term"|"replace-term")
  78. if len(rule.args) >= 2 {
  79. entry.Content = replaceCustom(entry.Content, rule.args[0], rule.args[1])
  80. } else {
  81. slog.Warn("Cannot find search and replace terms for replace rule",
  82. slog.Any("rule", rule),
  83. slog.String("entry_url", entryURL),
  84. )
  85. }
  86. case "replace_title":
  87. // Format: replace_title("search-term"|"replace-term")
  88. if len(rule.args) >= 2 {
  89. entry.Title = replaceCustom(entry.Title, rule.args[0], rule.args[1])
  90. } else {
  91. slog.Warn("Cannot find search and replace terms for replace_title rule",
  92. slog.Any("rule", rule),
  93. slog.String("entry_url", entryURL),
  94. )
  95. }
  96. case "remove":
  97. // Format: remove("#selector > .element, .another")
  98. if len(rule.args) >= 1 {
  99. entry.Content = removeCustom(entry.Content, rule.args[0])
  100. } else {
  101. slog.Warn("Cannot find selector for remove rule",
  102. slog.Any("rule", rule),
  103. slog.String("entry_url", entryURL),
  104. )
  105. }
  106. case "add_castopod_episode":
  107. entry.Content = addCastopodEpisode(entryURL, entry.Content)
  108. case "base64_decode":
  109. if len(rule.args) >= 1 {
  110. entry.Content = applyFuncOnTextContent(entry.Content, rule.args[0], decodeBase64Content)
  111. } else {
  112. entry.Content = applyFuncOnTextContent(entry.Content, "body", decodeBase64Content)
  113. }
  114. case "add_hn_links_using_hack":
  115. entry.Content = addHackerNewsLinksUsing(entry.Content, "hack")
  116. case "add_hn_links_using_opener":
  117. entry.Content = addHackerNewsLinksUsing(entry.Content, "opener")
  118. case "parse_markdown":
  119. entry.Content = parseMarkdown(entry.Content)
  120. case "remove_tables":
  121. entry.Content = removeTables(entry.Content)
  122. case "remove_clickbait":
  123. entry.Title = removeClickbait(entry.Title)
  124. }
  125. }
  126. func getPredefinedRewriteRules(entryURL string) string {
  127. urlDomain := urllib.Domain(entryURL)
  128. for domain, rules := range predefinedRules {
  129. if strings.Contains(urlDomain, domain) {
  130. return rules
  131. }
  132. }
  133. return ""
  134. }