rewriter.go 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
  4. import (
  5. "log/slog"
  6. "strconv"
  7. "strings"
  8. "text/scanner"
  9. "miniflux.app/v2/internal/model"
  10. "miniflux.app/v2/internal/urllib"
  11. "golang.org/x/text/cases"
  12. "golang.org/x/text/language"
  13. )
  14. type rule struct {
  15. name string
  16. args []string
  17. }
  18. func (rule rule) applyRule(entryURL string, entry *model.Entry) {
  19. switch rule.name {
  20. case "add_image_title":
  21. entry.Content = addImageTitle(entryURL, entry.Content)
  22. case "add_mailto_subject":
  23. entry.Content = addMailtoSubject(entryURL, entry.Content)
  24. case "add_dynamic_image":
  25. entry.Content = addDynamicImage(entryURL, entry.Content)
  26. case "add_dynamic_iframe":
  27. entry.Content = addDynamicIframe(entryURL, entry.Content)
  28. case "add_youtube_video":
  29. entry.Content = addYoutubeVideo(entryURL, entry.Content)
  30. case "add_invidious_video":
  31. entry.Content = addInvidiousVideo(entryURL, entry.Content)
  32. case "add_youtube_video_using_invidious_player":
  33. entry.Content = addYoutubeVideoUsingInvidiousPlayer(entryURL, entry.Content)
  34. case "add_youtube_video_from_id":
  35. entry.Content = addYoutubeVideoFromId(entry.Content)
  36. case "add_pdf_download_link":
  37. entry.Content = addPDFLink(entryURL, entry.Content)
  38. case "nl2br":
  39. entry.Content = strings.ReplaceAll(entry.Content, "\n", "<br>")
  40. case "convert_text_link", "convert_text_links":
  41. entry.Content = replaceTextLinks(entry.Content)
  42. case "fix_medium_images":
  43. entry.Content = fixMediumImages(entryURL, entry.Content)
  44. case "use_noscript_figure_images":
  45. entry.Content = useNoScriptImages(entryURL, entry.Content)
  46. case "replace":
  47. // Format: replace("search-term"|"replace-term")
  48. if len(rule.args) >= 2 {
  49. entry.Content = replaceCustom(entry.Content, rule.args[0], rule.args[1])
  50. } else {
  51. slog.Warn("Cannot find search and replace terms for replace rule",
  52. slog.Any("rule", rule),
  53. slog.String("entry_url", entryURL),
  54. )
  55. }
  56. case "replace_title":
  57. // Format: replace_title("search-term"|"replace-term")
  58. if len(rule.args) >= 2 {
  59. entry.Title = replaceCustom(entry.Title, rule.args[0], rule.args[1])
  60. } else {
  61. slog.Warn("Cannot find search and replace terms for replace_title rule",
  62. slog.Any("rule", rule),
  63. slog.String("entry_url", entryURL),
  64. )
  65. }
  66. case "remove":
  67. // Format: remove("#selector > .element, .another")
  68. if len(rule.args) >= 1 {
  69. entry.Content = removeCustom(entry.Content, rule.args[0])
  70. } else {
  71. slog.Warn("Cannot find selector for remove rule",
  72. slog.Any("rule", rule),
  73. slog.String("entry_url", entryURL),
  74. )
  75. }
  76. case "add_castopod_episode":
  77. entry.Content = addCastopodEpisode(entryURL, entry.Content)
  78. case "base64_decode":
  79. selector := "body"
  80. if len(rule.args) >= 1 {
  81. selector = rule.args[0]
  82. }
  83. entry.Content = applyFuncOnTextContent(entry.Content, selector, decodeBase64Content)
  84. case "add_hn_links_using_hack":
  85. entry.Content = addHackerNewsLinksUsing(entry.Content, "hack")
  86. case "add_hn_links_using_opener":
  87. entry.Content = addHackerNewsLinksUsing(entry.Content, "opener")
  88. case "parse_markdown":
  89. entry.Content = parseMarkdown(entry.Content)
  90. case "remove_tables":
  91. entry.Content = removeTables(entry.Content)
  92. case "remove_clickbait":
  93. entry.Title = cases.Title(language.English).String(strings.ToLower(entry.Title))
  94. }
  95. }
  96. // Rewriter modify item contents with a set of rewriting rules.
  97. func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) {
  98. rulesList := getPredefinedRewriteRules(entryURL)
  99. if customRewriteRules != "" {
  100. rulesList = customRewriteRules
  101. }
  102. rules := parseRules(rulesList)
  103. rules = append(rules, rule{name: "add_pdf_download_link"})
  104. slog.Debug("Rewrite rules applied",
  105. slog.Any("rules", rules),
  106. slog.String("entry_url", entryURL),
  107. )
  108. for _, rule := range rules {
  109. rule.applyRule(entryURL, entry)
  110. }
  111. }
  112. func parseRules(rulesText string) (rules []rule) {
  113. scan := scanner.Scanner{Mode: scanner.ScanIdents | scanner.ScanStrings}
  114. scan.Init(strings.NewReader(rulesText))
  115. for {
  116. switch scan.Scan() {
  117. case scanner.Ident:
  118. rules = append(rules, rule{name: scan.TokenText()})
  119. case scanner.String:
  120. if l := len(rules) - 1; l >= 0 {
  121. text, _ := strconv.Unquote(scan.TokenText())
  122. rules[l].args = append(rules[l].args, text)
  123. }
  124. case scanner.EOF:
  125. return
  126. }
  127. }
  128. }
  129. func getPredefinedRewriteRules(entryURL string) string {
  130. urlDomain := urllib.Domain(entryURL)
  131. for domain, rules := range predefinedRules {
  132. if strings.Contains(urlDomain, domain) {
  133. return rules
  134. }
  135. }
  136. return ""
  137. }