content_rewrite.go 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
  4. import (
  5. "log/slog"
  6. "strconv"
  7. "strings"
  8. "text/scanner"
  9. "miniflux.app/v2/internal/model"
  10. "miniflux.app/v2/internal/urllib"
  11. )
  // rule is one parsed rewrite instruction: the name selects the rewrite to
  // perform and args holds the string arguments that followed the name in the
  // rule text, in the order they were written.
  type rule struct {
  	// name is the rule identifier, for example "replace" or "nl2br".
  	name string

  	// args are the unquoted string arguments of the rule; nil when the rule
  	// was written without arguments.
  	args []string
  }
  16. func (rule rule) applyRule(entryURL string, entry *model.Entry) {
  17. switch rule.name {
  18. case "add_image_title":
  19. entry.Content = addImageTitle(entry.Content)
  20. case "add_mailto_subject":
  21. entry.Content = addMailtoSubject(entry.Content)
  22. case "add_dynamic_image":
  23. entry.Content = addDynamicImage(entry.Content)
  24. case "add_dynamic_iframe":
  25. entry.Content = addDynamicIframe(entry.Content)
  26. case "add_youtube_video":
  27. entry.Content = addYoutubeVideoRewriteRule(entryURL, entry.Content)
  28. case "add_invidious_video":
  29. entry.Content = addInvidiousVideo(entryURL, entry.Content)
  30. case "add_youtube_video_using_invidious_player":
  31. entry.Content = addYoutubeVideoUsingInvidiousPlayer(entryURL, entry.Content)
  32. case "add_youtube_video_from_id":
  33. entry.Content = addYoutubeVideoFromId(entry.Content)
  34. case "add_pdf_download_link":
  35. entry.Content = addPDFLink(entryURL, entry.Content)
  36. case "nl2br":
  37. entry.Content = strings.ReplaceAll(entry.Content, "\n", "<br>")
  38. case "convert_text_link", "convert_text_links":
  39. entry.Content = replaceTextLinks(entry.Content)
  40. case "fix_medium_images":
  41. entry.Content = fixMediumImages(entry.Content)
  42. case "use_noscript_figure_images":
  43. entry.Content = useNoScriptImages(entry.Content)
  44. case "replace":
  45. // Format: replace("search-term"|"replace-term")
  46. if len(rule.args) >= 2 {
  47. entry.Content = replaceCustom(entry.Content, rule.args[0], rule.args[1])
  48. } else {
  49. slog.Warn("Cannot find search and replace terms for replace rule",
  50. slog.Any("rule", rule),
  51. slog.String("entry_url", entryURL),
  52. )
  53. }
  54. case "replace_title":
  55. // Format: replace_title("search-term"|"replace-term")
  56. if len(rule.args) >= 2 {
  57. entry.Title = replaceCustom(entry.Title, rule.args[0], rule.args[1])
  58. } else {
  59. slog.Warn("Cannot find search and replace terms for replace_title rule",
  60. slog.Any("rule", rule),
  61. slog.String("entry_url", entryURL),
  62. )
  63. }
  64. case "remove":
  65. // Format: remove("#selector > .element, .another")
  66. if len(rule.args) >= 1 {
  67. entry.Content = removeCustom(entry.Content, rule.args[0])
  68. } else {
  69. slog.Warn("Cannot find selector for remove rule",
  70. slog.Any("rule", rule),
  71. slog.String("entry_url", entryURL),
  72. )
  73. }
  74. case "add_castopod_episode":
  75. entry.Content = addCastopodEpisode(entryURL, entry.Content)
  76. case "base64_decode":
  77. selector := "body"
  78. if len(rule.args) >= 1 {
  79. selector = rule.args[0]
  80. }
  81. entry.Content = applyFuncOnTextContent(entry.Content, selector, decodeBase64Content)
  82. case "add_hn_links_using_hack":
  83. entry.Content = addHackerNewsLinksUsing(entry.Content, "hack")
  84. case "add_hn_links_using_opener":
  85. entry.Content = addHackerNewsLinksUsing(entry.Content, "opener")
  86. case "remove_tables":
  87. entry.Content = removeTables(entry.Content)
  88. case "remove_clickbait":
  89. entry.Title = titlelize(entry.Title)
  90. case "fix_ghost_cards":
  91. entry.Content = fixGhostCards(entry.Content)
  92. case "remove_img_blur_params":
  93. entry.Content = removeImgBlurParams(entry.Content)
  94. }
  95. }
  96. func ApplyContentRewriteRules(entry *model.Entry, customRewriteRules string) {
  97. rulesList := getPredefinedRewriteRules(entry.URL)
  98. if customRewriteRules != "" {
  99. rulesList = customRewriteRules
  100. }
  101. rules := parseRules(rulesList)
  102. rules = append(rules, rule{name: "add_pdf_download_link"})
  103. slog.Debug("Rewrite rules applied",
  104. slog.Any("rules", rules),
  105. slog.String("entry_url", entry.URL),
  106. )
  107. for _, rule := range rules {
  108. rule.applyRule(entry.URL, entry)
  109. }
  110. }
  111. func parseRules(rulesText string) (rules []rule) {
  112. scan := scanner.Scanner{Mode: scanner.ScanIdents | scanner.ScanStrings}
  113. scan.Init(strings.NewReader(rulesText))
  114. for {
  115. switch scan.Scan() {
  116. case scanner.Ident:
  117. rules = append(rules, rule{name: scan.TokenText()})
  118. case scanner.String:
  119. if l := len(rules) - 1; l >= 0 {
  120. text, _ := strconv.Unquote(scan.TokenText())
  121. rules[l].args = append(rules[l].args, text)
  122. }
  123. case scanner.EOF:
  124. return rules
  125. }
  126. }
  127. }
  128. func getPredefinedRewriteRules(entryURL string) string {
  129. urlDomain := urllib.DomainWithoutWWW(entryURL)
  130. if rules, ok := predefinedRules[urlDomain]; ok {
  131. return rules
  132. }
  133. return ""
  134. }