rewriter.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package rewrite // import "miniflux.app/reader/rewrite"
  4. import (
  5. "strconv"
  6. "strings"
  7. "text/scanner"
  8. "miniflux.app/logger"
  9. "miniflux.app/model"
  10. "miniflux.app/url"
  11. )
  // rule is one parsed rewrite directive: an identifier plus the string
  // arguments that were attached to it in the rules expression.
  type rule struct {
  	// name is the rule identifier, e.g. "replace" or "nl2br".
  	name string

  	// args holds the string arguments given to the rule, in order of
  	// appearance. It stays nil for rules written without arguments.
  	args []string
  }
  16. // Rewriter modify item contents with a set of rewriting rules.
  17. func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) {
  18. rulesList := getPredefinedRewriteRules(entryURL)
  19. if customRewriteRules != "" {
  20. rulesList = customRewriteRules
  21. }
  22. rules := parseRules(rulesList)
  23. rules = append(rules, rule{name: "add_pdf_download_link"})
  24. logger.Debug(`[Rewrite] Applying rules %v for %q`, rules, entryURL)
  25. for _, rule := range rules {
  26. applyRule(entryURL, entry, rule)
  27. }
  28. }
  29. func parseRules(rulesText string) (rules []rule) {
  30. scan := scanner.Scanner{Mode: scanner.ScanIdents | scanner.ScanStrings}
  31. scan.Init(strings.NewReader(rulesText))
  32. for {
  33. switch scan.Scan() {
  34. case scanner.Ident:
  35. rules = append(rules, rule{name: scan.TokenText()})
  36. case scanner.String:
  37. if l := len(rules) - 1; l >= 0 {
  38. text := scan.TokenText()
  39. text, _ = strconv.Unquote(text)
  40. rules[l].args = append(rules[l].args, text)
  41. }
  42. case scanner.EOF:
  43. return
  44. }
  45. }
  46. }
  47. func applyRule(entryURL string, entry *model.Entry, rule rule) {
  48. switch rule.name {
  49. case "add_image_title":
  50. entry.Content = addImageTitle(entryURL, entry.Content)
  51. case "add_mailto_subject":
  52. entry.Content = addMailtoSubject(entryURL, entry.Content)
  53. case "add_dynamic_image":
  54. entry.Content = addDynamicImage(entryURL, entry.Content)
  55. case "add_youtube_video":
  56. entry.Content = addYoutubeVideo(entryURL, entry.Content)
  57. case "add_invidious_video":
  58. entry.Content = addInvidiousVideo(entryURL, entry.Content)
  59. case "add_youtube_video_using_invidious_player":
  60. entry.Content = addYoutubeVideoUsingInvidiousPlayer(entryURL, entry.Content)
  61. case "add_youtube_video_from_id":
  62. entry.Content = addYoutubeVideoFromId(entry.Content)
  63. case "add_pdf_download_link":
  64. entry.Content = addPDFLink(entryURL, entry.Content)
  65. case "nl2br":
  66. entry.Content = replaceLineFeeds(entry.Content)
  67. case "convert_text_link", "convert_text_links":
  68. entry.Content = replaceTextLinks(entry.Content)
  69. case "fix_medium_images":
  70. entry.Content = fixMediumImages(entryURL, entry.Content)
  71. case "use_noscript_figure_images":
  72. entry.Content = useNoScriptImages(entryURL, entry.Content)
  73. case "replace":
  74. // Format: replace("search-term"|"replace-term")
  75. if len(rule.args) >= 2 {
  76. entry.Content = replaceCustom(entry.Content, rule.args[0], rule.args[1])
  77. } else {
  78. logger.Debug("[Rewrite] Cannot find search and replace terms for replace rule %s", rule)
  79. }
  80. case "remove":
  81. // Format: remove("#selector > .element, .another")
  82. if len(rule.args) >= 1 {
  83. entry.Content = removeCustom(entry.Content, rule.args[0])
  84. } else {
  85. logger.Debug("[Rewrite] Cannot find selector for remove rule %s", rule)
  86. }
  87. case "add_castopod_episode":
  88. entry.Content = addCastopodEpisode(entryURL, entry.Content)
  89. case "base64_decode":
  90. if len(rule.args) >= 1 {
  91. entry.Content = applyFuncOnTextContent(entry.Content, rule.args[0], decodeBase64Content)
  92. } else {
  93. entry.Content = applyFuncOnTextContent(entry.Content, "body", decodeBase64Content)
  94. }
  95. case "parse_markdown":
  96. entry.Content = parseMarkdown(entry.Content)
  97. case "remove_tables":
  98. entry.Content = removeTables(entry.Content)
  99. case "remove_clickbait":
  100. entry.Title = removeClickbait(entry.Title)
  101. }
  102. }
  103. func getPredefinedRewriteRules(entryURL string) string {
  104. urlDomain := url.Domain(entryURL)
  105. for domain, rules := range predefinedRules {
  106. if strings.Contains(urlDomain, domain) {
  107. return rules
  108. }
  109. }
  110. return ""
  111. }