rewrite_functions.go 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package rewrite // import "miniflux.app/reader/rewrite"
  5. import (
  6. "fmt"
  7. "html"
  8. "net/url"
  9. "regexp"
  10. "strings"
  11. "github.com/PuerkitoBio/goquery"
  12. )
  // Precompiled patterns shared by the rewrite helpers in this file.
  // They are compiled once at package initialisation.
  var (
  	// imgRegex matches one complete <img ...> tag inside a chunk of markup.
  	imgRegex = regexp.MustCompile(`<img [^>]+>`)

  	// textLinkRegex matches a bare http:// or https:// URL, case-insensitively.
  	// The whole URL is exposed as capture group 1.
  	textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)

  	// youtubeRegex matches a YouTube watch URL and captures everything that
  	// follows "v=" as group 1.
  	youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
  )
  18. func addImageTitle(entryURL, entryContent string) string {
  19. doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
  20. if err != nil {
  21. return entryContent
  22. }
  23. matches := doc.Find("img[src][title]")
  24. if matches.Length() > 0 {
  25. matches.Each(func(i int, img *goquery.Selection) {
  26. altAttr := img.AttrOr("alt", "")
  27. srcAttr, _ := img.Attr("src")
  28. titleAttr, _ := img.Attr("title")
  29. img.ReplaceWithHtml(`<figure><img src="` + srcAttr + `" alt="` + altAttr + `"/><figcaption><p>` + html.EscapeString(titleAttr) + `</p></figcaption></figure>`)
  30. })
  31. output, _ := doc.Find("body").First().Html()
  32. return output
  33. }
  34. return entryContent
  35. }
  36. func addMailtoSubject(entryURL, entryContent string) string {
  37. doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
  38. if err != nil {
  39. return entryContent
  40. }
  41. matches := doc.Find(`a[href^="mailto:"]`)
  42. if matches.Length() > 0 {
  43. matches.Each(func(i int, a *goquery.Selection) {
  44. hrefAttr, _ := a.Attr("href")
  45. mailto, err := url.Parse(hrefAttr)
  46. if err != nil {
  47. return
  48. }
  49. subject := mailto.Query().Get("subject")
  50. if subject == "" {
  51. return
  52. }
  53. a.AppendHtml(" [" + html.EscapeString(subject) + "]")
  54. })
  55. output, _ := doc.Find("body").First().Html()
  56. return output
  57. }
  58. return entryContent
  59. }
  60. func addDynamicImage(entryURL, entryContent string) string {
  61. doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
  62. if err != nil {
  63. return entryContent
  64. }
  65. // Ordered most preferred to least preferred.
  66. candidateAttrs := []string{
  67. "data-src",
  68. "data-original",
  69. "data-orig",
  70. "data-url",
  71. "data-orig-file",
  72. "data-large-file",
  73. "data-medium-file",
  74. "data-2000src",
  75. "data-1000src",
  76. "data-800src",
  77. "data-655src",
  78. "data-500src",
  79. "data-380src",
  80. }
  81. changed := false
  82. doc.Find("img,div").Each(func(i int, img *goquery.Selection) {
  83. for _, candidateAttr := range candidateAttrs {
  84. if srcAttr, found := img.Attr(candidateAttr); found {
  85. changed = true
  86. if img.Is("img") {
  87. img.SetAttr("src", srcAttr)
  88. } else {
  89. altAttr := img.AttrOr("alt", "")
  90. img.ReplaceWithHtml(`<img src="` + srcAttr + `" alt="` + altAttr + `"/>`)
  91. }
  92. break
  93. }
  94. }
  95. })
  96. if !changed {
  97. doc.Find("noscript").Each(func(i int, noscript *goquery.Selection) {
  98. matches := imgRegex.FindAllString(noscript.Text(), 2)
  99. if len(matches) == 1 {
  100. changed = true
  101. noscript.ReplaceWithHtml(matches[0])
  102. }
  103. })
  104. }
  105. if changed {
  106. output, _ := doc.Find("body").First().Html()
  107. return output
  108. }
  109. return entryContent
  110. }
  111. func addYoutubeVideo(entryURL, entryContent string) string {
  112. matches := youtubeRegex.FindStringSubmatch(entryURL)
  113. if len(matches) == 2 {
  114. video := `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/` + matches[1] + `" allowfullscreen></iframe>`
  115. return video + `<br>` + entryContent
  116. }
  117. return entryContent
  118. }
  // addPDFLink prepends a "PDF" link pointing at entryURL to entryContent when
  // entryURL ends with ".pdf"; otherwise entryContent is returned unchanged.
  //
  // entryURL is HTML-escaped before being placed in the href attribute, so a
  // quote or angle bracket in the URL cannot break out of the attribute.
  func addPDFLink(entryURL, entryContent string) string {
  	if !strings.HasSuffix(entryURL, ".pdf") {
  		return entryContent
  	}
  	return fmt.Sprintf(`<a href="%s">PDF</a><br>%s`, html.EscapeString(entryURL), entryContent)
  }
  125. func replaceTextLinks(input string) string {
  126. return textLinkRegex.ReplaceAllString(input, `<a href="${1}">${1}</a>`)
  127. }
  // replaceLineFeeds converts every line feed character in input into an HTML
  // <br> tag. Other characters, including carriage returns, are left as is.
  func replaceLineFeeds(input string) string {
  	lines := strings.Split(input, "\n")
  	return strings.Join(lines, "<br>")
  }