media_proxy.go 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package proxy // import "miniflux.app/v2/internal/proxy"
  4. import (
  5. "strings"
  6. "miniflux.app/v2/internal/config"
  7. "miniflux.app/v2/internal/reader/sanitizer"
  8. "miniflux.app/v2/internal/urllib"
  9. "github.com/PuerkitoBio/goquery"
  10. "github.com/gorilla/mux"
  11. )
  12. type urlProxyRewriter func(router *mux.Router, url string) string
  13. // ProxyRewriter replaces media URLs with internal proxy URLs.
  14. func ProxyRewriter(router *mux.Router, data string) string {
  15. return genericProxyRewriter(router, ProxifyURL, data)
  16. }
  17. // AbsoluteProxyRewriter do the same as ProxyRewriter except it uses absolute URLs.
  18. func AbsoluteProxyRewriter(router *mux.Router, host, data string) string {
  19. proxifyFunction := func(router *mux.Router, url string) string {
  20. return AbsoluteProxifyURL(router, host, url)
  21. }
  22. return genericProxyRewriter(router, proxifyFunction, data)
  23. }
  24. func genericProxyRewriter(router *mux.Router, proxifyFunction urlProxyRewriter, data string) string {
  25. proxyOption := config.Opts.ProxyOption()
  26. if proxyOption == "none" {
  27. return data
  28. }
  29. doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
  30. if err != nil {
  31. return data
  32. }
  33. for _, mediaType := range config.Opts.ProxyMediaTypes() {
  34. switch mediaType {
  35. case "image":
  36. doc.Find("img").Each(func(i int, img *goquery.Selection) {
  37. if srcAttrValue, ok := img.Attr("src"); ok {
  38. if !isDataURL(srcAttrValue) && (proxyOption == "all" || !urllib.IsHTTPS(srcAttrValue)) {
  39. img.SetAttr("src", proxifyFunction(router, srcAttrValue))
  40. }
  41. }
  42. if srcsetAttrValue, ok := img.Attr("srcset"); ok {
  43. proxifySourceSet(img, router, proxifyFunction, proxyOption, srcsetAttrValue)
  44. }
  45. })
  46. doc.Find("picture source").Each(func(i int, sourceElement *goquery.Selection) {
  47. if srcsetAttrValue, ok := sourceElement.Attr("srcset"); ok {
  48. proxifySourceSet(sourceElement, router, proxifyFunction, proxyOption, srcsetAttrValue)
  49. }
  50. })
  51. doc.Find("video").Each(func(i int, video *goquery.Selection) {
  52. if posterAttrValue, ok := video.Attr("poster"); ok {
  53. if !isDataURL(posterAttrValue) && (proxyOption == "all" || !urllib.IsHTTPS(posterAttrValue)) {
  54. video.SetAttr("poster", proxifyFunction(router, posterAttrValue))
  55. }
  56. }
  57. })
  58. case "audio":
  59. doc.Find("audio").Each(func(i int, audio *goquery.Selection) {
  60. if srcAttrValue, ok := audio.Attr("src"); ok {
  61. if !isDataURL(srcAttrValue) && (proxyOption == "all" || !urllib.IsHTTPS(srcAttrValue)) {
  62. audio.SetAttr("src", proxifyFunction(router, srcAttrValue))
  63. }
  64. }
  65. })
  66. doc.Find("audio source").Each(func(i int, sourceElement *goquery.Selection) {
  67. if srcAttrValue, ok := sourceElement.Attr("src"); ok {
  68. if !isDataURL(srcAttrValue) && (proxyOption == "all" || !urllib.IsHTTPS(srcAttrValue)) {
  69. sourceElement.SetAttr("src", proxifyFunction(router, srcAttrValue))
  70. }
  71. }
  72. })
  73. case "video":
  74. doc.Find("video").Each(func(i int, video *goquery.Selection) {
  75. if srcAttrValue, ok := video.Attr("src"); ok {
  76. if !isDataURL(srcAttrValue) && (proxyOption == "all" || !urllib.IsHTTPS(srcAttrValue)) {
  77. video.SetAttr("src", proxifyFunction(router, srcAttrValue))
  78. }
  79. }
  80. if posterAttrValue, ok := video.Attr("poster"); ok {
  81. if !isDataURL(posterAttrValue) && (proxyOption == "all" || !urllib.IsHTTPS(posterAttrValue)) {
  82. video.SetAttr("poster", proxifyFunction(router, posterAttrValue))
  83. }
  84. }
  85. })
  86. doc.Find("video source").Each(func(i int, sourceElement *goquery.Selection) {
  87. if srcAttrValue, ok := sourceElement.Attr("src"); ok {
  88. if !isDataURL(srcAttrValue) && (proxyOption == "all" || !urllib.IsHTTPS(srcAttrValue)) {
  89. sourceElement.SetAttr("src", proxifyFunction(router, srcAttrValue))
  90. }
  91. }
  92. })
  93. }
  94. }
  95. output, err := doc.Find("body").First().Html()
  96. if err != nil {
  97. return data
  98. }
  99. return output
  100. }
  101. func proxifySourceSet(element *goquery.Selection, router *mux.Router, proxifyFunction urlProxyRewriter, proxyOption, srcsetAttrValue string) {
  102. imageCandidates := sanitizer.ParseSrcSetAttribute(srcsetAttrValue)
  103. for _, imageCandidate := range imageCandidates {
  104. if !isDataURL(imageCandidate.ImageURL) && (proxyOption == "all" || !urllib.IsHTTPS(imageCandidate.ImageURL)) {
  105. imageCandidate.ImageURL = proxifyFunction(router, imageCandidate.ImageURL)
  106. }
  107. }
  108. element.SetAttr("srcset", imageCandidates.String())
  109. }
  // isDataURL reports whether s starts with the "data:" URL scheme.
  //
  // The comparison ignores letter case because URI schemes are
  // case-insensitive: "DATA:image/png;base64,..." is an inline resource just
  // like its lowercase form and must not be sent to the proxy.
  func isDataURL(s string) bool {
  	const scheme = "data:"
  	return len(s) >= len(scheme) && strings.EqualFold(s[:len(scheme)], scheme)
  }