media_proxy.go 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  // Copyright 2020 Frédéric Guillot. All rights reserved.
  // Use of this source code is governed by the Apache 2.0
  // license that can be found in the LICENSE file.
  4. package proxy // import "miniflux.app/proxy"
  5. import (
  6. "strings"
  7. "miniflux.app/config"
  8. "miniflux.app/reader/sanitizer"
  9. "miniflux.app/url"
  10. "github.com/PuerkitoBio/goquery"
  11. "github.com/gorilla/mux"
  12. )
  13. type urlProxyRewriter func(router *mux.Router, url string) string
  14. // ProxyRewriter replaces media URLs with internal proxy URLs.
  15. func ProxyRewriter(router *mux.Router, data string) string {
  16. return genericProxyRewriter(router, ProxifyURL, data)
  17. }
  18. // AbsoluteProxyRewriter do the same as ProxyRewriter except it uses absolute URLs.
  19. func AbsoluteProxyRewriter(router *mux.Router, host, data string) string {
  20. proxifyFunction := func(router *mux.Router, url string) string {
  21. return AbsoluteProxifyURL(router, host, url)
  22. }
  23. return genericProxyRewriter(router, proxifyFunction, data)
  24. }
  25. func genericProxyRewriter(router *mux.Router, proxifyFunction urlProxyRewriter, data string) string {
  26. proxyOption := config.Opts.ProxyOption()
  27. if proxyOption == "none" {
  28. return data
  29. }
  30. doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
  31. if err != nil {
  32. return data
  33. }
  34. for _, mediaType := range config.Opts.ProxyMediaTypes() {
  35. switch mediaType {
  36. case "image":
  37. doc.Find("img").Each(func(i int, img *goquery.Selection) {
  38. if srcAttrValue, ok := img.Attr("src"); ok {
  39. if !isDataURL(srcAttrValue) && (proxyOption == "all" || !url.IsHTTPS(srcAttrValue)) {
  40. img.SetAttr("src", proxifyFunction(router, srcAttrValue))
  41. }
  42. }
  43. if srcsetAttrValue, ok := img.Attr("srcset"); ok {
  44. proxifySourceSet(img, router, proxifyFunction, proxyOption, srcsetAttrValue)
  45. }
  46. })
  47. doc.Find("picture source").Each(func(i int, sourceElement *goquery.Selection) {
  48. if srcsetAttrValue, ok := sourceElement.Attr("srcset"); ok {
  49. proxifySourceSet(sourceElement, router, proxifyFunction, proxyOption, srcsetAttrValue)
  50. }
  51. })
  52. case "audio":
  53. doc.Find("audio").Each(func(i int, audio *goquery.Selection) {
  54. if srcAttrValue, ok := audio.Attr("src"); ok {
  55. if !isDataURL(srcAttrValue) && (proxyOption == "all" || !url.IsHTTPS(srcAttrValue)) {
  56. audio.SetAttr("src", proxifyFunction(router, srcAttrValue))
  57. }
  58. }
  59. })
  60. doc.Find("audio source").Each(func(i int, sourceElement *goquery.Selection) {
  61. if srcAttrValue, ok := sourceElement.Attr("src"); ok {
  62. if !isDataURL(srcAttrValue) && (proxyOption == "all" || !url.IsHTTPS(srcAttrValue)) {
  63. sourceElement.SetAttr("src", proxifyFunction(router, srcAttrValue))
  64. }
  65. }
  66. })
  67. case "video":
  68. doc.Find("video").Each(func(i int, video *goquery.Selection) {
  69. if srcAttrValue, ok := video.Attr("src"); ok {
  70. if !isDataURL(srcAttrValue) && (proxyOption == "all" || !url.IsHTTPS(srcAttrValue)) {
  71. video.SetAttr("src", proxifyFunction(router, srcAttrValue))
  72. }
  73. }
  74. })
  75. doc.Find("video source").Each(func(i int, sourceElement *goquery.Selection) {
  76. if srcAttrValue, ok := sourceElement.Attr("src"); ok {
  77. if !isDataURL(srcAttrValue) && (proxyOption == "all" || !url.IsHTTPS(srcAttrValue)) {
  78. sourceElement.SetAttr("src", proxifyFunction(router, srcAttrValue))
  79. }
  80. }
  81. })
  82. }
  83. }
  84. output, err := doc.Find("body").First().Html()
  85. if err != nil {
  86. return data
  87. }
  88. return output
  89. }
  90. func proxifySourceSet(element *goquery.Selection, router *mux.Router, proxifyFunction urlProxyRewriter, proxyOption, srcsetAttrValue string) {
  91. imageCandidates := sanitizer.ParseSrcSetAttribute(srcsetAttrValue)
  92. for _, imageCandidate := range imageCandidates {
  93. if !isDataURL(imageCandidate.ImageURL) && (proxyOption == "all" || !url.IsHTTPS(imageCandidate.ImageURL)) {
  94. imageCandidate.ImageURL = proxifyFunction(router, imageCandidate.ImageURL)
  95. }
  96. }
  97. element.SetAttr("srcset", imageCandidates.String())
  98. }
  // isDataURL reports whether s is a "data:" URL.
  //
  // URL schemes are case-insensitive and URL attribute values may start with
  // ASCII whitespace, so "DATA:..." and " data:..." are recognized as well.
  // Missing them would send inline data through the media proxy, which cannot
  // fetch it.
  func isDataURL(s string) bool {
  	const scheme = "data:"

  	s = strings.TrimLeft(s, " \t\r\n\f")
  	// Compare only the leading bytes; the length check keeps the slice in range.
  	return len(s) >= len(scheme) && strings.EqualFold(s[:len(scheme)], scheme)
  }