rewriter_test.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package rewrite // import "miniflux.app/reader/rewrite"
  5. import (
  6. "reflect"
  7. "strings"
  8. "testing"
  9. )
  10. func TestParseRules(t *testing.T) {
  11. rulesText := `add_dynamic_image,replace("article/(.*).svg"|"article/$1.png"),remove(".spam, .ads:not(.keep)")`
  12. expected := []rule{
  13. {name: "add_dynamic_image"},
  14. {name: "replace", args: []string{"article/(.*).svg", "article/$1.png"}},
  15. {name: "remove", args: []string{".spam, .ads:not(.keep)"}},
  16. }
  17. actual := parseRules(rulesText)
  18. if !reflect.DeepEqual(expected, actual) {
  19. t.Errorf(`Parsed rules do not match expected rules: got %v instead of %v`, actual, expected)
  20. }
  21. }
  22. func TestReplaceTextLinks(t *testing.T) {
  23. scenarios := map[string]string{
  24. `This is a link to example.org`: `This is a link to example.org`,
  25. `This is a link to ftp://example.org`: `This is a link to ftp://example.org`,
  26. `This is a link to www.example.org`: `This is a link to www.example.org`,
  27. `This is a link to http://example.org`: `This is a link to <a href="http://example.org">http://example.org</a>`,
  28. `This is a link to http://example.org, end of sentence.`: `This is a link to <a href="http://example.org">http://example.org</a>, end of sentence.`,
  29. `This is a link to https://example.org`: `This is a link to <a href="https://example.org">https://example.org</a>`,
  30. `This is a link to https://www.example.org/path/to?q=s`: `This is a link to <a href="https://www.example.org/path/to?q=s">https://www.example.org/path/to?q=s</a>`,
  31. `This is a link to https://example.org/index#hash-tag, http://example.org/.`: `This is a link to <a href="https://example.org/index#hash-tag">https://example.org/index#hash-tag</a>, <a href="http://example.org/">http://example.org/</a>.`,
  32. }
  33. for input, expected := range scenarios {
  34. actual := replaceTextLinks(input)
  35. if actual != expected {
  36. t.Errorf(`Unexpected link replacement, got "%s" instead of "%s"`, actual, expected)
  37. }
  38. }
  39. }
  40. func TestRewriteWithNoMatchingRule(t *testing.T) {
  41. output := Rewriter("https://example.org/article", `Some text.`, ``)
  42. expected := `Some text.`
  43. if expected != output {
  44. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  45. }
  46. }
  47. func TestRewriteWithYoutubeLink(t *testing.T) {
  48. output := Rewriter("https://www.youtube.com/watch?v=1234", "Video Description", ``)
  49. expected := `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/1234" allowfullscreen></iframe><br>Video Description`
  50. if expected != output {
  51. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  52. }
  53. }
  54. func TestRewriteWithInexistingCustomRule(t *testing.T) {
  55. output := Rewriter("https://www.youtube.com/watch?v=1234", `Video Description`, `some rule`)
  56. expected := `Video Description`
  57. if expected != output {
  58. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  59. }
  60. }
  61. func TestRewriteWithXkcdLink(t *testing.T) {
  62. description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`
  63. output := Rewriter("https://xkcd.com/1912/", description, ``)
  64. expected := `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you."/><figcaption><p>Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.</p></figcaption></figure>`
  65. if expected != output {
  66. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  67. }
  68. }
  69. func TestRewriteWithXkcdLinkHtmlInjection(t *testing.T) {
  70. description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="<foo>" alt="<foo>" />`
  71. output := Rewriter("https://xkcd.com/1912/", description, ``)
  72. expected := `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="&lt;foo&gt;"/><figcaption><p>&lt;foo&gt;</p></figcaption></figure>`
  73. if expected != output {
  74. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  75. }
  76. }
  77. func TestRewriteWithXkcdLinkAndImageNoTitle(t *testing.T) {
  78. description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`
  79. output := Rewriter("https://xkcd.com/1912/", description, ``)
  80. expected := description
  81. if expected != output {
  82. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  83. }
  84. }
  85. func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) {
  86. description := "test"
  87. output := Rewriter("https://xkcd.com/1912/", description, ``)
  88. expected := description
  89. if expected != output {
  90. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  91. }
  92. }
  93. func TestRewriteWithXkcdAndNoImage(t *testing.T) {
  94. description := "test"
  95. output := Rewriter("https://xkcd.com/1912/", description, ``)
  96. expected := description
  97. if expected != output {
  98. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  99. }
  100. }
  101. func TestRewriteMailtoLink(t *testing.T) {
  102. description := `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact</a>`
  103. output := Rewriter("https://www.qwantz.com/", description, ``)
  104. expected := `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact [blah blah]</a>`
  105. if expected != output {
  106. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  107. }
  108. }
  109. func TestRewriteWithPDFLink(t *testing.T) {
  110. description := "test"
  111. output := Rewriter("https://example.org/document.pdf", description, ``)
  112. expected := `<a href="https://example.org/document.pdf">PDF</a><br>test`
  113. if expected != output {
  114. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  115. }
  116. }
  117. func TestRewriteWithNoLazyImage(t *testing.T) {
  118. description := `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`
  119. output := Rewriter("https://example.org/article", description, "add_dynamic_image")
  120. expected := description
  121. if expected != output {
  122. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  123. }
  124. }
  125. func TestRewriteWithLazyImage(t *testing.T) {
  126. description := `<img src="" data-url="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
  127. output := Rewriter("https://example.org/article", description, "add_dynamic_image")
  128. expected := `<img src="https://example.org/image.jpg" data-url="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
  129. if expected != output {
  130. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  131. }
  132. }
  133. func TestRewriteWithLazyDivImage(t *testing.T) {
  134. description := `<div data-url="https://example.org/image.jpg" alt="Image"></div><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
  135. output := Rewriter("https://example.org/article", description, "add_dynamic_image")
  136. expected := `<img src="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
  137. if expected != output {
  138. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  139. }
  140. }
  141. func TestRewriteWithUnknownLazyNoScriptImage(t *testing.T) {
  142. description := `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
  143. output := Rewriter("https://example.org/article", description, "add_dynamic_image")
  144. expected := `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"/><img src="https://example.org/fallback.jpg" alt="Fallback"/>`
  145. if expected != output {
  146. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  147. }
  148. }
  149. func TestNewLineRewriteRule(t *testing.T) {
  150. description := "A\nB\nC"
  151. output := Rewriter("https://example.org/article", description, "nl2br")
  152. expected := `A<br>B<br>C`
  153. if expected != output {
  154. t.Errorf(`Not expected output: got %q instead of %q`, output, expected)
  155. }
  156. }
  157. func TestConvertTextLinkRewriteRule(t *testing.T) {
  158. description := "Test: http://example.org/a/b"
  159. output := Rewriter("https://example.org/article", description, "convert_text_link")
  160. expected := `Test: <a href="http://example.org/a/b">http://example.org/a/b</a>`
  161. if expected != output {
  162. t.Errorf(`Not expected output: got %q instead of %q`, output, expected)
  163. }
  164. }
  165. func TestMediumImage(t *testing.T) {
  166. content := `
  167. <figure class="ht hu hv hw hx hy cy cz paragraph-image">
  168. <div class="hz ia ib ic aj">
  169. <div class="cy cz hs">
  170. <div class="ii s ib ij">
  171. <div class="ik il s">
  172. <div class="id ie t u v if aj bk ig ih">
  173. <img alt="Image for post" class="t u v if aj im in io" src="https://miro.medium.com/max/60/1*ephLSqSzQYLvb7faDwzRbw.jpeg?q=20" width="1280" height="720"/>
  174. </div>
  175. <img alt="Image for post" class="id ie t u v if aj c" width="1280" height="720"/>
  176. <noscript>
  177. <img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcSet="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>
  178. </noscript>
  179. </div>
  180. </div>
  181. </div>
  182. </div>
  183. </figure>
  184. `
  185. expected := `<img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcset="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>`
  186. output := Rewriter("https://example.org/article", content, "fix_medium_images")
  187. output = strings.TrimSpace(output)
  188. if expected != output {
  189. t.Errorf(`Not expected output: %s`, output)
  190. }
  191. }
  192. func TestRewriteNoScriptImageWithoutNoScriptTag(t *testing.T) {
  193. content := `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><figcaption>MDN Logo</figcaption></figure>`
  194. expected := `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."/><figcaption>MDN Logo</figcaption></figure>`
  195. output := Rewriter("https://example.org/article", content, "use_noscript_figure_images")
  196. output = strings.TrimSpace(output)
  197. if expected != output {
  198. t.Errorf(`Not expected output: %s`, output)
  199. }
  200. }
  201. func TestRewriteNoScriptImageWithNoScriptTag(t *testing.T) {
  202. content := `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><noscript><img src="http://example.org/logo.svg"></noscript><figcaption>MDN Logo</figcaption></figure>`
  203. expected := `<figure><img src="http://example.org/logo.svg"/><figcaption>MDN Logo</figcaption></figure>`
  204. output := Rewriter("https://example.org/article", content, "use_noscript_figure_images")
  205. output = strings.TrimSpace(output)
  206. if expected != output {
  207. t.Errorf(`Not expected output: %s`, output)
  208. }
  209. }
  210. func TestRewriteReplaceCustom(t *testing.T) {
  211. content := `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.svg">`
  212. expected := `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.png">`
  213. output := Rewriter("https://example.org/article", content, `replace("article/(.*).svg"|"article/$1.png")`)
  214. if expected != output {
  215. t.Errorf(`Not expected output: %s`, output)
  216. }
  217. }
  218. func TestRewriteRemoveCustom(t *testing.T) {
  219. content := `<div>Lorem Ipsum <span class="spam">I dont want to see this</span><span class="ads keep">Super important info</span></div>`
  220. expected := `<div>Lorem Ipsum <span class="ads keep">Super important info</span></div>`
  221. output := Rewriter("https://example.org/article", content, `remove(".spam, .ads:not(.keep)")`)
  222. if expected != output {
  223. t.Errorf(`Not expected output: %s`, output)
  224. }
  225. }