rewriter_test.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package rewrite // import "miniflux.app/reader/rewrite"
  5. import (
  6. "reflect"
  7. "strings"
  8. "testing"
  9. )
  10. func TestParseRules(t *testing.T) {
  11. rulesText := `add_dynamic_image,replace("article/(.*).svg"|"article/$1.png"),remove(".spam, .ads:not(.keep)")`
  12. expected := []rule{
  13. {name: "add_dynamic_image"},
  14. {name: "replace", args: []string{"article/(.*).svg", "article/$1.png"}},
  15. {name: "remove", args: []string{".spam, .ads:not(.keep)"}},
  16. }
  17. actual := parseRules(rulesText)
  18. if !reflect.DeepEqual(expected, actual) {
  19. t.Errorf(`Parsed rules do not match expected rules: got %v instead of %v`, actual, expected)
  20. }
  21. }
  22. func TestReplaceTextLinks(t *testing.T) {
  23. scenarios := map[string]string{
  24. `This is a link to example.org`: `This is a link to example.org`,
  25. `This is a link to ftp://example.org`: `This is a link to ftp://example.org`,
  26. `This is a link to www.example.org`: `This is a link to www.example.org`,
  27. `This is a link to http://example.org`: `This is a link to <a href="http://example.org">http://example.org</a>`,
  28. `This is a link to http://example.org, end of sentence.`: `This is a link to <a href="http://example.org">http://example.org</a>, end of sentence.`,
  29. `This is a link to https://example.org`: `This is a link to <a href="https://example.org">https://example.org</a>`,
  30. `This is a link to https://www.example.org/path/to?q=s`: `This is a link to <a href="https://www.example.org/path/to?q=s">https://www.example.org/path/to?q=s</a>`,
  31. `This is a link to https://example.org/index#hash-tag, http://example.org/.`: `This is a link to <a href="https://example.org/index#hash-tag">https://example.org/index#hash-tag</a>, <a href="http://example.org/">http://example.org/</a>.`,
  32. }
  33. for input, expected := range scenarios {
  34. actual := replaceTextLinks(input)
  35. if actual != expected {
  36. t.Errorf(`Unexpected link replacement, got "%s" instead of "%s"`, actual, expected)
  37. }
  38. }
  39. }
  40. func TestRewriteWithNoMatchingRule(t *testing.T) {
  41. output := Rewriter("https://example.org/article", `Some text.`, ``)
  42. expected := `Some text.`
  43. if expected != output {
  44. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  45. }
  46. }
  47. func TestRewriteWithYoutubeLink(t *testing.T) {
  48. output := Rewriter("https://www.youtube.com/watch?v=1234", "Video Description", ``)
  49. expected := `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/1234" allowfullscreen></iframe><br>Video Description`
  50. if expected != output {
  51. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  52. }
  53. }
  54. func TestRewriteWithInexistingCustomRule(t *testing.T) {
  55. output := Rewriter("https://www.youtube.com/watch?v=1234", `Video Description`, `some rule`)
  56. expected := `Video Description`
  57. if expected != output {
  58. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  59. }
  60. }
  61. func TestRewriteWithXkcdLink(t *testing.T) {
  62. description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`
  63. output := Rewriter("https://xkcd.com/1912/", description, ``)
  64. expected := `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you."/><figcaption><p>Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.</p></figcaption></figure>`
  65. if expected != output {
  66. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  67. }
  68. }
  69. func TestRewriteWithXkcdLinkHtmlInjection(t *testing.T) {
  70. description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="<foo>" alt="<foo>" />`
  71. output := Rewriter("https://xkcd.com/1912/", description, ``)
  72. expected := `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="&lt;foo&gt;"/><figcaption><p>&lt;foo&gt;</p></figcaption></figure>`
  73. if expected != output {
  74. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  75. }
  76. }
  77. func TestRewriteWithXkcdLinkAndImageNoTitle(t *testing.T) {
  78. description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`
  79. output := Rewriter("https://xkcd.com/1912/", description, ``)
  80. expected := description
  81. if expected != output {
  82. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  83. }
  84. }
  85. func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) {
  86. description := "test"
  87. output := Rewriter("https://xkcd.com/1912/", description, ``)
  88. expected := description
  89. if expected != output {
  90. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  91. }
  92. }
  93. func TestRewriteWithXkcdAndNoImage(t *testing.T) {
  94. description := "test"
  95. output := Rewriter("https://xkcd.com/1912/", description, ``)
  96. expected := description
  97. if expected != output {
  98. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  99. }
  100. }
  101. func TestRewriteMailtoLink(t *testing.T) {
  102. description := `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact</a>`
  103. output := Rewriter("https://www.qwantz.com/", description, ``)
  104. expected := `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact [blah blah]</a>`
  105. if expected != output {
  106. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  107. }
  108. }
  109. func TestRewriteWithPDFLink(t *testing.T) {
  110. description := "test"
  111. output := Rewriter("https://example.org/document.pdf", description, ``)
  112. expected := `<a href="https://example.org/document.pdf">PDF</a><br>test`
  113. if expected != output {
  114. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  115. }
  116. }
  117. func TestRewriteWithNoLazyImage(t *testing.T) {
  118. description := `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`
  119. output := Rewriter("https://example.org/article", description, "add_dynamic_image")
  120. expected := description
  121. if expected != output {
  122. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  123. }
  124. }
  125. func TestRewriteWithLazyImage(t *testing.T) {
  126. description := `<img src="" data-url="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
  127. output := Rewriter("https://example.org/article", description, "add_dynamic_image")
  128. expected := `<img src="https://example.org/image.jpg" data-url="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
  129. if expected != output {
  130. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  131. }
  132. }
  133. func TestRewriteWithLazyDivImage(t *testing.T) {
  134. description := `<div data-url="https://example.org/image.jpg" alt="Image"></div><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
  135. output := Rewriter("https://example.org/article", description, "add_dynamic_image")
  136. expected := `<img src="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
  137. if expected != output {
  138. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  139. }
  140. }
  141. func TestRewriteWithUnknownLazyNoScriptImage(t *testing.T) {
  142. description := `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
  143. output := Rewriter("https://example.org/article", description, "add_dynamic_image")
  144. expected := `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"/><img src="https://example.org/fallback.jpg" alt="Fallback"/>`
  145. if expected != output {
  146. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  147. }
  148. }
  149. func TestRewriteWithLazySrcset(t *testing.T) {
  150. description := `<img srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`
  151. output := Rewriter("https://example.org/article", description, "add_dynamic_image")
  152. expected := `<img srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`
  153. if expected != output {
  154. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  155. }
  156. }
  157. func TestRewriteWithImageAndLazySrcset(t *testing.T) {
  158. description := `<img src="meow" srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`
  159. output := Rewriter("https://example.org/article", description, "add_dynamic_image")
  160. expected := `<img src="meow" srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`
  161. if expected != output {
  162. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  163. }
  164. }
  165. func TestNewLineRewriteRule(t *testing.T) {
  166. description := "A\nB\nC"
  167. output := Rewriter("https://example.org/article", description, "nl2br")
  168. expected := `A<br>B<br>C`
  169. if expected != output {
  170. t.Errorf(`Not expected output: got %q instead of %q`, output, expected)
  171. }
  172. }
  173. func TestConvertTextLinkRewriteRule(t *testing.T) {
  174. description := "Test: http://example.org/a/b"
  175. output := Rewriter("https://example.org/article", description, "convert_text_link")
  176. expected := `Test: <a href="http://example.org/a/b">http://example.org/a/b</a>`
  177. if expected != output {
  178. t.Errorf(`Not expected output: got %q instead of %q`, output, expected)
  179. }
  180. }
  181. func TestMediumImage(t *testing.T) {
  182. content := `
  183. <figure class="ht hu hv hw hx hy cy cz paragraph-image">
  184. <div class="hz ia ib ic aj">
  185. <div class="cy cz hs">
  186. <div class="ii s ib ij">
  187. <div class="ik il s">
  188. <div class="id ie t u v if aj bk ig ih">
  189. <img alt="Image for post" class="t u v if aj im in io" src="https://miro.medium.com/max/60/1*ephLSqSzQYLvb7faDwzRbw.jpeg?q=20" width="1280" height="720"/>
  190. </div>
  191. <img alt="Image for post" class="id ie t u v if aj c" width="1280" height="720"/>
  192. <noscript>
  193. <img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcSet="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>
  194. </noscript>
  195. </div>
  196. </div>
  197. </div>
  198. </div>
  199. </figure>
  200. `
  201. expected := `<img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcset="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>`
  202. output := Rewriter("https://example.org/article", content, "fix_medium_images")
  203. output = strings.TrimSpace(output)
  204. if expected != output {
  205. t.Errorf(`Not expected output: %s`, output)
  206. }
  207. }
  208. func TestRewriteNoScriptImageWithoutNoScriptTag(t *testing.T) {
  209. content := `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><figcaption>MDN Logo</figcaption></figure>`
  210. expected := `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."/><figcaption>MDN Logo</figcaption></figure>`
  211. output := Rewriter("https://example.org/article", content, "use_noscript_figure_images")
  212. output = strings.TrimSpace(output)
  213. if expected != output {
  214. t.Errorf(`Not expected output: %s`, output)
  215. }
  216. }
  217. func TestRewriteNoScriptImageWithNoScriptTag(t *testing.T) {
  218. content := `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><noscript><img src="http://example.org/logo.svg"></noscript><figcaption>MDN Logo</figcaption></figure>`
  219. expected := `<figure><img src="http://example.org/logo.svg"/><figcaption>MDN Logo</figcaption></figure>`
  220. output := Rewriter("https://example.org/article", content, "use_noscript_figure_images")
  221. output = strings.TrimSpace(output)
  222. if expected != output {
  223. t.Errorf(`Not expected output: %s`, output)
  224. }
  225. }
  226. func TestRewriteReplaceCustom(t *testing.T) {
  227. content := `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.svg">`
  228. expected := `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.png">`
  229. output := Rewriter("https://example.org/article", content, `replace("article/(.*).svg"|"article/$1.png")`)
  230. if expected != output {
  231. t.Errorf(`Not expected output: %s`, output)
  232. }
  233. }
  234. func TestRewriteRemoveCustom(t *testing.T) {
  235. content := `<div>Lorem Ipsum <span class="spam">I dont want to see this</span><span class="ads keep">Super important info</span></div>`
  236. expected := `<div>Lorem Ipsum <span class="ads keep">Super important info</span></div>`
  237. output := Rewriter("https://example.org/article", content, `remove(".spam, .ads:not(.keep)")`)
  238. if expected != output {
  239. t.Errorf(`Not expected output: %s`, output)
  240. }
  241. }
  242. func TestRewriteAddCastopodEpisode(t *testing.T) {
  243. output := Rewriter("https://podcast.demo/@demo/episodes/test", "Episode Description", `add_castopod_episode`)
  244. expected := `<iframe width="650" frameborder="0" src="https://podcast.demo/@demo/episodes/test/embed/light"></iframe><br>Episode Description`
  245. if expected != output {
  246. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  247. }
  248. }
  249. func TestRewriteBase64Decode(t *testing.T) {
  250. content := `VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=`
  251. expected := `This is some base64 encoded content`
  252. output := Rewriter("https://example.org/article", content, `base64_decode`)
  253. if expected != output {
  254. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  255. }
  256. }
  257. func TestRewriteBase64DecodeInHTML(t *testing.T) {
  258. content := `<div>Lorem Ipsum not valid base64<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`
  259. expected := `<div>Lorem Ipsum not valid base64<span class="base64">This is some base64 encoded content</span></div>`
  260. output := Rewriter("https://example.org/article", content, `base64_decode`)
  261. if expected != output {
  262. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  263. }
  264. }
  265. func TestRewriteBase64DecodeArgs(t *testing.T) {
  266. content := `<div>Lorem Ipsum<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`
  267. expected := `<div>Lorem Ipsum<span class="base64">This is some base64 encoded content</span></div>`
  268. output := Rewriter("https://example.org/article", content, `base64_decode(".base64")`)
  269. if expected != output {
  270. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  271. }
  272. }