rewriter_test.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package rewrite // import "miniflux.app/reader/rewrite"
  5. import (
  6. "strings"
  7. "testing"
  8. )
  9. func TestReplaceTextLinks(t *testing.T) {
  10. scenarios := map[string]string{
  11. `This is a link to example.org`: `This is a link to example.org`,
  12. `This is a link to ftp://example.org`: `This is a link to ftp://example.org`,
  13. `This is a link to www.example.org`: `This is a link to www.example.org`,
  14. `This is a link to http://example.org`: `This is a link to <a href="http://example.org">http://example.org</a>`,
  15. `This is a link to http://example.org, end of sentence.`: `This is a link to <a href="http://example.org">http://example.org</a>, end of sentence.`,
  16. `This is a link to https://example.org`: `This is a link to <a href="https://example.org">https://example.org</a>`,
  17. `This is a link to https://www.example.org/path/to?q=s`: `This is a link to <a href="https://www.example.org/path/to?q=s">https://www.example.org/path/to?q=s</a>`,
  18. `This is a link to https://example.org/index#hash-tag, http://example.org/.`: `This is a link to <a href="https://example.org/index#hash-tag">https://example.org/index#hash-tag</a>, <a href="http://example.org/">http://example.org/</a>.`,
  19. }
  20. for input, expected := range scenarios {
  21. actual := replaceTextLinks(input)
  22. if actual != expected {
  23. t.Errorf(`Unexpected link replacement, got "%s" instead of "%s"`, actual, expected)
  24. }
  25. }
  26. }
  27. func TestRewriteWithNoMatchingRule(t *testing.T) {
  28. output := Rewriter("https://example.org/article", `Some text.`, ``)
  29. expected := `Some text.`
  30. if expected != output {
  31. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  32. }
  33. }
  34. func TestRewriteWithYoutubeLink(t *testing.T) {
  35. output := Rewriter("https://www.youtube.com/watch?v=1234", "Video Description", ``)
  36. expected := `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/1234" allowfullscreen></iframe><br>Video Description`
  37. if expected != output {
  38. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  39. }
  40. }
  41. func TestRewriteWithInexistingCustomRule(t *testing.T) {
  42. output := Rewriter("https://www.youtube.com/watch?v=1234", `Video Description`, `some rule`)
  43. expected := `Video Description`
  44. if expected != output {
  45. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  46. }
  47. }
  48. func TestRewriteWithXkcdLink(t *testing.T) {
  49. description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`
  50. output := Rewriter("https://xkcd.com/1912/", description, ``)
  51. expected := `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you."/><figcaption><p>Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.</p></figcaption></figure>`
  52. if expected != output {
  53. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  54. }
  55. }
  56. func TestRewriteWithXkcdLinkHtmlInjection(t *testing.T) {
  57. description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="<foo>" alt="<foo>" />`
  58. output := Rewriter("https://xkcd.com/1912/", description, ``)
  59. expected := `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="&lt;foo&gt;"/><figcaption><p>&lt;foo&gt;</p></figcaption></figure>`
  60. if expected != output {
  61. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  62. }
  63. }
  64. func TestRewriteWithXkcdLinkAndImageNoTitle(t *testing.T) {
  65. description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`
  66. output := Rewriter("https://xkcd.com/1912/", description, ``)
  67. expected := description
  68. if expected != output {
  69. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  70. }
  71. }
  72. func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) {
  73. description := "test"
  74. output := Rewriter("https://xkcd.com/1912/", description, ``)
  75. expected := description
  76. if expected != output {
  77. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  78. }
  79. }
  80. func TestRewriteWithXkcdAndNoImage(t *testing.T) {
  81. description := "test"
  82. output := Rewriter("https://xkcd.com/1912/", description, ``)
  83. expected := description
  84. if expected != output {
  85. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  86. }
  87. }
  88. func TestRewriteMailtoLink(t *testing.T) {
  89. description := `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact</a>`
  90. output := Rewriter("https://www.qwantz.com/", description, ``)
  91. expected := `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact [blah blah]</a>`
  92. if expected != output {
  93. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  94. }
  95. }
  96. func TestRewriteWithPDFLink(t *testing.T) {
  97. description := "test"
  98. output := Rewriter("https://example.org/document.pdf", description, ``)
  99. expected := `<a href="https://example.org/document.pdf">PDF</a><br>test`
  100. if expected != output {
  101. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  102. }
  103. }
  104. func TestRewriteWithNoLazyImage(t *testing.T) {
  105. description := `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`
  106. output := Rewriter("https://example.org/article", description, "add_dynamic_image")
  107. expected := description
  108. if expected != output {
  109. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  110. }
  111. }
  112. func TestRewriteWithLazyImage(t *testing.T) {
  113. description := `<img src="" data-url="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
  114. output := Rewriter("https://example.org/article", description, "add_dynamic_image")
  115. expected := `<img src="https://example.org/image.jpg" data-url="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
  116. if expected != output {
  117. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  118. }
  119. }
  120. func TestRewriteWithLazyDivImage(t *testing.T) {
  121. description := `<div data-url="https://example.org/image.jpg" alt="Image"></div><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
  122. output := Rewriter("https://example.org/article", description, "add_dynamic_image")
  123. expected := `<img src="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
  124. if expected != output {
  125. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  126. }
  127. }
  128. func TestRewriteWithUnknownLazyNoScriptImage(t *testing.T) {
  129. description := `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
  130. output := Rewriter("https://example.org/article", description, "add_dynamic_image")
  131. expected := `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"/><img src="https://example.org/fallback.jpg" alt="Fallback"/>`
  132. if expected != output {
  133. t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
  134. }
  135. }
  136. func TestNewLineRewriteRule(t *testing.T) {
  137. description := "A\nB\nC"
  138. output := Rewriter("https://example.org/article", description, "nl2br")
  139. expected := `A<br>B<br>C`
  140. if expected != output {
  141. t.Errorf(`Not expected output: got %q instead of %q`, output, expected)
  142. }
  143. }
  144. func TestConvertTextLinkRewriteRule(t *testing.T) {
  145. description := "Test: http://example.org/a/b"
  146. output := Rewriter("https://example.org/article", description, "convert_text_link")
  147. expected := `Test: <a href="http://example.org/a/b">http://example.org/a/b</a>`
  148. if expected != output {
  149. t.Errorf(`Not expected output: got %q instead of %q`, output, expected)
  150. }
  151. }
  152. func TestMediumImage(t *testing.T) {
  153. content := `
  154. <figure class="ht hu hv hw hx hy cy cz paragraph-image">
  155. <div class="hz ia ib ic aj">
  156. <div class="cy cz hs">
  157. <div class="ii s ib ij">
  158. <div class="ik il s">
  159. <div class="id ie t u v if aj bk ig ih">
  160. <img alt="Image for post" class="t u v if aj im in io" src="https://miro.medium.com/max/60/1*ephLSqSzQYLvb7faDwzRbw.jpeg?q=20" width="1280" height="720"/>
  161. </div>
  162. <img alt="Image for post" class="id ie t u v if aj c" width="1280" height="720"/>
  163. <noscript>
  164. <img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcSet="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>
  165. </noscript>
  166. </div>
  167. </div>
  168. </div>
  169. </div>
  170. </figure>
  171. `
  172. expected := `<img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcset="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>`
  173. output := Rewriter("https://example.org/article", content, "fix_medium_images")
  174. output = strings.TrimSpace(output)
  175. if expected != output {
  176. t.Errorf(`Not expected output: %s`, output)
  177. }
  178. }
  179. func TestRewriteNoScriptImageWithoutNoScriptTag(t *testing.T) {
  180. content := `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><figcaption>MDN Logo</figcaption></figure>`
  181. expected := `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."/><figcaption>MDN Logo</figcaption></figure>`
  182. output := Rewriter("https://example.org/article", content, "use_noscript_figure_images")
  183. output = strings.TrimSpace(output)
  184. if expected != output {
  185. t.Errorf(`Not expected output: %s`, output)
  186. }
  187. }
  188. func TestRewriteNoScriptImageWithNoScriptTag(t *testing.T) {
  189. content := `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><noscript><img src="http://example.org/logo.svg"></noscript><figcaption>MDN Logo</figcaption></figure>`
  190. expected := `<figure><img src="http://example.org/logo.svg"/><figcaption>MDN Logo</figcaption></figure>`
  191. output := Rewriter("https://example.org/article", content, "use_noscript_figure_images")
  192. output = strings.TrimSpace(output)
  193. if expected != output {
  194. t.Errorf(`Not expected output: %s`, output)
  195. }
  196. }
  197. func TestRewriteReplaceCustom(t *testing.T) {
  198. content := `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.svg">`
  199. expected := `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.png">`
  200. output := Rewriter("https://example.org/artcle", content, `replace("article/(.*).svg"|"article/$1.png")`)
  201. if expected != output {
  202. t.Errorf(`Not expected output: %s`, output)
  203. }
  204. }