sanitizer_test.go 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package sanitizer // import "miniflux.app/reader/sanitizer"
  5. import "testing"
  6. func TestValidInput(t *testing.T) {
  7. input := `<p>This is a <strong>text</strong> with an image: <img src="http://example.org/" alt="Test">.</p>`
  8. output := Sanitize("http://example.org/", input)
  9. if input != output {
  10. t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
  11. }
  12. }
  13. func TestSelfClosingTags(t *testing.T) {
  14. input := `<p>This <br> is a <strong>text</strong> <br/>with an image: <img src="http://example.org/" alt="Test"/>.</p>`
  15. output := Sanitize("http://example.org/", input)
  16. if input != output {
  17. t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
  18. }
  19. }
  20. func TestTable(t *testing.T) {
  21. input := `<table><tr><th>A</th><th colspan="2">B</th></tr><tr><td>C</td><td>D</td><td>E</td></tr></table>`
  22. output := Sanitize("http://example.org/", input)
  23. if input != output {
  24. t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
  25. }
  26. }
  27. func TestRelativeURL(t *testing.T) {
  28. input := `This <a href="/test.html">link is relative</a> and this image: <img src="../folder/image.png"/>`
  29. expected := `This <a href="http://example.org/test.html" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">link is relative</a> and this image: <img src="http://example.org/folder/image.png"/>`
  30. output := Sanitize("http://example.org/", input)
  31. if expected != output {
  32. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  33. }
  34. }
  35. func TestProtocolRelativeURL(t *testing.T) {
  36. input := `This <a href="//static.example.org/index.html">link is relative</a>.`
  37. expected := `This <a href="https://static.example.org/index.html" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">link is relative</a>.`
  38. output := Sanitize("http://example.org/", input)
  39. if expected != output {
  40. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  41. }
  42. }
  43. func TestInvalidTag(t *testing.T) {
  44. input := `<p>My invalid <b>tag</b>.</p>`
  45. expected := `<p>My invalid tag.</p>`
  46. output := Sanitize("http://example.org/", input)
  47. if expected != output {
  48. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  49. }
  50. }
  51. func TestVideoTag(t *testing.T) {
  52. input := `<p>My valid <video src="videofile.webm" autoplay poster="posterimage.jpg">fallback</video>.</p>`
  53. expected := `<p>My valid <video src="http://example.org/videofile.webm" poster="http://example.org/posterimage.jpg" controls>fallback</video>.</p>`
  54. output := Sanitize("http://example.org/", input)
  55. if expected != output {
  56. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  57. }
  58. }
  59. func TestAudioAndSourceTag(t *testing.T) {
  60. input := `<p>My music <audio controls="controls"><source src="foo.wav" type="audio/wav"></audio>.</p>`
  61. expected := `<p>My music <audio controls><source src="http://example.org/foo.wav" type="audio/wav"></audio>.</p>`
  62. output := Sanitize("http://example.org/", input)
  63. if expected != output {
  64. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  65. }
  66. }
  67. func TestUnknownTag(t *testing.T) {
  68. input := `<p>My invalid <unknown>tag</unknown>.</p>`
  69. expected := `<p>My invalid tag.</p>`
  70. output := Sanitize("http://example.org/", input)
  71. if expected != output {
  72. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  73. }
  74. }
  75. func TestInvalidNestedTag(t *testing.T) {
  76. input := `<p>My invalid <b>tag with some <em>valid</em> tag</b>.</p>`
  77. expected := `<p>My invalid tag with some <em>valid</em> tag.</p>`
  78. output := Sanitize("http://example.org/", input)
  79. if expected != output {
  80. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  81. }
  82. }
  83. func TestInvalidIFrame(t *testing.T) {
  84. input := `<iframe src="http://example.org/"></iframe>`
  85. expected := ``
  86. output := Sanitize("http://example.org/", input)
  87. if expected != output {
  88. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  89. }
  90. }
  91. func TestInvalidURLScheme(t *testing.T) {
  92. input := `<p>This link is <a src="file:///etc/passwd">not valid</a></p>`
  93. expected := `<p>This link is not valid</p>`
  94. output := Sanitize("http://example.org/", input)
  95. if expected != output {
  96. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  97. }
  98. }
  99. func TestBlacklistedLink(t *testing.T) {
  100. input := `<p>This image is not valid <img src="https://stats.wordpress.com/some-tracker"></p>`
  101. expected := `<p>This image is not valid </p>`
  102. output := Sanitize("http://example.org/", input)
  103. if expected != output {
  104. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  105. }
  106. }
  107. func TestPixelTracker(t *testing.T) {
  108. input := `<p><img src="https://tracker1.example.org/" height="1" width="1"> and <img src="https://tracker2.example.org/" height="1" width="1"/></p>`
  109. expected := `<p> and </p>`
  110. output := Sanitize("http://example.org/", input)
  111. if expected != output {
  112. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  113. }
  114. }
  115. func TestXmlEntities(t *testing.T) {
  116. input := `<pre>echo "test" &gt; /etc/hosts</pre>`
  117. expected := `<pre>echo &#34;test&#34; &gt; /etc/hosts</pre>`
  118. output := Sanitize("http://example.org/", input)
  119. if expected != output {
  120. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  121. }
  122. }
  123. func TestEspaceAttributes(t *testing.T) {
  124. input := `<td rowspan="<b>test</b>">test</td>`
  125. expected := `<td rowspan="&lt;b&gt;test&lt;/b&gt;">test</td>`
  126. output := Sanitize("http://example.org/", input)
  127. if expected != output {
  128. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  129. }
  130. }
  131. func TestReplaceYoutubeURL(t *testing.T) {
  132. input := `<iframe src="http://www.youtube.com/embed/test123?version=3&#038;rel=1&#038;fs=1&#038;autohide=2&#038;showsearch=0&#038;showinfo=1&#038;iv_load_policy=1&#038;wmode=transparent"></iframe>`
  133. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?version=3&amp;rel=1&amp;fs=1&amp;autohide=2&amp;showsearch=0&amp;showinfo=1&amp;iv_load_policy=1&amp;wmode=transparent" sandbox="allow-scripts allow-same-origin allow-popups"></iframe>`
  134. output := Sanitize("http://example.org/", input)
  135. if expected != output {
  136. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  137. }
  138. }
  139. func TestReplaceSecureYoutubeURL(t *testing.T) {
  140. input := `<iframe src="https://www.youtube.com/embed/test123"></iframe>`
  141. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123" sandbox="allow-scripts allow-same-origin allow-popups"></iframe>`
  142. output := Sanitize("http://example.org/", input)
  143. if expected != output {
  144. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  145. }
  146. }
  147. func TestReplaceSecureYoutubeURLWithParameters(t *testing.T) {
  148. input := `<iframe src="https://www.youtube.com/embed/test123?rel=0&amp;controls=0"></iframe>`
  149. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin allow-popups"></iframe>`
  150. output := Sanitize("http://example.org/", input)
  151. if expected != output {
  152. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  153. }
  154. }
  155. func TestReplaceYoutubeURLAlreadyReplaced(t *testing.T) {
  156. input := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin"></iframe>`
  157. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin allow-popups"></iframe>`
  158. output := Sanitize("http://example.org/", input)
  159. if expected != output {
  160. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  161. }
  162. }
  163. func TestReplaceProtocolRelativeYoutubeURL(t *testing.T) {
  164. input := `<iframe src="//www.youtube.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen"></iframe>`
  165. expected := `<iframe src="https://www.youtube-nocookie.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen" sandbox="allow-scripts allow-same-origin allow-popups"></iframe>`
  166. output := Sanitize("http://example.org/", input)
  167. if expected != output {
  168. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  169. }
  170. }
  171. func TestReplaceIframeURL(t *testing.T) {
  172. input := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0"></iframe>`
  173. expected := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0" sandbox="allow-scripts allow-same-origin allow-popups"></iframe>`
  174. output := Sanitize("http://example.org/", input)
  175. if expected != output {
  176. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  177. }
  178. }
  179. func TestReplaceNoScript(t *testing.T) {
  180. input := `<p>Before paragraph.</p><noscript>Inside <code>noscript</code> tag with an image: <img src="http://example.org/" alt="Test"></noscript><p>After paragraph.</p>`
  181. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  182. output := Sanitize("http://example.org/", input)
  183. if expected != output {
  184. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  185. }
  186. }
  187. func TestReplaceScript(t *testing.T) {
  188. input := `<p>Before paragraph.</p><script type="text/javascript">alert("1");</script><p>After paragraph.</p>`
  189. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  190. output := Sanitize("http://example.org/", input)
  191. if expected != output {
  192. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  193. }
  194. }
  195. func TestReplaceStyle(t *testing.T) {
  196. input := `<p>Before paragraph.</p><style>body { background-color: #ff0000; }</style><p>After paragraph.</p>`
  197. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  198. output := Sanitize("http://example.org/", input)
  199. if expected != output {
  200. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  201. }
  202. }