sanitizer_test.go 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package sanitizer // import "miniflux.app/v2/internal/reader/sanitizer"
  4. import (
  5. "os"
  6. "strings"
  7. "testing"
  8. "golang.org/x/net/html"
  9. "miniflux.app/v2/internal/config"
  10. )
  11. func TestMain(m *testing.M) {
  12. config.Opts = config.NewOptions()
  13. exitCode := m.Run()
  14. os.Exit(exitCode)
  15. }
  16. func BenchmarkSanitize(b *testing.B) {
  17. var testCases = map[string][]string{
  18. "miniflux_github.html": {"https://github.com/miniflux/v2", ""},
  19. "miniflux_wikipedia.html": {"https://fr.wikipedia.org/wiki/Miniflux", ""},
  20. }
  21. for filename := range testCases {
  22. data, err := os.ReadFile("testdata/" + filename)
  23. if err != nil {
  24. b.Fatalf(`Unable to read file %q: %v`, filename, err)
  25. }
  26. testCases[filename][1] = string(data)
  27. }
  28. for range b.N {
  29. for _, v := range testCases {
  30. SanitizeHTMLWithDefaultOptions(v[0], v[1])
  31. }
  32. }
  33. }
  34. func FuzzSanitizer(f *testing.F) {
  35. f.Fuzz(func(t *testing.T, orig string) {
  36. tok := html.NewTokenizer(strings.NewReader(orig))
  37. i := 0
  38. for tok.Next() != html.ErrorToken {
  39. i++
  40. }
  41. out := SanitizeHTMLWithDefaultOptions("", orig)
  42. tok = html.NewTokenizer(strings.NewReader(out))
  43. j := 0
  44. for tok.Next() != html.ErrorToken {
  45. j++
  46. }
  47. if j > i {
  48. t.Errorf("Got more html tokens in the sanitized html.")
  49. }
  50. })
  51. }
  52. func TestValidInput(t *testing.T) {
  53. input := `<p>This is a <strong>text</strong> with an image: <img src="http://example.org/" alt="Test" loading="lazy">.</p>`
  54. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  55. if input != output {
  56. t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
  57. }
  58. }
  59. func TestImgWithWidthAndHeightAttribute(t *testing.T) {
  60. input := `<img src="https://example.org/image.png" width="10" height="20">`
  61. expected := `<img src="https://example.org/image.png" width="10" height="20" loading="lazy">`
  62. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  63. if output != expected {
  64. t.Errorf(`Wrong output: %s`, output)
  65. }
  66. }
  67. func TestImgWithWidthAndHeightAttributeLargerThanMinifluxLayout(t *testing.T) {
  68. input := `<img src="https://example.org/image.png" width="1200" height="675">`
  69. expected := `<img src="https://example.org/image.png" loading="lazy">`
  70. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  71. if output != expected {
  72. t.Errorf(`Wrong output: %s`, output)
  73. }
  74. }
  75. func TestImgWithIncorrectWidthAndHeightAttribute(t *testing.T) {
  76. input := `<img src="https://example.org/image.png" width="10px" height="20px">`
  77. expected := `<img src="https://example.org/image.png" loading="lazy">`
  78. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  79. if output != expected {
  80. t.Errorf(`Wrong output: %s`, output)
  81. }
  82. }
  83. func TestImgWithEmptywidthAndHeightAttribute(t *testing.T) {
  84. input := `<img src="https://example.org/image.png" width="" height="">`
  85. expected := `<img src="https://example.org/image.png" loading="lazy">`
  86. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  87. if output != expected {
  88. t.Errorf(`Wrong output: %s`, output)
  89. }
  90. }
  91. func TestImgWithTextDataURL(t *testing.T) {
  92. input := `<img src="data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==" alt="Example">`
  93. expected := ``
  94. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  95. if output != expected {
  96. t.Errorf(`Wrong output: %s`, output)
  97. }
  98. }
  99. func TestImgWithDataURL(t *testing.T) {
  100. input := `<img src="data:image/gif;base64,test" alt="Example">`
  101. expected := `<img src="data:image/gif;base64,test" alt="Example" loading="lazy">`
  102. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  103. if output != expected {
  104. t.Errorf(`Wrong output: %s`, output)
  105. }
  106. }
  107. func TestImgWithSrcsetAttribute(t *testing.T) {
  108. input := `<img srcset="example-320w.jpg, example-480w.jpg 1.5x, example-640w.jpg 2x, example-640w.jpg 640w" src="example-640w.jpg" alt="Example">`
  109. expected := `<img srcset="http://example.org/example-320w.jpg, http://example.org/example-480w.jpg 1.5x, http://example.org/example-640w.jpg 2x, http://example.org/example-640w.jpg 640w" src="http://example.org/example-640w.jpg" alt="Example" loading="lazy">`
  110. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  111. if output != expected {
  112. t.Errorf(`Wrong output: %s`, output)
  113. }
  114. }
  115. func TestImgWithSrcsetAndNoSrcAttribute(t *testing.T) {
  116. input := `<img srcset="example-320w.jpg, example-480w.jpg 1.5x, example-640w.jpg 2x, example-640w.jpg 640w" alt="Example">`
  117. expected := `<img srcset="http://example.org/example-320w.jpg, http://example.org/example-480w.jpg 1.5x, http://example.org/example-640w.jpg 2x, http://example.org/example-640w.jpg 640w" alt="Example" loading="lazy">`
  118. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  119. if output != expected {
  120. t.Errorf(`Wrong output: %s`, output)
  121. }
  122. }
  123. func TestImgWithFetchPriorityAttribute(t *testing.T) {
  124. cases := []struct {
  125. input string
  126. expected string
  127. }{
  128. {
  129. `<img src="https://example.org/image.png" fetchpriority="high">`,
  130. `<img src="https://example.org/image.png" fetchpriority="high" loading="lazy">`,
  131. },
  132. {
  133. `<img src="https://example.org/image.png" fetchpriority="low">`,
  134. `<img src="https://example.org/image.png" fetchpriority="low" loading="lazy">`,
  135. },
  136. {
  137. `<img src="https://example.org/image.png" fetchpriority="auto">`,
  138. `<img src="https://example.org/image.png" fetchpriority="auto" loading="lazy">`,
  139. },
  140. }
  141. for _, tc := range cases {
  142. output := SanitizeHTMLWithDefaultOptions("http://example.org/", tc.input)
  143. if output != tc.expected {
  144. t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
  145. }
  146. }
  147. }
  148. func TestImgWithInvalidFetchPriorityAttribute(t *testing.T) {
  149. input := `<img src="https://example.org/image.png" fetchpriority="invalid">`
  150. expected := `<img src="https://example.org/image.png" loading="lazy">`
  151. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  152. if output != expected {
  153. t.Errorf(`Wrong output: expected %q, got %q`, expected, output)
  154. }
  155. }
  156. func TestNonImgWithFetchPriorityAttribute(t *testing.T) {
  157. input := `<p fetchpriority="high">Text</p>`
  158. expected := `<p>Text</p>`
  159. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  160. if output != expected {
  161. t.Errorf(`Wrong output: expected %q, got %q`, expected, output)
  162. }
  163. }
  164. func TestImgWithDecodingAttribute(t *testing.T) {
  165. cases := []struct {
  166. input string
  167. expected string
  168. }{
  169. {
  170. `<img src="https://example.org/image.png" decoding="sync">`,
  171. `<img src="https://example.org/image.png" decoding="sync" loading="lazy">`,
  172. },
  173. {
  174. `<img src="https://example.org/image.png" decoding="async">`,
  175. `<img src="https://example.org/image.png" decoding="async" loading="lazy">`,
  176. },
  177. {
  178. `<img src="https://example.org/image.png" decoding="auto">`,
  179. `<img src="https://example.org/image.png" decoding="auto" loading="lazy">`,
  180. },
  181. }
  182. for _, tc := range cases {
  183. output := SanitizeHTMLWithDefaultOptions("http://example.org/", tc.input)
  184. if output != tc.expected {
  185. t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
  186. }
  187. }
  188. }
  189. func TestImgWithInvalidDecodingAttribute(t *testing.T) {
  190. input := `<img src="https://example.org/image.png" decoding="invalid">`
  191. expected := `<img src="https://example.org/image.png" loading="lazy">`
  192. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  193. if output != expected {
  194. t.Errorf(`Wrong output: expected %q, got %q`, expected, output)
  195. }
  196. }
  197. func TestNonImgWithDecodingAttribute(t *testing.T) {
  198. input := `<p decoding="async">Text</p>`
  199. expected := `<p>Text</p>`
  200. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  201. if output != expected {
  202. t.Errorf(`Wrong output: expected %q, got %q`, expected, output)
  203. }
  204. }
  205. func TestSourceWithSrcsetAndMedia(t *testing.T) {
  206. input := `<picture><source media="(min-width: 800px)" srcset="elva-800w.jpg"></picture>`
  207. expected := `<picture><source media="(min-width: 800px)" srcset="http://example.org/elva-800w.jpg"></picture>`
  208. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  209. if output != expected {
  210. t.Errorf(`Wrong output: %s`, output)
  211. }
  212. }
  213. func TestMediumImgWithSrcset(t *testing.T) {
  214. input := `<img alt="Image for post" class="t u v ef aj" src="https://miro.medium.com/max/5460/1*aJ9JibWDqO81qMfNtqgqrw.jpeg" srcset="https://miro.medium.com/max/552/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 276w, https://miro.medium.com/max/1000/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 500w" sizes="500px" width="2730" height="3407">`
  215. expected := `<img alt="Image for post" src="https://miro.medium.com/max/5460/1*aJ9JibWDqO81qMfNtqgqrw.jpeg" srcset="https://miro.medium.com/max/552/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 276w, https://miro.medium.com/max/1000/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 500w" sizes="500px" loading="lazy">`
  216. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  217. if output != expected {
  218. t.Errorf(`Wrong output: %s`, output)
  219. }
  220. }
  221. func TestSelfClosingTags(t *testing.T) {
  222. input := `<p>This <br> is a <strong>text</strong> <br/>with an image: <img src="http://example.org/" alt="Test" loading="lazy"/>.</p>`
  223. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  224. if input != output {
  225. t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
  226. }
  227. }
  228. func TestTable(t *testing.T) {
  229. input := `<table><tr><th>A</th><th colspan="2">B</th></tr><tr><td>C</td><td>D</td><td>E</td></tr></table>`
  230. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  231. if input != output {
  232. t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
  233. }
  234. }
  235. func TestRelativeURL(t *testing.T) {
  236. input := `This <a href="/test.html">link is relative</a> and this image: <img src="../folder/image.png"/>`
  237. expected := `This <a href="http://example.org/test.html" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">link is relative</a> and this image: <img src="http://example.org/folder/image.png" loading="lazy"/>`
  238. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  239. if expected != output {
  240. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  241. }
  242. }
  243. func TestProtocolRelativeURL(t *testing.T) {
  244. input := `This <a href="//static.example.org/index.html">link is relative</a>.`
  245. expected := `This <a href="https://static.example.org/index.html" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">link is relative</a>.`
  246. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  247. if expected != output {
  248. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  249. }
  250. }
  251. func TestInvalidTag(t *testing.T) {
  252. input := `<p>My invalid <z>tag</z>.</p>`
  253. expected := `<p>My invalid tag.</p>`
  254. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  255. if expected != output {
  256. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  257. }
  258. }
  259. func TestVideoTag(t *testing.T) {
  260. input := `<p>My valid <video src="videofile.webm" autoplay poster="posterimage.jpg">fallback</video>.</p>`
  261. expected := `<p>My valid <video src="http://example.org/videofile.webm" poster="http://example.org/posterimage.jpg" controls>fallback</video>.</p>`
  262. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  263. if expected != output {
  264. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  265. }
  266. }
  267. func TestAudioAndSourceTag(t *testing.T) {
  268. input := `<p>My music <audio controls="controls"><source src="foo.wav" type="audio/wav"></audio>.</p>`
  269. expected := `<p>My music <audio controls><source src="http://example.org/foo.wav" type="audio/wav"></audio>.</p>`
  270. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  271. if expected != output {
  272. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  273. }
  274. }
  275. func TestUnknownTag(t *testing.T) {
  276. input := `<p>My invalid <unknown>tag</unknown>.</p>`
  277. expected := `<p>My invalid tag.</p>`
  278. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  279. if expected != output {
  280. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  281. }
  282. }
  283. func TestInvalidNestedTag(t *testing.T) {
  284. input := `<p>My invalid <z>tag with some <em>valid</em> tag</z>.</p>`
  285. expected := `<p>My invalid tag with some <em>valid</em> tag.</p>`
  286. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  287. if expected != output {
  288. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  289. }
  290. }
  291. func TestInvalidIFrame(t *testing.T) {
  292. input := `<iframe src="http://example.org/"></iframe>`
  293. expected := ``
  294. output := SanitizeHTMLWithDefaultOptions("http://example.com/", input)
  295. if expected != output {
  296. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  297. }
  298. }
  299. func TestIFrameWithChildElements(t *testing.T) {
  300. input := `<iframe src="https://www.youtube.com/"><p>test</p></iframe>`
  301. expected := `<iframe src="https://www.youtube.com/" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  302. output := SanitizeHTMLWithDefaultOptions("http://example.com/", input)
  303. if expected != output {
  304. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  305. }
  306. }
  307. func TestLinkWithTarget(t *testing.T) {
  308. input := `<p>This link is <a href="http://example.org/index.html">an anchor</a></p>`
  309. expected := `<p>This link is <a href="http://example.org/index.html" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">an anchor</a></p>`
  310. output := SanitizeHTML("http://example.org/", input, &SanitizerOptions{OpenLinksInNewTab: true})
  311. if expected != output {
  312. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  313. }
  314. }
  315. func TestLinkWithNoTarget(t *testing.T) {
  316. input := `<p>This link is <a href="http://example.org/index.html">an anchor</a></p>`
  317. expected := `<p>This link is <a href="http://example.org/index.html" rel="noopener noreferrer" referrerpolicy="no-referrer">an anchor</a></p>`
  318. output := SanitizeHTML("http://example.org/", input, &SanitizerOptions{OpenLinksInNewTab: false})
  319. if expected != output {
  320. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  321. }
  322. }
  323. func TestAnchorLink(t *testing.T) {
  324. input := `<p>This link is <a href="#some-anchor">an anchor</a></p>`
  325. expected := `<p>This link is <a href="#some-anchor">an anchor</a></p>`
  326. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  327. if expected != output {
  328. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  329. }
  330. }
  331. func TestInvalidURLScheme(t *testing.T) {
  332. input := `<p>This link is <a src="file:///etc/passwd">not valid</a></p>`
  333. expected := `<p>This link is not valid</p>`
  334. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  335. if expected != output {
  336. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  337. }
  338. }
  339. func TestAPTURIScheme(t *testing.T) {
  340. input := `<p>This link is <a href="apt:some-package?channel=test">valid</a></p>`
  341. expected := `<p>This link is <a href="apt:some-package?channel=test" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  342. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  343. if expected != output {
  344. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  345. }
  346. }
  347. func TestBitcoinURIScheme(t *testing.T) {
  348. input := `<p>This link is <a href="bitcoin:175tWpb8K1S7NmH4Zx6rewF9WQrcZv245W">valid</a></p>`
  349. expected := `<p>This link is <a href="bitcoin:175tWpb8K1S7NmH4Zx6rewF9WQrcZv245W" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  350. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  351. if expected != output {
  352. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  353. }
  354. }
  355. func TestCallToURIScheme(t *testing.T) {
  356. input := `<p>This link is <a href="callto:12345679">valid</a></p>`
  357. expected := `<p>This link is <a href="callto:12345679" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  358. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  359. if expected != output {
  360. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  361. }
  362. }
  363. func TestFeedURIScheme(t *testing.T) {
  364. input := `<p>This link is <a href="feed://example.com/rss.xml">valid</a></p>`
  365. expected := `<p>This link is <a href="feed://example.com/rss.xml" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  366. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  367. if expected != output {
  368. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  369. }
  370. input = `<p>This link is <a href="feed:https://example.com/rss.xml">valid</a></p>`
  371. expected = `<p>This link is <a href="feed:https://example.com/rss.xml" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  372. output = SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  373. if expected != output {
  374. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  375. }
  376. }
  377. func TestGeoURIScheme(t *testing.T) {
  378. input := `<p>This link is <a href="geo:13.4125,103.8667">valid</a></p>`
  379. expected := `<p>This link is <a href="geo:13.4125,103.8667" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  380. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  381. if expected != output {
  382. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  383. }
  384. }
  385. func TestItunesURIScheme(t *testing.T) {
  386. input := `<p>This link is <a href="itms://itunes.com/apps/my-app-name">valid</a></p>`
  387. expected := `<p>This link is <a href="itms://itunes.com/apps/my-app-name" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  388. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  389. if expected != output {
  390. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  391. }
  392. input = `<p>This link is <a href="itms-apps://itunes.com/apps/my-app-name">valid</a></p>`
  393. expected = `<p>This link is <a href="itms-apps://itunes.com/apps/my-app-name" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  394. output = SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  395. if expected != output {
  396. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  397. }
  398. }
  399. func TestMagnetURIScheme(t *testing.T) {
  400. input := `<p>This link is <a href="magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&amp;xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7">valid</a></p>`
  401. expected := `<p>This link is <a href="magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&amp;xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  402. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  403. if expected != output {
  404. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  405. }
  406. }
  407. func TestMailtoURIScheme(t *testing.T) {
  408. input := `<p>This link is <a href="mailto:jsmith@example.com?subject=A%20Test&amp;body=My%20idea%20is%3A%20%0A">valid</a></p>`
  409. expected := `<p>This link is <a href="mailto:jsmith@example.com?subject=A%20Test&amp;body=My%20idea%20is%3A%20%0A" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  410. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  411. if expected != output {
  412. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  413. }
  414. }
  415. func TestNewsURIScheme(t *testing.T) {
  416. input := `<p>This link is <a href="news://news.server.example/*">valid</a></p>`
  417. expected := `<p>This link is <a href="news://news.server.example/*" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  418. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  419. if expected != output {
  420. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  421. }
  422. input = `<p>This link is <a href="news:example.group.this">valid</a></p>`
  423. expected = `<p>This link is <a href="news:example.group.this" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  424. output = SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  425. if expected != output {
  426. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  427. }
  428. input = `<p>This link is <a href="nntp://news.server.example/example.group.this">valid</a></p>`
  429. expected = `<p>This link is <a href="nntp://news.server.example/example.group.this" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  430. output = SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  431. if expected != output {
  432. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  433. }
  434. }
  435. func TestRTMPURIScheme(t *testing.T) {
  436. input := `<p>This link is <a href="rtmp://mycompany.com/vod/mp4:mycoolvideo.mov">valid</a></p>`
  437. expected := `<p>This link is <a href="rtmp://mycompany.com/vod/mp4:mycoolvideo.mov" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  438. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  439. if expected != output {
  440. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  441. }
  442. }
  443. func TestSIPURIScheme(t *testing.T) {
  444. input := `<p>This link is <a href="sip:+1-212-555-1212:1234@gateway.com;user=phone">valid</a></p>`
  445. expected := `<p>This link is <a href="sip:+1-212-555-1212:1234@gateway.com;user=phone" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  446. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  447. if expected != output {
  448. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  449. }
  450. input = `<p>This link is <a href="sips:alice@atlanta.com?subject=project%20x&amp;priority=urgent">valid</a></p>`
  451. expected = `<p>This link is <a href="sips:alice@atlanta.com?subject=project%20x&amp;priority=urgent" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  452. output = SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  453. if expected != output {
  454. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  455. }
  456. }
  457. func TestSkypeURIScheme(t *testing.T) {
  458. input := `<p>This link is <a href="skype:echo123?call">valid</a></p>`
  459. expected := `<p>This link is <a href="skype:echo123?call" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  460. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  461. if expected != output {
  462. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  463. }
  464. }
  465. func TestSpotifyURIScheme(t *testing.T) {
  466. input := `<p>This link is <a href="spotify:track:2jCnn1QPQ3E8ExtLe6INsx">valid</a></p>`
  467. expected := `<p>This link is <a href="spotify:track:2jCnn1QPQ3E8ExtLe6INsx" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  468. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  469. if expected != output {
  470. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  471. }
  472. }
  473. func TestSteamURIScheme(t *testing.T) {
  474. input := `<p>This link is <a href="steam://settings/account">valid</a></p>`
  475. expected := `<p>This link is <a href="steam://settings/account" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  476. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  477. if expected != output {
  478. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  479. }
  480. }
  481. func TestSubversionURIScheme(t *testing.T) {
  482. input := `<p>This link is <a href="svn://example.org">valid</a></p>`
  483. expected := `<p>This link is <a href="svn://example.org" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  484. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  485. if expected != output {
  486. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  487. }
  488. input = `<p>This link is <a href="svn+ssh://example.org">valid</a></p>`
  489. expected = `<p>This link is <a href="svn+ssh://example.org" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  490. output = SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  491. if expected != output {
  492. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  493. }
  494. }
  495. func TestTelURIScheme(t *testing.T) {
  496. input := `<p>This link is <a href="tel:+1-201-555-0123">valid</a></p>`
  497. expected := `<p>This link is <a href="tel:+1-201-555-0123" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  498. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  499. if expected != output {
  500. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  501. }
  502. }
  503. func TestWebcalURIScheme(t *testing.T) {
  504. input := `<p>This link is <a href="webcal://example.com/calendar.ics">valid</a></p>`
  505. expected := `<p>This link is <a href="webcal://example.com/calendar.ics" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  506. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  507. if expected != output {
  508. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  509. }
  510. }
  511. func TestXMPPURIScheme(t *testing.T) {
  512. input := `<p>This link is <a href="xmpp:user@host?subscribe&amp;type=subscribed">valid</a></p>`
  513. expected := `<p>This link is <a href="xmpp:user@host?subscribe&amp;type=subscribed" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  514. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  515. if expected != output {
  516. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  517. }
  518. }
  519. func TestBlacklistedLink(t *testing.T) {
  520. input := `<p>This image is not valid <img src="https://stats.wordpress.com/some-tracker"></p>`
  521. expected := `<p>This image is not valid </p>`
  522. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  523. if expected != output {
  524. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  525. }
  526. }
  527. func TestLinkWithTrackers(t *testing.T) {
  528. input := `<p>This link has trackers <a href="https://example.com/page?utm_source=newsletter">Test</a></p>`
  529. expected := `<p>This link has trackers <a href="https://example.com/page" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">Test</a></p>`
  530. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  531. if expected != output {
  532. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  533. }
  534. }
  535. func TestImageSrcWithTrackers(t *testing.T) {
  536. input := `<p>This image has trackers <img src="https://example.org/?id=123&utm_source=newsletter&utm_medium=email&fbclid=abc123"></p>`
  537. expected := `<p>This image has trackers <img src="https://example.org/?id=123" loading="lazy"></p>`
  538. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  539. if expected != output {
  540. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  541. }
  542. }
  543. func TestPixelTracker(t *testing.T) {
  544. input := `<p><img src="https://tracker1.example.org/" height="1" width="1"> and <img src="https://tracker2.example.org/" height="1" width="1"/></p>`
  545. expected := `<p> and </p>`
  546. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  547. if expected != output {
  548. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  549. }
  550. }
  551. func TestXmlEntities(t *testing.T) {
  552. input := `<pre>echo "test" &gt; /etc/hosts</pre>`
  553. expected := `<pre>echo &#34;test&#34; &gt; /etc/hosts</pre>`
  554. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  555. if expected != output {
  556. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  557. }
  558. }
  559. func TestEspaceAttributes(t *testing.T) {
  560. input := `<td rowspan="<b>test</b>">test</td>`
  561. expected := `<td rowspan="&lt;b&gt;test&lt;/b&gt;">test</td>`
  562. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  563. if expected != output {
  564. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  565. }
  566. }
  567. func TestReplaceYoutubeURL(t *testing.T) {
  568. input := `<iframe src="http://www.youtube.com/embed/test123?version=3&#038;rel=1&#038;fs=1&#038;autohide=2&#038;showsearch=0&#038;showinfo=1&#038;iv_load_policy=1&#038;wmode=transparent"></iframe>`
  569. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?version=3&amp;rel=1&amp;fs=1&amp;autohide=2&amp;showsearch=0&amp;showinfo=1&amp;iv_load_policy=1&amp;wmode=transparent" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  570. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  571. if expected != output {
  572. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  573. }
  574. }
  575. func TestReplaceSecureYoutubeURL(t *testing.T) {
  576. input := `<iframe src="https://www.youtube.com/embed/test123"></iframe>`
  577. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  578. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  579. if expected != output {
  580. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  581. }
  582. }
  583. func TestReplaceSecureYoutubeURLWithParameters(t *testing.T) {
  584. input := `<iframe src="https://www.youtube.com/embed/test123?rel=0&amp;controls=0"></iframe>`
  585. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  586. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  587. if expected != output {
  588. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  589. }
  590. }
  591. func TestReplaceYoutubeURLAlreadyReplaced(t *testing.T) {
  592. input := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin"></iframe>`
  593. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  594. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  595. if expected != output {
  596. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  597. }
  598. }
  599. func TestReplaceProtocolRelativeYoutubeURL(t *testing.T) {
  600. input := `<iframe src="//www.youtube.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen"></iframe>`
  601. expected := `<iframe src="https://www.youtube-nocookie.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  602. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  603. if expected != output {
  604. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  605. }
  606. }
  607. func TestReplaceYoutubeURLWithCustomURL(t *testing.T) {
  608. os.Clearenv()
  609. os.Setenv("YOUTUBE_EMBED_URL_OVERRIDE", "https://invidious.custom/embed/")
  610. var err error
  611. parser := config.NewParser()
  612. config.Opts, err = parser.ParseEnvironmentVariables()
  613. if err != nil {
  614. t.Fatalf(`Parsing failure: %v`, err)
  615. }
  616. input := `<iframe src="https://www.youtube.com/embed/test123?version=3&#038;rel=1&#038;fs=1&#038;autohide=2&#038;showsearch=0&#038;showinfo=1&#038;iv_load_policy=1&#038;wmode=transparent"></iframe>`
  617. expected := `<iframe src="https://invidious.custom/embed/test123?version=3&amp;rel=1&amp;fs=1&amp;autohide=2&amp;showsearch=0&amp;showinfo=1&amp;iv_load_policy=1&amp;wmode=transparent" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  618. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  619. if expected != output {
  620. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  621. }
  622. }
  623. func TestReplaceIframeVimedoDNTURL(t *testing.T) {
  624. input := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0"></iframe>`
  625. expected := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0&amp;dnt=1" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  626. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  627. if expected != output {
  628. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  629. }
  630. }
  631. func TestReplaceNoScript(t *testing.T) {
  632. input := `<p>Before paragraph.</p><noscript>Inside <code>noscript</code> tag with an image: <img src="http://example.org/" alt="Test" loading="lazy"></noscript><p>After paragraph.</p>`
  633. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  634. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  635. if expected != output {
  636. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  637. }
  638. }
  639. func TestReplaceScript(t *testing.T) {
  640. input := `<p>Before paragraph.</p><script type="text/javascript">alert("1");</script><p>After paragraph.</p>`
  641. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  642. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  643. if expected != output {
  644. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  645. }
  646. }
  647. func TestReplaceStyle(t *testing.T) {
  648. input := `<p>Before paragraph.</p><style>body { background-color: #ff0000; }</style><p>After paragraph.</p>`
  649. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  650. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  651. if expected != output {
  652. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  653. }
  654. }
  655. func TestHiddenParagraph(t *testing.T) {
  656. input := `<p>Before paragraph.</p><p hidden>This should <em>not</em> appear in the <strong>output</strong></p><p>After paragraph.</p>`
  657. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  658. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  659. if expected != output {
  660. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  661. }
  662. }
  663. func TestAttributesAreStripped(t *testing.T) {
  664. input := `<p style="color: red;">Some text.<hr style="color: blue"/>Test.</p>`
  665. expected := `<p>Some text.<hr/>Test.</p>`
  666. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  667. if expected != output {
  668. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  669. }
  670. }
  671. func TestMathML(t *testing.T) {
  672. input := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
  673. expected := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
  674. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  675. if expected != output {
  676. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  677. }
  678. }
  679. func TestInvalidMathMLXMLNamespace(t *testing.T) {
  680. input := `<math xmlns="http://example.org"><msup><mi>x</mi><mn>2</mn></msup></math>`
  681. expected := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
  682. output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
  683. if expected != output {
  684. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  685. }
  686. }