sanitizer_test.go 41 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package sanitizer // import "miniflux.app/v2/internal/reader/sanitizer"
  4. import (
  5. "os"
  6. "strings"
  7. "testing"
  8. "golang.org/x/net/html"
  9. "miniflux.app/v2/internal/config"
  10. )
  11. func sanitizeHTMLWithDefaultOptions(baseURL, rawHTML string) string {
  12. return SanitizeHTML(baseURL, rawHTML, &SanitizerOptions{
  13. OpenLinksInNewTab: true,
  14. })
  15. }
  16. func BenchmarkSanitize(b *testing.B) {
  17. var testCases = map[string][]string{
  18. "miniflux_github.html": {"https://github.com/miniflux/v2", ""},
  19. "miniflux_wikipedia.html": {"https://fr.wikipedia.org/wiki/Miniflux", ""},
  20. }
  21. for filename := range testCases {
  22. data, err := os.ReadFile("testdata/" + filename)
  23. if err != nil {
  24. b.Fatalf(`Unable to read file %q: %v`, filename, err)
  25. }
  26. testCases[filename][1] = string(data)
  27. }
  28. for b.Loop() {
  29. for _, v := range testCases {
  30. sanitizeHTMLWithDefaultOptions(v[0], v[1])
  31. }
  32. }
  33. }
  34. func FuzzSanitizer(f *testing.F) {
  35. f.Fuzz(func(t *testing.T, orig string) {
  36. tok := html.NewTokenizer(strings.NewReader(orig))
  37. i := 0
  38. for tok.Next() != html.ErrorToken {
  39. i++
  40. }
  41. out := sanitizeHTMLWithDefaultOptions("", orig)
  42. tok = html.NewTokenizer(strings.NewReader(out))
  43. j := 0
  44. for tok.Next() != html.ErrorToken {
  45. j++
  46. }
  47. if j > i {
  48. t.Errorf("Got more html tokens in the sanitized html.")
  49. }
  50. })
  51. }
  52. func TestValidInput(t *testing.T) {
  53. input := `<p>This is a <strong>text</strong> with an image: <img src="http://example.org/" alt="Test" loading="lazy">.</p>`
  54. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  55. if input != output {
  56. t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
  57. }
  58. }
  59. func TestImgWithWidthAndHeightAttribute(t *testing.T) {
  60. input := `<img src="https://example.org/image.png" width="10" height="20">`
  61. expected := `<img src="https://example.org/image.png" width="10" height="20" loading="lazy">`
  62. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  63. if output != expected {
  64. t.Errorf(`Wrong output: %s`, output)
  65. }
  66. }
  67. func TestImgWithWidthAttributeLargerThanMinifluxLayout(t *testing.T) {
  68. input := `<img src="https://example.org/image.png" width="1200" height="675">`
  69. expected := `<img src="https://example.org/image.png" loading="lazy">`
  70. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  71. if output != expected {
  72. t.Errorf(`Wrong output: %s`, output)
  73. }
  74. }
  75. func TestImgWithIncorrectWidthAndHeightAttribute(t *testing.T) {
  76. input := `<img src="https://example.org/image.png" width="10px" height="20px">`
  77. expected := `<img src="https://example.org/image.png" loading="lazy">`
  78. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  79. if output != expected {
  80. t.Errorf(`Wrong output: %s`, output)
  81. }
  82. }
  83. func TestImgWithIncorrectWidthAttribute(t *testing.T) {
  84. input := `<img src="https://example.org/image.png" width="10px" height="20">`
  85. expected := `<img src="https://example.org/image.png" height="20" loading="lazy">`
  86. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  87. if output != expected {
  88. t.Errorf(`Wrong output: %s`, output)
  89. }
  90. }
  91. func TestImgWithEmptyWidthAndHeightAttribute(t *testing.T) {
  92. input := `<img src="https://example.org/image.png" width="" height="">`
  93. expected := `<img src="https://example.org/image.png" loading="lazy">`
  94. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  95. if output != expected {
  96. t.Errorf(`Wrong output: %s`, output)
  97. }
  98. }
  99. func TestImgWithIncorrectHeightAttribute(t *testing.T) {
  100. input := `<img src="https://example.org/image.png" width="10" height="20px">`
  101. expected := `<img src="https://example.org/image.png" width="10" loading="lazy">`
  102. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  103. if output != expected {
  104. t.Errorf(`Wrong output: %s`, output)
  105. }
  106. }
  107. func TestImgWithNegativeWidthAttribute(t *testing.T) {
  108. input := `<img src="https://example.org/image.png" width="-10" height="20">`
  109. expected := `<img src="https://example.org/image.png" height="20" loading="lazy">`
  110. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  111. if output != expected {
  112. t.Errorf(`Wrong output: %s`, output)
  113. }
  114. }
  115. func TestImgWithNegativeHeightAttribute(t *testing.T) {
  116. input := `<img src="https://example.org/image.png" width="10" height="-20">`
  117. expected := `<img src="https://example.org/image.png" width="10" loading="lazy">`
  118. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  119. if output != expected {
  120. t.Errorf(`Wrong output: %s`, output)
  121. }
  122. }
  123. func TestImgWithTextDataURL(t *testing.T) {
  124. input := `<img src="data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==" alt="Example">`
  125. expected := ``
  126. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  127. if output != expected {
  128. t.Errorf(`Wrong output: %s`, output)
  129. }
  130. }
  131. func TestImgWithDataURL(t *testing.T) {
  132. input := `<img src="data:image/gif;base64,test" alt="Example">`
  133. expected := `<img src="data:image/gif;base64,test" alt="Example" loading="lazy">`
  134. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  135. if output != expected {
  136. t.Errorf(`Wrong output: %s`, output)
  137. }
  138. }
  139. func TestImgWithSrcsetAttribute(t *testing.T) {
  140. input := `<img srcset="example-320w.jpg, example-480w.jpg 1.5x, example-640w.jpg 2x, example-640w.jpg 640w" src="example-640w.jpg" alt="Example">`
  141. expected := `<img srcset="http://example.org/example-320w.jpg, http://example.org/example-480w.jpg 1.5x, http://example.org/example-640w.jpg 2x, http://example.org/example-640w.jpg 640w" src="http://example.org/example-640w.jpg" alt="Example" loading="lazy">`
  142. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  143. if output != expected {
  144. t.Errorf(`Wrong output: %s`, output)
  145. }
  146. }
  147. func TestImgWithSrcsetAndNoSrcAttribute(t *testing.T) {
  148. input := `<img srcset="example-320w.jpg, example-480w.jpg 1.5x, example-640w.jpg 2x, example-640w.jpg 640w" alt="Example">`
  149. expected := `<img srcset="http://example.org/example-320w.jpg, http://example.org/example-480w.jpg 1.5x, http://example.org/example-640w.jpg 2x, http://example.org/example-640w.jpg 640w" alt="Example" loading="lazy">`
  150. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  151. if output != expected {
  152. t.Errorf(`Wrong output: %s`, output)
  153. }
  154. }
  155. func TestImgWithFetchPriorityAttribute(t *testing.T) {
  156. cases := []struct {
  157. input string
  158. expected string
  159. }{
  160. {
  161. `<img src="https://example.org/image.png" fetchpriority="high">`,
  162. `<img src="https://example.org/image.png" fetchpriority="high" loading="lazy">`,
  163. },
  164. {
  165. `<img src="https://example.org/image.png" fetchpriority="low">`,
  166. `<img src="https://example.org/image.png" fetchpriority="low" loading="lazy">`,
  167. },
  168. {
  169. `<img src="https://example.org/image.png" fetchpriority="auto">`,
  170. `<img src="https://example.org/image.png" fetchpriority="auto" loading="lazy">`,
  171. },
  172. }
  173. for _, tc := range cases {
  174. output := sanitizeHTMLWithDefaultOptions("http://example.org/", tc.input)
  175. if output != tc.expected {
  176. t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
  177. }
  178. }
  179. }
  180. func TestImgWithInvalidFetchPriorityAttribute(t *testing.T) {
  181. input := `<img src="https://example.org/image.png" fetchpriority="invalid">`
  182. expected := `<img src="https://example.org/image.png" loading="lazy">`
  183. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  184. if output != expected {
  185. t.Errorf(`Wrong output: expected %q, got %q`, expected, output)
  186. }
  187. }
  188. func TestNonImgWithFetchPriorityAttribute(t *testing.T) {
  189. input := `<p fetchpriority="high">Text</p>`
  190. expected := `<p>Text</p>`
  191. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  192. if output != expected {
  193. t.Errorf(`Wrong output: expected %q, got %q`, expected, output)
  194. }
  195. }
  196. func TestImgWithDecodingAttribute(t *testing.T) {
  197. cases := []struct {
  198. input string
  199. expected string
  200. }{
  201. {
  202. `<img src="https://example.org/image.png" decoding="sync">`,
  203. `<img src="https://example.org/image.png" decoding="sync" loading="lazy">`,
  204. },
  205. {
  206. `<img src="https://example.org/image.png" decoding="async">`,
  207. `<img src="https://example.org/image.png" decoding="async" loading="lazy">`,
  208. },
  209. {
  210. `<img src="https://example.org/image.png" decoding="auto">`,
  211. `<img src="https://example.org/image.png" decoding="auto" loading="lazy">`,
  212. },
  213. }
  214. for _, tc := range cases {
  215. output := sanitizeHTMLWithDefaultOptions("http://example.org/", tc.input)
  216. if output != tc.expected {
  217. t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
  218. }
  219. }
  220. }
  221. func TestImgWithInvalidDecodingAttribute(t *testing.T) {
  222. input := `<img src="https://example.org/image.png" decoding="invalid">`
  223. expected := `<img src="https://example.org/image.png" loading="lazy">`
  224. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  225. if output != expected {
  226. t.Errorf(`Wrong output: expected %q, got %q`, expected, output)
  227. }
  228. }
  229. func TestNonImgWithDecodingAttribute(t *testing.T) {
  230. input := `<p decoding="async">Text</p>`
  231. expected := `<p>Text</p>`
  232. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  233. if output != expected {
  234. t.Errorf(`Wrong output: expected %q, got %q`, expected, output)
  235. }
  236. }
  237. func TestSourceWithSrcsetAndMedia(t *testing.T) {
  238. input := `<picture><source media="(min-width: 800px)" srcset="elva-800w.jpg"></picture>`
  239. expected := `<picture><source media="(min-width: 800px)" srcset="http://example.org/elva-800w.jpg"></picture>`
  240. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  241. if output != expected {
  242. t.Errorf(`Wrong output: %s`, output)
  243. }
  244. }
  245. func TestMediumImgWithSrcset(t *testing.T) {
  246. input := `<img alt="Image for post" class="t u v ef aj" src="https://miro.medium.com/max/5460/1*aJ9JibWDqO81qMfNtqgqrw.jpeg" srcset="https://miro.medium.com/max/552/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 276w, https://miro.medium.com/max/1000/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 500w" sizes="500px" width="2730" height="3407">`
  247. expected := `<img alt="Image for post" src="https://miro.medium.com/max/5460/1*aJ9JibWDqO81qMfNtqgqrw.jpeg" srcset="https://miro.medium.com/max/552/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 276w, https://miro.medium.com/max/1000/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 500w" sizes="500px" loading="lazy">`
  248. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  249. if output != expected {
  250. t.Errorf(`Wrong output: %s`, output)
  251. }
  252. }
  253. func TestSelfClosingTags(t *testing.T) {
  254. input := `<p>This <br> is a <strong>text</strong> <br/>with an image: <img src="http://example.org/" alt="Test" loading="lazy"/>.</p>`
  255. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  256. if input != output {
  257. t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
  258. }
  259. }
  260. func TestTable(t *testing.T) {
  261. input := `<table><tr><th>A</th><th colspan="2">B</th></tr><tr><td>C</td><td>D</td><td>E</td></tr></table>`
  262. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  263. if input != output {
  264. t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
  265. }
  266. }
  267. func TestRelativeURL(t *testing.T) {
  268. input := `This <a href="/test.html">link is relative</a> and this image: <img src="../folder/image.png"/>`
  269. expected := `This <a href="http://example.org/test.html" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">link is relative</a> and this image: <img src="http://example.org/folder/image.png" loading="lazy"/>`
  270. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  271. if expected != output {
  272. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  273. }
  274. }
  275. func TestProtocolRelativeURL(t *testing.T) {
  276. input := `This <a href="//static.example.org/index.html">link is relative</a>.`
  277. expected := `This <a href="https://static.example.org/index.html" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">link is relative</a>.`
  278. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  279. if expected != output {
  280. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  281. }
  282. }
  283. func TestInvalidTag(t *testing.T) {
  284. input := `<p>My invalid <z>tag</z>.</p>`
  285. expected := `<p>My invalid tag.</p>`
  286. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  287. if expected != output {
  288. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  289. }
  290. }
  291. func TestVideoTag(t *testing.T) {
  292. input := `<p>My valid <video src="videofile.webm" autoplay poster="posterimage.jpg">fallback</video>.</p>`
  293. expected := `<p>My valid <video src="http://example.org/videofile.webm" poster="http://example.org/posterimage.jpg" controls>fallback</video>.</p>`
  294. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  295. if expected != output {
  296. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  297. }
  298. }
  299. func TestAudioAndSourceTag(t *testing.T) {
  300. input := `<p>My music <audio controls="controls"><source src="foo.wav" type="audio/wav"></audio>.</p>`
  301. expected := `<p>My music <audio controls><source src="http://example.org/foo.wav" type="audio/wav"></audio>.</p>`
  302. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  303. if expected != output {
  304. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  305. }
  306. }
  307. func TestUnknownTag(t *testing.T) {
  308. input := `<p>My invalid <unknown>tag</unknown>.</p>`
  309. expected := `<p>My invalid tag.</p>`
  310. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  311. if expected != output {
  312. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  313. }
  314. }
  315. func TestInvalidNestedTag(t *testing.T) {
  316. input := `<p>My invalid <z>tag with some <em>valid</em> tag</z>.</p>`
  317. expected := `<p>My invalid tag with some <em>valid</em> tag.</p>`
  318. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  319. if expected != output {
  320. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  321. }
  322. }
  323. func TestInvalidIFrame(t *testing.T) {
  324. config.Opts = config.NewConfigOptions()
  325. input := `<iframe src="http://example.org/"></iframe>`
  326. expected := ``
  327. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  328. if expected != output {
  329. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  330. }
  331. }
  332. func TestSameDomainIFrame(t *testing.T) {
  333. config.Opts = config.NewConfigOptions()
  334. input := `<iframe src="http://example.com/test"></iframe>`
  335. expected := ``
  336. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  337. if expected != output {
  338. t.Errorf(`Wrong output: %q != %q`, expected, output)
  339. }
  340. }
  341. func TestInvidiousIFrame(t *testing.T) {
  342. config.Opts = config.NewConfigOptions()
  343. input := `<iframe src="https://yewtu.be/watch?v=video_id"></iframe>`
  344. expected := `<iframe src="https://yewtu.be/watch?v=video_id" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  345. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  346. if expected != output {
  347. t.Errorf(`Wrong output: %q != %q`, expected, output)
  348. }
  349. }
  350. func TestCustomYoutubeEmbedURL(t *testing.T) {
  351. os.Setenv("YOUTUBE_EMBED_URL_OVERRIDE", "https://www.invidious.custom/embed/")
  352. defer os.Clearenv()
  353. var err error
  354. if config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables(); err != nil {
  355. t.Fatalf(`Parsing failure: %v`, err)
  356. }
  357. input := `<iframe src="https://www.invidious.custom/embed/1234"></iframe>`
  358. expected := `<iframe src="https://www.invidious.custom/embed/1234" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  359. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  360. if expected != output {
  361. t.Errorf(`Wrong output: %q != %q`, expected, output)
  362. }
  363. }
  364. func TestIFrameWithChildElements(t *testing.T) {
  365. config.Opts = config.NewConfigOptions()
  366. input := `<iframe src="https://www.youtube.com/"><p>test</p></iframe>`
  367. expected := `<iframe src="https://www.youtube.com/" referrerpolicy="strict-origin-when-cross-origin" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  368. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  369. if expected != output {
  370. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  371. }
  372. }
  373. func TestIFrameWithReferrerPolicy(t *testing.T) {
  374. config.Opts = config.NewConfigOptions()
  375. input := `<iframe src="https://www.youtube.com/embed/test123" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  376. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123" referrerpolicy="strict-origin-when-cross-origin" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  377. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  378. if expected != output {
  379. t.Errorf(`Wrong output: %q != %q`, expected, output)
  380. }
  381. }
  382. func TestLinkWithTarget(t *testing.T) {
  383. input := `<p>This link is <a href="http://example.org/index.html">an anchor</a></p>`
  384. expected := `<p>This link is <a href="http://example.org/index.html" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">an anchor</a></p>`
  385. output := SanitizeHTML("http://example.org/", input, &SanitizerOptions{OpenLinksInNewTab: true})
  386. if expected != output {
  387. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  388. }
  389. }
  390. func TestLinkWithNoTarget(t *testing.T) {
  391. input := `<p>This link is <a href="http://example.org/index.html">an anchor</a></p>`
  392. expected := `<p>This link is <a href="http://example.org/index.html" rel="noopener noreferrer" referrerpolicy="no-referrer">an anchor</a></p>`
  393. output := SanitizeHTML("http://example.org/", input, &SanitizerOptions{OpenLinksInNewTab: false})
  394. if expected != output {
  395. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  396. }
  397. }
  398. func TestAnchorLink(t *testing.T) {
  399. input := `<p>This link is <a href="#some-anchor">an anchor</a></p>`
  400. expected := `<p>This link is <a href="#some-anchor">an anchor</a></p>`
  401. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  402. if expected != output {
  403. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  404. }
  405. }
  406. func TestInvalidURLScheme(t *testing.T) {
  407. input := `<p>This link is <a src="file:///etc/passwd">not valid</a></p>`
  408. expected := `<p>This link is not valid</p>`
  409. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  410. if expected != output {
  411. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  412. }
  413. }
  414. func TestAPTURIScheme(t *testing.T) {
  415. input := `<p>This link is <a href="apt:some-package?channel=test">valid</a></p>`
  416. expected := `<p>This link is <a href="apt:some-package?channel=test" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  417. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  418. if expected != output {
  419. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  420. }
  421. }
  422. func TestBitcoinURIScheme(t *testing.T) {
  423. input := `<p>This link is <a href="bitcoin:175tWpb8K1S7NmH4Zx6rewF9WQrcZv245W">valid</a></p>`
  424. expected := `<p>This link is <a href="bitcoin:175tWpb8K1S7NmH4Zx6rewF9WQrcZv245W" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  425. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  426. if expected != output {
  427. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  428. }
  429. }
  430. func TestCallToURIScheme(t *testing.T) {
  431. input := `<p>This link is <a href="callto:12345679">valid</a></p>`
  432. expected := `<p>This link is <a href="callto:12345679" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  433. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  434. if expected != output {
  435. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  436. }
  437. }
  438. func TestFeedURIScheme(t *testing.T) {
  439. input := `<p>This link is <a href="feed://example.com/rss.xml">valid</a></p>`
  440. expected := `<p>This link is <a href="feed://example.com/rss.xml" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  441. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  442. if expected != output {
  443. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  444. }
  445. input = `<p>This link is <a href="feed:https://example.com/rss.xml">valid</a></p>`
  446. expected = `<p>This link is <a href="feed:https://example.com/rss.xml" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  447. output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  448. if expected != output {
  449. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  450. }
  451. }
  452. func TestGeoURIScheme(t *testing.T) {
  453. input := `<p>This link is <a href="geo:13.4125,103.8667">valid</a></p>`
  454. expected := `<p>This link is <a href="geo:13.4125,103.8667" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  455. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  456. if expected != output {
  457. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  458. }
  459. }
  460. func TestItunesURIScheme(t *testing.T) {
  461. input := `<p>This link is <a href="itms://itunes.com/apps/my-app-name">valid</a></p>`
  462. expected := `<p>This link is <a href="itms://itunes.com/apps/my-app-name" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  463. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  464. if expected != output {
  465. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  466. }
  467. input = `<p>This link is <a href="itms-apps://itunes.com/apps/my-app-name">valid</a></p>`
  468. expected = `<p>This link is <a href="itms-apps://itunes.com/apps/my-app-name" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  469. output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  470. if expected != output {
  471. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  472. }
  473. }
  474. func TestMagnetURIScheme(t *testing.T) {
  475. input := `<p>This link is <a href="magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&amp;xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7">valid</a></p>`
  476. expected := `<p>This link is <a href="magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&amp;xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  477. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  478. if expected != output {
  479. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  480. }
  481. }
  482. func TestMailtoURIScheme(t *testing.T) {
  483. input := `<p>This link is <a href="mailto:jsmith@example.com?subject=A%20Test&amp;body=My%20idea%20is%3A%20%0A">valid</a></p>`
  484. expected := `<p>This link is <a href="mailto:jsmith@example.com?subject=A%20Test&amp;body=My%20idea%20is%3A%20%0A" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  485. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  486. if expected != output {
  487. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  488. }
  489. }
  490. func TestNewsURIScheme(t *testing.T) {
  491. input := `<p>This link is <a href="news://news.server.example/*">valid</a></p>`
  492. expected := `<p>This link is <a href="news://news.server.example/*" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  493. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  494. if expected != output {
  495. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  496. }
  497. input = `<p>This link is <a href="news:example.group.this">valid</a></p>`
  498. expected = `<p>This link is <a href="news:example.group.this" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  499. output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  500. if expected != output {
  501. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  502. }
  503. input = `<p>This link is <a href="nntp://news.server.example/example.group.this">valid</a></p>`
  504. expected = `<p>This link is <a href="nntp://news.server.example/example.group.this" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  505. output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  506. if expected != output {
  507. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  508. }
  509. }
  510. func TestRTMPURIScheme(t *testing.T) {
  511. input := `<p>This link is <a href="rtmp://mycompany.com/vod/mp4:mycoolvideo.mov">valid</a></p>`
  512. expected := `<p>This link is <a href="rtmp://mycompany.com/vod/mp4:mycoolvideo.mov" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  513. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  514. if expected != output {
  515. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  516. }
  517. }
  518. func TestSIPURIScheme(t *testing.T) {
  519. input := `<p>This link is <a href="sip:+1-212-555-1212:1234@gateway.com;user=phone">valid</a></p>`
  520. expected := `<p>This link is <a href="sip:+1-212-555-1212:1234@gateway.com;user=phone" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  521. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  522. if expected != output {
  523. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  524. }
  525. input = `<p>This link is <a href="sips:alice@atlanta.com?subject=project%20x&amp;priority=urgent">valid</a></p>`
  526. expected = `<p>This link is <a href="sips:alice@atlanta.com?subject=project%20x&amp;priority=urgent" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  527. output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  528. if expected != output {
  529. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  530. }
  531. }
  532. func TestSkypeURIScheme(t *testing.T) {
  533. input := `<p>This link is <a href="skype:echo123?call">valid</a></p>`
  534. expected := `<p>This link is <a href="skype:echo123?call" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  535. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  536. if expected != output {
  537. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  538. }
  539. }
  540. func TestSpotifyURIScheme(t *testing.T) {
  541. input := `<p>This link is <a href="spotify:track:2jCnn1QPQ3E8ExtLe6INsx">valid</a></p>`
  542. expected := `<p>This link is <a href="spotify:track:2jCnn1QPQ3E8ExtLe6INsx" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  543. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  544. if expected != output {
  545. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  546. }
  547. }
  548. func TestSteamURIScheme(t *testing.T) {
  549. input := `<p>This link is <a href="steam://settings/account">valid</a></p>`
  550. expected := `<p>This link is <a href="steam://settings/account" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  551. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  552. if expected != output {
  553. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  554. }
  555. }
  556. func TestSubversionURIScheme(t *testing.T) {
  557. input := `<p>This link is <a href="svn://example.org">valid</a></p>`
  558. expected := `<p>This link is <a href="svn://example.org" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  559. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  560. if expected != output {
  561. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  562. }
  563. input = `<p>This link is <a href="svn+ssh://example.org">valid</a></p>`
  564. expected = `<p>This link is <a href="svn+ssh://example.org" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  565. output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  566. if expected != output {
  567. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  568. }
  569. }
  570. func TestTelURIScheme(t *testing.T) {
  571. input := `<p>This link is <a href="tel:+1-201-555-0123">valid</a></p>`
  572. expected := `<p>This link is <a href="tel:+1-201-555-0123" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  573. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  574. if expected != output {
  575. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  576. }
  577. }
  578. func TestWebcalURIScheme(t *testing.T) {
  579. input := `<p>This link is <a href="webcal://example.com/calendar.ics">valid</a></p>`
  580. expected := `<p>This link is <a href="webcal://example.com/calendar.ics" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  581. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  582. if expected != output {
  583. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  584. }
  585. }
  586. func TestXMPPURIScheme(t *testing.T) {
  587. input := `<p>This link is <a href="xmpp:user@host?subscribe&amp;type=subscribed">valid</a></p>`
  588. expected := `<p>This link is <a href="xmpp:user@host?subscribe&amp;type=subscribed" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`
  589. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  590. if expected != output {
  591. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  592. }
  593. }
  594. func TestBlacklistedLink(t *testing.T) {
  595. input := `<p>This image is not valid <img src="https://stats.wordpress.com/some-tracker"></p>`
  596. expected := `<p>This image is not valid </p>`
  597. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  598. if expected != output {
  599. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  600. }
  601. }
  602. func TestLinkWithTrackers(t *testing.T) {
  603. input := `<p>This link has trackers <a href="https://example.com/page?utm_source=newsletter">Test</a></p>`
  604. expected := `<p>This link has trackers <a href="https://example.com/page" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">Test</a></p>`
  605. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  606. if expected != output {
  607. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  608. }
  609. }
  610. func TestImageSrcWithTrackers(t *testing.T) {
  611. input := `<p>This image has trackers <img src="https://example.org/?id=123&utm_source=newsletter&utm_medium=email&fbclid=abc123"></p>`
  612. expected := `<p>This image has trackers <img src="https://example.org/?id=123" loading="lazy"></p>`
  613. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  614. if expected != output {
  615. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  616. }
  617. }
  618. func Test1x1PixelTracker(t *testing.T) {
  619. input := `<p><img src="https://tracker1.example.org/" height="1" width="1"> and <img src="https://tracker2.example.org/" height="1" width="1"/></p>`
  620. expected := `<p> and </p>`
  621. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  622. if expected != output {
  623. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  624. }
  625. }
  626. func Test0x0PixelTracker(t *testing.T) {
  627. input := `<p><img src="https://tracker1.example.org/" height="0" width="0"> and <img src="https://tracker2.example.org/" height="0" width="0"/></p>`
  628. expected := `<p> and </p>`
  629. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  630. if expected != output {
  631. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  632. }
  633. }
  634. func TestXmlEntities(t *testing.T) {
  635. input := `<pre>echo "test" &gt; /etc/hosts</pre>`
  636. expected := `<pre>echo &#34;test&#34; &gt; /etc/hosts</pre>`
  637. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  638. if expected != output {
  639. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  640. }
  641. }
  642. func TestEspaceAttributes(t *testing.T) {
  643. input := `<td rowspan="<b>test</b>">test</td>`
  644. expected := `<td rowspan="&lt;b&gt;test&lt;/b&gt;">test</td>`
  645. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  646. if expected != output {
  647. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  648. }
  649. }
  650. func TestReplaceYoutubeURL(t *testing.T) {
  651. os.Clearenv()
  652. var err error
  653. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  654. if err != nil {
  655. t.Fatalf(`Parsing failure: %v`, err)
  656. }
  657. input := `<iframe src="http://www.youtube.com/embed/test123?version=3&#038;rel=1&#038;fs=1&#038;autohide=2&#038;showsearch=0&#038;showinfo=1&#038;iv_load_policy=1&#038;wmode=transparent"></iframe>`
  658. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?version=3&amp;rel=1&amp;fs=1&amp;autohide=2&amp;showsearch=0&amp;showinfo=1&amp;iv_load_policy=1&amp;wmode=transparent" referrerpolicy="strict-origin-when-cross-origin" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  659. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  660. if expected != output {
  661. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  662. }
  663. }
  664. func TestReplaceSecureYoutubeURL(t *testing.T) {
  665. os.Clearenv()
  666. var err error
  667. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  668. if err != nil {
  669. t.Fatalf(`Parsing failure: %v`, err)
  670. }
  671. input := `<iframe src="https://www.youtube.com/embed/test123"></iframe>`
  672. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123" referrerpolicy="strict-origin-when-cross-origin" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  673. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  674. if expected != output {
  675. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  676. }
  677. }
  678. func TestReplaceSecureYoutubeURLWithParameters(t *testing.T) {
  679. os.Clearenv()
  680. var err error
  681. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  682. if err != nil {
  683. t.Fatalf(`Parsing failure: %v`, err)
  684. }
  685. input := `<iframe src="https://www.youtube.com/embed/test123?rel=0&amp;controls=0"></iframe>`
  686. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" referrerpolicy="strict-origin-when-cross-origin" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  687. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  688. if expected != output {
  689. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  690. }
  691. }
  692. func TestReplaceYoutubeURLAlreadyReplaced(t *testing.T) {
  693. os.Clearenv()
  694. var err error
  695. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  696. if err != nil {
  697. t.Fatalf(`Parsing failure: %v`, err)
  698. }
  699. input := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin"></iframe>`
  700. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" referrerpolicy="strict-origin-when-cross-origin" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  701. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  702. if expected != output {
  703. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  704. }
  705. }
  706. func TestReplaceProtocolRelativeYoutubeURL(t *testing.T) {
  707. os.Clearenv()
  708. var err error
  709. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  710. if err != nil {
  711. t.Fatalf(`Parsing failure: %v`, err)
  712. }
  713. input := `<iframe src="//www.youtube.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen"></iframe>`
  714. expected := `<iframe src="https://www.youtube-nocookie.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen" referrerpolicy="strict-origin-when-cross-origin" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  715. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  716. if expected != output {
  717. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  718. }
  719. }
  720. func TestReplaceYoutubeURLWithCustomURL(t *testing.T) {
  721. defer os.Clearenv()
  722. os.Setenv("YOUTUBE_EMBED_URL_OVERRIDE", "https://invidious.custom/embed/")
  723. var err error
  724. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  725. if err != nil {
  726. t.Fatalf(`Parsing failure: %v`, err)
  727. }
  728. input := `<iframe src="https://www.youtube.com/embed/test123?version=3&#038;rel=1&#038;fs=1&#038;autohide=2&#038;showsearch=0&#038;showinfo=1&#038;iv_load_policy=1&#038;wmode=transparent"></iframe>`
  729. expected := `<iframe src="https://invidious.custom/embed/test123?version=3&amp;rel=1&amp;fs=1&amp;autohide=2&amp;showsearch=0&amp;showinfo=1&amp;iv_load_policy=1&amp;wmode=transparent" referrerpolicy="strict-origin-when-cross-origin" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  730. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  731. if expected != output {
  732. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  733. }
  734. }
  735. func TestVimeoIframeRewriteWithQueryString(t *testing.T) {
  736. input := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0"></iframe>`
  737. expected := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0&amp;dnt=1" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  738. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  739. if expected != output {
  740. t.Errorf(`Wrong output: %q != %q`, expected, output)
  741. }
  742. }
  743. func TestVimeoIframeRewriteWithoutQueryString(t *testing.T) {
  744. input := `<iframe src="https://player.vimeo.com/video/123456"></iframe>`
  745. expected := `<iframe src="https://player.vimeo.com/video/123456?dnt=1" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  746. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  747. if expected != output {
  748. t.Errorf(`Wrong output: %q != %q`, expected, output)
  749. }
  750. }
  751. func TestReplaceNoScript(t *testing.T) {
  752. input := `<p>Before paragraph.</p><noscript>Inside <code>noscript</code> tag with an image: <img src="http://example.org/" alt="Test" loading="lazy"></noscript><p>After paragraph.</p>`
  753. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  754. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  755. if expected != output {
  756. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  757. }
  758. }
  759. func TestReplaceScript(t *testing.T) {
  760. input := `<p>Before paragraph.</p><script type="text/javascript">alert("1");</script><p>After paragraph.</p>`
  761. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  762. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  763. if expected != output {
  764. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  765. }
  766. }
  767. func TestReplaceStyle(t *testing.T) {
  768. input := `<p>Before paragraph.</p><style>body { background-color: #ff0000; }</style><p>After paragraph.</p>`
  769. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  770. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  771. if expected != output {
  772. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  773. }
  774. }
  775. func TestHiddenParagraph(t *testing.T) {
  776. input := `<p>Before paragraph.</p><p hidden>This should <em>not</em> appear in the <strong>output</strong></p><p>After paragraph.</p>`
  777. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  778. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  779. if expected != output {
  780. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  781. }
  782. }
  783. func TestAttributesAreStripped(t *testing.T) {
  784. input := `<p style="color: red;">Some text.<hr style="color: blue"/>Test.</p>`
  785. expected := `<p>Some text.<hr/>Test.</p>`
  786. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  787. if expected != output {
  788. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  789. }
  790. }
  791. func TestMathML(t *testing.T) {
  792. input := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
  793. expected := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
  794. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  795. if expected != output {
  796. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  797. }
  798. }
  799. func TestInvalidMathMLXMLNamespace(t *testing.T) {
  800. input := `<math xmlns="http://example.org"><msup><mi>x</mi><mn>2</mn></msup></math>`
  801. expected := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
  802. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  803. if expected != output {
  804. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  805. }
  806. }
  807. func TestBlockedResourcesSubstrings(t *testing.T) {
  808. input := `<p>Before paragraph.</p><img src="http://stats.wordpress.com/something.php" alt="Blocked Resource"><p>After paragraph.</p>`
  809. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  810. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  811. if expected != output {
  812. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  813. }
  814. input = `<p>Before paragraph.</p><img src="http://twitter.com/share?text=This+is+google+a+search+engine&url=https%3A%2F%2Fwww.google.com" alt="Blocked Resource"><p>After paragraph.</p>`
  815. expected = `<p>Before paragraph.</p><p>After paragraph.</p>`
  816. output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  817. if expected != output {
  818. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  819. }
  820. input = `<p>Before paragraph.</p><img src="http://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.google.com%[title]=This+Is%2C+Google+a+search+engine" alt="Blocked Resource"><p>After paragraph.</p>`
  821. expected = `<p>Before paragraph.</p><p>After paragraph.</p>`
  822. output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  823. if expected != output {
  824. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  825. }
  826. }