sanitizer_test.go 40 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package sanitizer // import "miniflux.app/v2/internal/reader/sanitizer"
  4. import (
  5. "fmt"
  6. "os"
  7. "strings"
  8. "testing"
  9. "golang.org/x/net/html"
  10. "miniflux.app/v2/internal/config"
  11. )
  12. func sanitizeHTMLWithDefaultOptions(baseURL, rawHTML string) string {
  13. return SanitizeHTML(baseURL, rawHTML, &SanitizerOptions{
  14. OpenLinksInNewTab: true,
  15. })
  16. }
  17. func BenchmarkSanitize(b *testing.B) {
  18. var testCases = map[string][]string{
  19. "miniflux_github.html": {"https://github.com/miniflux/v2", ""},
  20. "miniflux_wikipedia.html": {"https://fr.wikipedia.org/wiki/Miniflux", ""},
  21. }
  22. for filename := range testCases {
  23. data, err := os.ReadFile("testdata/" + filename)
  24. if err != nil {
  25. b.Fatalf(`Unable to read file %q: %v`, filename, err)
  26. }
  27. testCases[filename][1] = string(data)
  28. }
  29. for b.Loop() {
  30. for _, v := range testCases {
  31. sanitizeHTMLWithDefaultOptions(v[0], v[1])
  32. }
  33. }
  34. }
  35. func FuzzSanitizer(f *testing.F) {
  36. f.Fuzz(func(t *testing.T, orig string) {
  37. tok := html.NewTokenizer(strings.NewReader(orig))
  38. i := 0
  39. for tok.Next() != html.ErrorToken {
  40. i++
  41. }
  42. out := sanitizeHTMLWithDefaultOptions("", orig)
  43. tok = html.NewTokenizer(strings.NewReader(out))
  44. j := 0
  45. for tok.Next() != html.ErrorToken {
  46. j++
  47. }
  48. if j > i {
  49. t.Errorf("Got more html tokens in the sanitized html.")
  50. }
  51. })
  52. }
  53. func TestValidInput(t *testing.T) {
  54. input := `<p>This is a <strong>text</strong> with an image: <img src="http://example.org/" alt="Test" loading="lazy">.</p>`
  55. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  56. if input != output {
  57. t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
  58. }
  59. }
  60. func TestImgSanitization(t *testing.T) {
  61. baseURL := "http://example.org/"
  62. testCases := []struct {
  63. name string
  64. input string
  65. expected string
  66. }{
  67. {
  68. name: "width-and-height-attributes",
  69. input: `<img src="https://example.org/image.png" width="10" height="20">`,
  70. expected: `<img src="https://example.org/image.png" width="10" height="20" loading="lazy">`,
  71. },
  72. {
  73. name: "invalid-width-and-height-attributes",
  74. input: `<img src="https://example.org/image.png" width="10px" height="20px">`,
  75. expected: `<img src="https://example.org/image.png" loading="lazy">`,
  76. },
  77. {
  78. name: "invalid-width-attribute",
  79. input: `<img src="https://example.org/image.png" width="10px" height="20">`,
  80. expected: `<img src="https://example.org/image.png" height="20" loading="lazy">`,
  81. },
  82. {
  83. name: "empty-width-and-height-attributes",
  84. input: `<img src="https://example.org/image.png" width="" height="">`,
  85. expected: `<img src="https://example.org/image.png" loading="lazy">`,
  86. },
  87. {
  88. name: "invalid-height-attribute",
  89. input: `<img src="https://example.org/image.png" width="10" height="20px">`,
  90. expected: `<img src="https://example.org/image.png" width="10" loading="lazy">`,
  91. },
  92. {
  93. name: "negative-width-attribute",
  94. input: `<img src="https://example.org/image.png" width="-10" height="20">`,
  95. expected: `<img src="https://example.org/image.png" height="20" loading="lazy">`,
  96. },
  97. {
  98. name: "negative-height-attribute",
  99. input: `<img src="https://example.org/image.png" width="10" height="-20">`,
  100. expected: `<img src="https://example.org/image.png" width="10" loading="lazy">`,
  101. },
  102. {
  103. name: "text-data-url",
  104. input: `<img src="data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==" alt="Example">`,
  105. expected: ``,
  106. },
  107. {
  108. name: "image-data-url",
  109. input: `<img src="data:image/gif;base64,test" alt="Example">`,
  110. expected: `<img src="data:image/gif;base64,test" alt="Example" loading="lazy">`,
  111. },
  112. {
  113. name: "srcset-attribute",
  114. input: `<img srcset="example-320w.jpg, example-480w.jpg 1.5x, example-640w.jpg 2x, example-640w.jpg 640w" src="example-640w.jpg" alt="Example">`,
  115. expected: `<img srcset="http://example.org/example-320w.jpg, http://example.org/example-480w.jpg 1.5x, http://example.org/example-640w.jpg 2x, http://example.org/example-640w.jpg 640w" src="http://example.org/example-640w.jpg" alt="Example" loading="lazy">`,
  116. },
  117. {
  118. name: "srcset-attribute-without-src",
  119. input: `<img srcset="example-320w.jpg, example-480w.jpg 1.5x, example-640w.jpg 2x, example-640w.jpg 640w" alt="Example">`,
  120. expected: `<img srcset="http://example.org/example-320w.jpg, http://example.org/example-480w.jpg 1.5x, http://example.org/example-640w.jpg 2x, http://example.org/example-640w.jpg 640w" alt="Example" loading="lazy">`,
  121. },
  122. {
  123. name: "srcset-attribute-with-blocked-candidate",
  124. input: `<img srcset="https://stats.wordpress.com/tracker.png 1x, /example-640w.jpg 2x" src="/example-640w.jpg" alt="Example">`,
  125. expected: `<img srcset="http://example.org/example-640w.jpg 2x" src="http://example.org/example-640w.jpg" alt="Example" loading="lazy">`,
  126. },
  127. {
  128. name: "srcset-attribute-all-candidates-invalid",
  129. input: `<img srcset="javascript:alert(1) 1x, data:text/plain;base64,SGVsbG8= 2x" alt="Example">`,
  130. expected: ``,
  131. },
  132. {
  133. name: "fetchpriority-high",
  134. input: `<img src="https://example.org/image.png" fetchpriority="high">`,
  135. expected: `<img src="https://example.org/image.png" fetchpriority="high" loading="lazy">`,
  136. },
  137. {
  138. name: "fetchpriority-low",
  139. input: `<img src="https://example.org/image.png" fetchpriority="low">`,
  140. expected: `<img src="https://example.org/image.png" fetchpriority="low" loading="lazy">`,
  141. },
  142. {
  143. name: "fetchpriority-auto",
  144. input: `<img src="https://example.org/image.png" fetchpriority="auto">`,
  145. expected: `<img src="https://example.org/image.png" fetchpriority="auto" loading="lazy">`,
  146. },
  147. {
  148. name: "fetchpriority-invalid",
  149. input: `<img src="https://example.org/image.png" fetchpriority="invalid">`,
  150. expected: `<img src="https://example.org/image.png" loading="lazy">`,
  151. },
  152. {
  153. name: "decoding-sync",
  154. input: `<img src="https://example.org/image.png" decoding="sync">`,
  155. expected: `<img src="https://example.org/image.png" decoding="sync" loading="lazy">`,
  156. },
  157. {
  158. name: "decoding-async",
  159. input: `<img src="https://example.org/image.png" decoding="async">`,
  160. expected: `<img src="https://example.org/image.png" decoding="async" loading="lazy">`,
  161. },
  162. {
  163. name: "decoding-auto",
  164. input: `<img src="https://example.org/image.png" decoding="auto">`,
  165. expected: `<img src="https://example.org/image.png" decoding="auto" loading="lazy">`,
  166. },
  167. {
  168. name: "decoding-invalid",
  169. input: `<img src="https://example.org/image.png" decoding="invalid">`,
  170. expected: `<img src="https://example.org/image.png" loading="lazy">`,
  171. },
  172. }
  173. for _, tc := range testCases {
  174. t.Run(tc.name, func(t *testing.T) {
  175. output := sanitizeHTMLWithDefaultOptions(baseURL, tc.input)
  176. if output != tc.expected {
  177. t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
  178. }
  179. })
  180. }
  181. }
  182. func TestNonImgWithFetchPriorityAttribute(t *testing.T) {
  183. input := `<p fetchpriority="high">Text</p>`
  184. expected := `<p>Text</p>`
  185. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  186. if output != expected {
  187. t.Errorf(`Wrong output: expected %q, got %q`, expected, output)
  188. }
  189. }
  190. func TestNonImgWithDecodingAttribute(t *testing.T) {
  191. input := `<p decoding="async">Text</p>`
  192. expected := `<p>Text</p>`
  193. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  194. if output != expected {
  195. t.Errorf(`Wrong output: expected %q, got %q`, expected, output)
  196. }
  197. }
  198. func TestMediumImgWithSrcset(t *testing.T) {
  199. input := `<img alt="Image for post" class="t u v ef aj" src="https://miro.medium.com/max/5460/1*aJ9JibWDqO81qMfNtqgqrw.jpeg" srcset="https://miro.medium.com/max/552/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 276w, https://miro.medium.com/max/1000/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 500w" sizes="500px" width="2730" height="3407">`
  200. expected := `<img alt="Image for post" src="https://miro.medium.com/max/5460/1*aJ9JibWDqO81qMfNtqgqrw.jpeg" srcset="https://miro.medium.com/max/552/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 276w, https://miro.medium.com/max/1000/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 500w" sizes="500px" width="2730" height="3407" loading="lazy">`
  201. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  202. if output != expected {
  203. t.Errorf(`Wrong output: %s`, output)
  204. }
  205. }
  206. func TestSelfClosingTags(t *testing.T) {
  207. baseURL := "http://example.org/"
  208. testCases := []struct {
  209. name string
  210. input string
  211. expected string
  212. }{
  213. {
  214. name: "br",
  215. input: `<p>Line<br>Break</p>`,
  216. expected: `<p>Line<br>Break</p>`,
  217. },
  218. {
  219. name: "hr",
  220. input: `<p>Before</p><hr><p>After</p>`,
  221. expected: `<p>Before</p><hr><p>After</p>`,
  222. },
  223. {
  224. name: "img",
  225. input: `<p>Image <img src="http://example.org/image.png" alt="Test"></p>`,
  226. expected: `<p>Image <img src="http://example.org/image.png" alt="Test" loading="lazy"></p>`,
  227. },
  228. {
  229. name: "source",
  230. input: `<picture><source src="http://example.org/video.mp4" type="video/mp4"></picture>`,
  231. expected: `<picture><source src="http://example.org/video.mp4" type="video/mp4"></picture>`,
  232. },
  233. {
  234. name: "wbr",
  235. input: `<p>soft<wbr>break</p>`,
  236. expected: `<p>soft<wbr>break</p>`,
  237. },
  238. }
  239. for _, tc := range testCases {
  240. t.Run(tc.name, func(t *testing.T) {
  241. output := sanitizeHTMLWithDefaultOptions(baseURL, tc.input)
  242. if output != tc.expected {
  243. t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
  244. }
  245. })
  246. }
  247. }
  248. func TestTable(t *testing.T) {
  249. input := `<table><tr><th>A</th><th colspan="2">B</th></tr><tr><td>C</td><td>D</td><td>E</td></tr></table>`
  250. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  251. if input != output {
  252. t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
  253. }
  254. }
  255. func TestRelativeURL(t *testing.T) {
  256. input := `This <a href="/test.html">link is relative</a> and this image: <img src="../folder/image.png">`
  257. expected := `This <a href="http://example.org/test.html" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">link is relative</a> and this image: <img src="http://example.org/folder/image.png" loading="lazy">`
  258. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  259. if expected != output {
  260. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  261. }
  262. }
  263. func TestProtocolRelativeURL(t *testing.T) {
  264. input := `This <a href="//static.example.org/index.html">link is relative</a>.`
  265. expected := `This <a href="https://static.example.org/index.html" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">link is relative</a>.`
  266. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  267. if expected != output {
  268. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  269. }
  270. }
  271. func TestInvalidTag(t *testing.T) {
  272. input := `<p>My invalid <z>tag</z>.</p>`
  273. expected := `<p>My invalid tag.</p>`
  274. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  275. if expected != output {
  276. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  277. }
  278. }
  279. func TestSourceSanitization(t *testing.T) {
  280. baseURL := "http://example.org/"
  281. testCases := []struct {
  282. name string
  283. input string
  284. expected string
  285. }{
  286. {
  287. name: "srcset-and-media",
  288. input: `<picture><source media="(min-width: 800px)" srcset="elva-800w.jpg"></picture>`,
  289. expected: `<picture><source media="(min-width: 800px)" srcset="http://example.org/elva-800w.jpg"></picture>`,
  290. },
  291. {
  292. name: "src-attribute",
  293. input: `<picture><source src="video.mp4" type="video/mp4"></picture>`,
  294. expected: `<picture><source src="http://example.org/video.mp4" type="video/mp4"></picture>`,
  295. },
  296. {
  297. name: "srcset-with-blocked-candidate",
  298. input: `<picture><source srcset="https://stats.wordpress.com/tracker.png 1x, /elva-800w.jpg 2x"></picture>`,
  299. expected: `<picture><source srcset="http://example.org/elva-800w.jpg 2x"></picture>`,
  300. },
  301. {
  302. name: "srcset-all-invalid",
  303. input: `<picture><source srcset="javascript:alert(1) 1x, data:text/plain;base64,SGVsbG8= 2x"></picture>`,
  304. expected: `<picture></picture>`,
  305. },
  306. }
  307. for _, tc := range testCases {
  308. t.Run(tc.name, func(t *testing.T) {
  309. output := sanitizeHTMLWithDefaultOptions(baseURL, tc.input)
  310. if output != tc.expected {
  311. t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
  312. }
  313. })
  314. }
  315. }
  316. func TestVideoTag(t *testing.T) {
  317. input := `<p>My valid <video src="videofile.webm" autoplay poster="posterimage.jpg">fallback</video>.</p>`
  318. expected := `<p>My valid <video src="http://example.org/videofile.webm" poster="http://example.org/posterimage.jpg" controls>fallback</video>.</p>`
  319. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  320. if expected != output {
  321. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  322. }
  323. }
  324. func TestAudioAndSourceTag(t *testing.T) {
  325. input := `<p>My music <audio controls="controls"><source src="foo.wav" type="audio/wav"></audio>.</p>`
  326. expected := `<p>My music <audio controls><source src="http://example.org/foo.wav" type="audio/wav"></audio>.</p>`
  327. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  328. if expected != output {
  329. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  330. }
  331. }
  332. func TestUnknownTag(t *testing.T) {
  333. input := `<p>My invalid <unknown>tag</unknown>.</p>`
  334. expected := `<p>My invalid tag.</p>`
  335. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  336. if expected != output {
  337. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  338. }
  339. }
  340. func TestInvalidNestedTag(t *testing.T) {
  341. input := `<p>My invalid <z>tag with some <em>valid</em> tag</z>.</p>`
  342. expected := `<p>My invalid tag with some <em>valid</em> tag.</p>`
  343. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  344. if expected != output {
  345. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  346. }
  347. }
  348. func TestInvalidIFrame(t *testing.T) {
  349. config.Opts = config.NewConfigOptions()
  350. input := `<iframe src="http://example.org/"></iframe>`
  351. expected := ``
  352. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  353. if expected != output {
  354. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  355. }
  356. }
  357. func TestBlockedIFrameWithChildElements(t *testing.T) {
  358. config.Opts = config.NewConfigOptions()
  359. input := `<iframe src="http://example.org/"><p>test</p></iframe>`
  360. expected := ``
  361. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  362. if expected != output {
  363. t.Errorf(`Wrong output: %q != %q`, expected, output)
  364. }
  365. }
  366. func TestSameDomainIFrame(t *testing.T) {
  367. config.Opts = config.NewConfigOptions()
  368. input := `<iframe src="http://example.com/test"></iframe>`
  369. expected := ``
  370. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  371. if expected != output {
  372. t.Errorf(`Wrong output: %q != %q`, expected, output)
  373. }
  374. }
  375. func TestInvidiousIFrame(t *testing.T) {
  376. config.Opts = config.NewConfigOptions()
  377. input := `<iframe src="https://yewtu.be/watch?v=video_id"></iframe>`
  378. expected := `<iframe src="https://yewtu.be/watch?v=video_id" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  379. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  380. if expected != output {
  381. t.Errorf(`Wrong output: %q != %q`, expected, output)
  382. }
  383. }
  384. func TestIFrameAllowList(t *testing.T) {
  385. config.Opts = config.NewConfigOptions()
  386. allowedDomains := []string{
  387. "bandcamp.com",
  388. "cdn.embedly.com",
  389. "dailymotion.com",
  390. "framatube.org",
  391. "open.spotify.com",
  392. "player.bilibili.com",
  393. "player.twitch.tv",
  394. "player.vimeo.com",
  395. "soundcloud.com",
  396. "vk.com",
  397. "w.soundcloud.com",
  398. "youtube-nocookie.com",
  399. "youtube.com",
  400. }
  401. for _, domain := range allowedDomains {
  402. t.Run(domain, func(t *testing.T) {
  403. input := fmt.Sprintf(`<iframe src="https://%s/video/test"></iframe>`, domain)
  404. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  405. if !strings.Contains(output, "<iframe") {
  406. t.Errorf(`iframe from %q should be allowed, got: %q`, domain, output)
  407. }
  408. })
  409. }
  410. }
  411. func TestCustomYoutubeEmbedURL(t *testing.T) {
  412. os.Setenv("YOUTUBE_EMBED_URL_OVERRIDE", "https://www.invidious.custom/embed/")
  413. defer os.Clearenv()
  414. var err error
  415. if config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables(); err != nil {
  416. t.Fatalf(`Parsing failure: %v`, err)
  417. }
  418. input := `<iframe src="https://www.invidious.custom/embed/1234"></iframe>`
  419. expected := `<iframe src="https://www.invidious.custom/embed/1234" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  420. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  421. if expected != output {
  422. t.Errorf(`Wrong output: %q != %q`, expected, output)
  423. }
  424. }
  425. func TestIFrameWithChildElements(t *testing.T) {
  426. config.Opts = config.NewConfigOptions()
  427. input := `<iframe src="https://www.youtube.com/"><p>test</p></iframe>`
  428. expected := `<iframe src="https://www.youtube.com/" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  429. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  430. if expected != output {
  431. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  432. }
  433. }
  434. func TestIFrameWithReferrerPolicy(t *testing.T) {
  435. config.Opts = config.NewConfigOptions()
  436. input := `<iframe src="https://www.youtube.com/embed/test123" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  437. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  438. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  439. if expected != output {
  440. t.Errorf(`Wrong output: %q != %q`, expected, output)
  441. }
  442. }
  443. func TestLinkWithTarget(t *testing.T) {
  444. input := `<p>This link is <a href="http://example.org/index.html">an anchor</a></p>`
  445. expected := `<p>This link is <a href="http://example.org/index.html" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">an anchor</a></p>`
  446. output := SanitizeHTML("http://example.org/", input, &SanitizerOptions{OpenLinksInNewTab: true})
  447. if expected != output {
  448. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  449. }
  450. }
  451. func TestLinkWithNoTarget(t *testing.T) {
  452. input := `<p>This link is <a href="http://example.org/index.html">an anchor</a></p>`
  453. expected := `<p>This link is <a href="http://example.org/index.html" rel="noopener noreferrer" referrerpolicy="no-referrer">an anchor</a></p>`
  454. output := SanitizeHTML("http://example.org/", input, &SanitizerOptions{OpenLinksInNewTab: false})
  455. if expected != output {
  456. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  457. }
  458. }
  459. func TestAnchorLink(t *testing.T) {
  460. input := `<p>This link is <a href="#some-anchor">an anchor</a></p>`
  461. expected := `<p>This link is <a href="#some-anchor">an anchor</a></p>`
  462. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  463. if expected != output {
  464. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  465. }
  466. }
  467. func TestInvalidURLScheme(t *testing.T) {
  468. input := `<p>This link is <a src="file:///etc/passwd">not valid</a></p>`
  469. expected := `<p>This link is not valid</p>`
  470. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  471. if expected != output {
  472. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  473. }
  474. }
  475. func TestURISchemes(t *testing.T) {
  476. baseURL := "http://example.org/"
  477. testCases := []struct {
  478. name string
  479. input string
  480. expected string
  481. }{
  482. {
  483. name: "apt",
  484. input: `<p>This link is <a href="apt:some-package?channel=test">valid</a></p>`,
  485. expected: `<p>This link is <a href="apt:some-package?channel=test" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  486. },
  487. {
  488. name: "bitcoin",
  489. input: `<p>This link is <a href="bitcoin:175tWpb8K1S7NmH4Zx6rewF9WQrcZv245W">valid</a></p>`,
  490. expected: `<p>This link is <a href="bitcoin:175tWpb8K1S7NmH4Zx6rewF9WQrcZv245W" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  491. },
  492. {
  493. name: "callto",
  494. input: `<p>This link is <a href="callto:12345679">valid</a></p>`,
  495. expected: `<p>This link is <a href="callto:12345679" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  496. },
  497. {
  498. name: "feed-double-slash",
  499. input: `<p>This link is <a href="feed://example.com/rss.xml">valid</a></p>`,
  500. expected: `<p>This link is <a href="feed://example.com/rss.xml" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  501. },
  502. {
  503. name: "feed-https",
  504. input: `<p>This link is <a href="feed:https://example.com/rss.xml">valid</a></p>`,
  505. expected: `<p>This link is <a href="feed:https://example.com/rss.xml" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  506. },
  507. {
  508. name: "geo",
  509. input: `<p>This link is <a href="geo:13.4125,103.8667">valid</a></p>`,
  510. expected: `<p>This link is <a href="geo:13.4125,103.8667" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  511. },
  512. {
  513. name: "itms",
  514. input: `<p>This link is <a href="itms://itunes.com/apps/my-app-name">valid</a></p>`,
  515. expected: `<p>This link is <a href="itms://itunes.com/apps/my-app-name" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  516. },
  517. {
  518. name: "itms-apps",
  519. input: `<p>This link is <a href="itms-apps://itunes.com/apps/my-app-name">valid</a></p>`,
  520. expected: `<p>This link is <a href="itms-apps://itunes.com/apps/my-app-name" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  521. },
  522. {
  523. name: "magnet",
  524. input: `<p>This link is <a href="magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&amp;xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7">valid</a></p>`,
  525. expected: `<p>This link is <a href="magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&amp;xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  526. },
  527. {
  528. name: "mailto",
  529. input: `<p>This link is <a href="mailto:jsmith@example.com?subject=A%20Test&amp;body=My%20idea%20is%3A%20%0A">valid</a></p>`,
  530. expected: `<p>This link is <a href="mailto:jsmith@example.com?subject=A%20Test&amp;body=My%20idea%20is%3A%20%0A" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  531. },
  532. {
  533. name: "news-double-slash",
  534. input: `<p>This link is <a href="news://news.server.example/*">valid</a></p>`,
  535. expected: `<p>This link is <a href="news://news.server.example/*" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  536. },
  537. {
  538. name: "news-single-colon",
  539. input: `<p>This link is <a href="news:example.group.this">valid</a></p>`,
  540. expected: `<p>This link is <a href="news:example.group.this" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  541. },
  542. {
  543. name: "nntp",
  544. input: `<p>This link is <a href="nntp://news.server.example/example.group.this">valid</a></p>`,
  545. expected: `<p>This link is <a href="nntp://news.server.example/example.group.this" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  546. },
  547. {
  548. name: "rtmp",
  549. input: `<p>This link is <a href="rtmp://mycompany.com/vod/mp4:mycoolvideo.mov">valid</a></p>`,
  550. expected: `<p>This link is <a href="rtmp://mycompany.com/vod/mp4:mycoolvideo.mov" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  551. },
  552. {
  553. name: "sip",
  554. input: `<p>This link is <a href="sip:+1-212-555-1212:1234@gateway.com;user=phone">valid</a></p>`,
  555. expected: `<p>This link is <a href="sip:+1-212-555-1212:1234@gateway.com;user=phone" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  556. },
  557. {
  558. name: "sips",
  559. input: `<p>This link is <a href="sips:alice@atlanta.com?subject=project%20x&amp;priority=urgent">valid</a></p>`,
  560. expected: `<p>This link is <a href="sips:alice@atlanta.com?subject=project%20x&amp;priority=urgent" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  561. },
  562. {
  563. name: "skype",
  564. input: `<p>This link is <a href="skype:echo123?call">valid</a></p>`,
  565. expected: `<p>This link is <a href="skype:echo123?call" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  566. },
  567. {
  568. name: "spotify",
  569. input: `<p>This link is <a href="spotify:track:2jCnn1QPQ3E8ExtLe6INsx">valid</a></p>`,
  570. expected: `<p>This link is <a href="spotify:track:2jCnn1QPQ3E8ExtLe6INsx" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  571. },
  572. {
  573. name: "steam",
  574. input: `<p>This link is <a href="steam://settings/account">valid</a></p>`,
  575. expected: `<p>This link is <a href="steam://settings/account" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  576. },
  577. {
  578. name: "svn",
  579. input: `<p>This link is <a href="svn://example.org">valid</a></p>`,
  580. expected: `<p>This link is <a href="svn://example.org" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  581. },
  582. {
  583. name: "svn-ssh",
  584. input: `<p>This link is <a href="svn+ssh://example.org">valid</a></p>`,
  585. expected: `<p>This link is <a href="svn+ssh://example.org" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  586. },
  587. {
  588. name: "tel",
  589. input: `<p>This link is <a href="tel:+1-201-555-0123">valid</a></p>`,
  590. expected: `<p>This link is <a href="tel:+1-201-555-0123" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  591. },
  592. {
  593. name: "webcal",
  594. input: `<p>This link is <a href="webcal://example.com/calendar.ics">valid</a></p>`,
  595. expected: `<p>This link is <a href="webcal://example.com/calendar.ics" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  596. },
  597. {
  598. name: "xmpp",
  599. input: `<p>This link is <a href="xmpp:user@host?subscribe&amp;type=subscribed">valid</a></p>`,
  600. expected: `<p>This link is <a href="xmpp:user@host?subscribe&amp;type=subscribed" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  601. },
  602. }
  603. for _, tc := range testCases {
  604. t.Run(tc.name, func(t *testing.T) {
  605. output := sanitizeHTMLWithDefaultOptions(baseURL, tc.input)
  606. if tc.expected != output {
  607. t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
  608. }
  609. })
  610. }
  611. }
  612. func TestBlacklistedLink(t *testing.T) {
  613. input := `<p>This image is not valid <img src="https://stats.wordpress.com/some-tracker"></p>`
  614. expected := `<p>This image is not valid </p>`
  615. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  616. if expected != output {
  617. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  618. }
  619. }
  620. func TestLinkWithTrackers(t *testing.T) {
  621. input := `<p>This link has trackers <a href="https://example.com/page?utm_source=newsletter">Test</a></p>`
  622. expected := `<p>This link has trackers <a href="https://example.com/page" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">Test</a></p>`
  623. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  624. if expected != output {
  625. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  626. }
  627. }
  628. func TestImageSrcWithTrackers(t *testing.T) {
  629. input := `<p>This image has trackers <img src="https://example.org/?id=123&utm_source=newsletter&utm_medium=email&fbclid=abc123"></p>`
  630. expected := `<p>This image has trackers <img src="https://example.org/?id=123" loading="lazy"></p>`
  631. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  632. if expected != output {
  633. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  634. }
  635. }
  636. func Test1x1PixelTracker(t *testing.T) {
  637. input := `<p><img src="https://tracker1.example.org/" height="1" width="1"> and <img src="https://tracker2.example.org/" height="1" width="1"/></p>`
  638. expected := `<p> and </p>`
  639. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  640. if expected != output {
  641. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  642. }
  643. }
  644. func Test0x0PixelTracker(t *testing.T) {
  645. input := `<p><img src="https://tracker1.example.org/" height="0" width="0"> and <img src="https://tracker2.example.org/" height="0" width="0"/></p>`
  646. expected := `<p> and </p>`
  647. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  648. if expected != output {
  649. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  650. }
  651. }
  652. func TestXmlEntities(t *testing.T) {
  653. input := `<pre>echo "test" &gt; /etc/hosts</pre>`
  654. expected := `<pre>echo &#34;test&#34; &gt; /etc/hosts</pre>`
  655. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  656. if expected != output {
  657. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  658. }
  659. }
  660. func TestEspaceAttributes(t *testing.T) {
  661. input := `<td rowspan="<b>injection</b>">text</td>`
  662. expected := `text`
  663. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  664. if expected != output {
  665. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  666. }
  667. }
  668. func TestReplaceYoutubeURL(t *testing.T) {
  669. os.Clearenv()
  670. var err error
  671. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  672. if err != nil {
  673. t.Fatalf(`Parsing failure: %v`, err)
  674. }
  675. input := `<iframe src="http://www.youtube.com/embed/test123?version=3&#038;rel=1&#038;fs=1&#038;autohide=2&#038;showsearch=0&#038;showinfo=1&#038;iv_load_policy=1&#038;wmode=transparent"></iframe>`
  676. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?version=3&amp;rel=1&amp;fs=1&amp;autohide=2&amp;showsearch=0&amp;showinfo=1&amp;iv_load_policy=1&amp;wmode=transparent" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  677. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  678. if expected != output {
  679. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  680. }
  681. }
  682. func TestReplaceSecureYoutubeURL(t *testing.T) {
  683. os.Clearenv()
  684. var err error
  685. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  686. if err != nil {
  687. t.Fatalf(`Parsing failure: %v`, err)
  688. }
  689. input := `<iframe src="https://www.youtube.com/embed/test123"></iframe>`
  690. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  691. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  692. if expected != output {
  693. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  694. }
  695. }
  696. func TestReplaceSecureYoutubeURLWithParameters(t *testing.T) {
  697. os.Clearenv()
  698. var err error
  699. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  700. if err != nil {
  701. t.Fatalf(`Parsing failure: %v`, err)
  702. }
  703. input := `<iframe src="https://www.youtube.com/embed/test123?rel=0&amp;controls=0"></iframe>`
  704. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  705. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  706. if expected != output {
  707. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  708. }
  709. }
  710. func TestReplaceYoutubeURLAlreadyReplaced(t *testing.T) {
  711. os.Clearenv()
  712. var err error
  713. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  714. if err != nil {
  715. t.Fatalf(`Parsing failure: %v`, err)
  716. }
  717. input := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin"></iframe>`
  718. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  719. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  720. if expected != output {
  721. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  722. }
  723. }
  724. func TestReplaceProtocolRelativeYoutubeURL(t *testing.T) {
  725. os.Clearenv()
  726. var err error
  727. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  728. if err != nil {
  729. t.Fatalf(`Parsing failure: %v`, err)
  730. }
  731. input := `<iframe src="//www.youtube.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen"></iframe>`
  732. expected := `<iframe src="https://www.youtube-nocookie.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  733. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  734. if expected != output {
  735. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  736. }
  737. }
  738. func TestReplaceYoutubeURLWithCustomURL(t *testing.T) {
  739. defer os.Clearenv()
  740. os.Setenv("YOUTUBE_EMBED_URL_OVERRIDE", "https://invidious.custom/embed/")
  741. var err error
  742. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  743. if err != nil {
  744. t.Fatalf(`Parsing failure: %v`, err)
  745. }
  746. input := `<iframe src="https://www.youtube.com/embed/test123?version=3&#038;rel=1&#038;fs=1&#038;autohide=2&#038;showsearch=0&#038;showinfo=1&#038;iv_load_policy=1&#038;wmode=transparent"></iframe>`
  747. expected := `<iframe src="https://invidious.custom/embed/test123?version=3&amp;rel=1&amp;fs=1&amp;autohide=2&amp;showsearch=0&amp;showinfo=1&amp;iv_load_policy=1&amp;wmode=transparent" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  748. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  749. if expected != output {
  750. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  751. }
  752. }
  753. func TestVimeoIframeRewriteWithQueryString(t *testing.T) {
  754. input := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0"></iframe>`
  755. expected := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0&amp;dnt=1" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  756. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  757. if expected != output {
  758. t.Errorf(`Wrong output: %q != %q`, expected, output)
  759. }
  760. }
  761. func TestVimeoIframeRewriteWithoutQueryString(t *testing.T) {
  762. input := `<iframe src="https://player.vimeo.com/video/123456"></iframe>`
  763. expected := `<iframe src="https://player.vimeo.com/video/123456?dnt=1" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  764. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  765. if expected != output {
  766. t.Errorf(`Wrong output: %q != %q`, expected, output)
  767. }
  768. }
  769. func TestReplaceNoScript(t *testing.T) {
  770. input := `<p>Before paragraph.</p><noscript>Inside <code>noscript</code> tag with an image: <img src="http://example.org/" alt="Test" loading="lazy"></noscript><p>After paragraph.</p>`
  771. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  772. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  773. if expected != output {
  774. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  775. }
  776. }
  777. func TestReplaceScript(t *testing.T) {
  778. input := `<p>Before paragraph.</p><script type="text/javascript">alert("1");</script><p>After paragraph.</p>`
  779. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  780. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  781. if expected != output {
  782. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  783. }
  784. }
  785. func TestReplaceStyle(t *testing.T) {
  786. input := `<p>Before paragraph.</p><style>body { background-color: #ff0000; }</style><p>After paragraph.</p>`
  787. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  788. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  789. if expected != output {
  790. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  791. }
  792. }
  793. func TestHiddenParagraph(t *testing.T) {
  794. input := `<p>Before paragraph.</p><p hidden>This should <em>not</em> appear in the <strong>output</strong></p><p>After paragraph.</p>`
  795. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  796. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  797. if expected != output {
  798. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  799. }
  800. }
  801. func TestAttributesAreStripped(t *testing.T) {
  802. input := `<p style="color: red;">Some text.<hr style="color: blue"/>Test.</p>`
  803. expected := `<p>Some text.</p><hr>Test.<p></p>`
  804. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  805. if expected != output {
  806. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  807. }
  808. }
  809. func TestMathML(t *testing.T) {
  810. input := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
  811. expected := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
  812. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  813. if expected != output {
  814. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  815. }
  816. }
  817. func TestInvalidMathMLXMLNamespace(t *testing.T) {
  818. input := `<math xmlns="http://example.org"><msup><mi>x</mi><mn>2</mn></msup></math>`
  819. expected := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
  820. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  821. if expected != output {
  822. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  823. }
  824. }
  825. func TestBlockedResourcesSubstrings(t *testing.T) {
  826. input := `<p>Before paragraph.</p><img src="http://stats.wordpress.com/something.php" alt="Blocked Resource"><p>After paragraph.</p>`
  827. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  828. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  829. if expected != output {
  830. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  831. }
  832. input = `<p>Before paragraph.</p><img src="http://twitter.com/share?text=This+is+google+a+search+engine&url=https%3A%2F%2Fwww.google.com" alt="Blocked Resource"><p>After paragraph.</p>`
  833. expected = `<p>Before paragraph.</p><p>After paragraph.</p>`
  834. output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  835. if expected != output {
  836. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  837. }
  838. input = `<p>Before paragraph.</p><img src="http://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.google.com%[title]=This+Is%2C+Google+a+search+engine" alt="Blocked Resource"><p>After paragraph.</p>`
  839. expected = `<p>Before paragraph.</p><p>After paragraph.</p>`
  840. output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  841. if expected != output {
  842. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  843. }
  844. }
  845. func TestAttrLowerCase(t *testing.T) {
  846. baseURL := "http://example.org/"
  847. testCases := []struct {
  848. name string
  849. input string
  850. expected string
  851. }{
  852. {
  853. name: "href-and-hidden-mixed-case",
  854. input: `<a HrEF="http://example.com" HIddEN>test</a>`,
  855. expected: ``,
  856. },
  857. {
  858. name: "href-mixed-case",
  859. input: `<a HrEF="http://example.com">test</a>`,
  860. expected: `<a href="http://example.com" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">test</a>`,
  861. },
  862. }
  863. for _, tc := range testCases {
  864. t.Run(tc.name, func(t *testing.T) {
  865. output := sanitizeHTMLWithDefaultOptions(baseURL, tc.input)
  866. if tc.expected != output {
  867. t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
  868. }
  869. })
  870. }
  871. }
  872. func TestDeeplyNestedpage(t *testing.T) {
  873. input := "test"
  874. // -3 instead of -1 because <html><body> is automatically added.
  875. for range maxDepth - 3 {
  876. input = "<div>" + input + "</div>"
  877. }
  878. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  879. want := "test"
  880. if output != want {
  881. t.Errorf(`Wrong output: "%s" != "%s"`, want, output)
  882. }
  883. input = "test"
  884. for range maxDepth - 2 {
  885. input = "<div>" + input + "</div>"
  886. }
  887. output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  888. if output != "" {
  889. t.Errorf(`Wrong output: "%s" != "%s"`, "", output)
  890. }
  891. }