sanitizer_test.go 39 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package sanitizer // import "miniflux.app/v2/internal/reader/sanitizer"
  4. import (
  5. "os"
  6. "strings"
  7. "testing"
  8. "golang.org/x/net/html"
  9. "miniflux.app/v2/internal/config"
  10. )
  11. func sanitizeHTMLWithDefaultOptions(baseURL, rawHTML string) string {
  12. return SanitizeHTML(baseURL, rawHTML, &SanitizerOptions{
  13. OpenLinksInNewTab: true,
  14. })
  15. }
  16. func BenchmarkSanitize(b *testing.B) {
  17. var testCases = map[string][]string{
  18. "miniflux_github.html": {"https://github.com/miniflux/v2", ""},
  19. "miniflux_wikipedia.html": {"https://fr.wikipedia.org/wiki/Miniflux", ""},
  20. }
  21. for filename := range testCases {
  22. data, err := os.ReadFile("testdata/" + filename)
  23. if err != nil {
  24. b.Fatalf(`Unable to read file %q: %v`, filename, err)
  25. }
  26. testCases[filename][1] = string(data)
  27. }
  28. for b.Loop() {
  29. for _, v := range testCases {
  30. sanitizeHTMLWithDefaultOptions(v[0], v[1])
  31. }
  32. }
  33. }
  34. func FuzzSanitizer(f *testing.F) {
  35. f.Fuzz(func(t *testing.T, orig string) {
  36. tok := html.NewTokenizer(strings.NewReader(orig))
  37. i := 0
  38. for tok.Next() != html.ErrorToken {
  39. i++
  40. }
  41. out := sanitizeHTMLWithDefaultOptions("", orig)
  42. tok = html.NewTokenizer(strings.NewReader(out))
  43. j := 0
  44. for tok.Next() != html.ErrorToken {
  45. j++
  46. }
  47. if j > i {
  48. t.Errorf("Got more html tokens in the sanitized html.")
  49. }
  50. })
  51. }
  52. func TestValidInput(t *testing.T) {
  53. input := `<p>This is a <strong>text</strong> with an image: <img src="http://example.org/" alt="Test" loading="lazy">.</p>`
  54. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  55. if input != output {
  56. t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
  57. }
  58. }
  59. func TestImgSanitization(t *testing.T) {
  60. baseURL := "http://example.org/"
  61. testCases := []struct {
  62. name string
  63. input string
  64. expected string
  65. }{
  66. {
  67. name: "width-and-height-attributes",
  68. input: `<img src="https://example.org/image.png" width="10" height="20">`,
  69. expected: `<img src="https://example.org/image.png" width="10" height="20" loading="lazy">`,
  70. },
  71. {
  72. name: "invalid-width-and-height-attributes",
  73. input: `<img src="https://example.org/image.png" width="10px" height="20px">`,
  74. expected: `<img src="https://example.org/image.png" loading="lazy">`,
  75. },
  76. {
  77. name: "invalid-width-attribute",
  78. input: `<img src="https://example.org/image.png" width="10px" height="20">`,
  79. expected: `<img src="https://example.org/image.png" height="20" loading="lazy">`,
  80. },
  81. {
  82. name: "empty-width-and-height-attributes",
  83. input: `<img src="https://example.org/image.png" width="" height="">`,
  84. expected: `<img src="https://example.org/image.png" loading="lazy">`,
  85. },
  86. {
  87. name: "invalid-height-attribute",
  88. input: `<img src="https://example.org/image.png" width="10" height="20px">`,
  89. expected: `<img src="https://example.org/image.png" width="10" loading="lazy">`,
  90. },
  91. {
  92. name: "negative-width-attribute",
  93. input: `<img src="https://example.org/image.png" width="-10" height="20">`,
  94. expected: `<img src="https://example.org/image.png" height="20" loading="lazy">`,
  95. },
  96. {
  97. name: "negative-height-attribute",
  98. input: `<img src="https://example.org/image.png" width="10" height="-20">`,
  99. expected: `<img src="https://example.org/image.png" width="10" loading="lazy">`,
  100. },
  101. {
  102. name: "text-data-url",
  103. input: `<img src="data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==" alt="Example">`,
  104. expected: ``,
  105. },
  106. {
  107. name: "image-data-url",
  108. input: `<img src="data:image/gif;base64,test" alt="Example">`,
  109. expected: `<img src="data:image/gif;base64,test" alt="Example" loading="lazy">`,
  110. },
  111. {
  112. name: "srcset-attribute",
  113. input: `<img srcset="example-320w.jpg, example-480w.jpg 1.5x, example-640w.jpg 2x, example-640w.jpg 640w" src="example-640w.jpg" alt="Example">`,
  114. expected: `<img srcset="http://example.org/example-320w.jpg, http://example.org/example-480w.jpg 1.5x, http://example.org/example-640w.jpg 2x, http://example.org/example-640w.jpg 640w" src="http://example.org/example-640w.jpg" alt="Example" loading="lazy">`,
  115. },
  116. {
  117. name: "srcset-attribute-without-src",
  118. input: `<img srcset="example-320w.jpg, example-480w.jpg 1.5x, example-640w.jpg 2x, example-640w.jpg 640w" alt="Example">`,
  119. expected: `<img srcset="http://example.org/example-320w.jpg, http://example.org/example-480w.jpg 1.5x, http://example.org/example-640w.jpg 2x, http://example.org/example-640w.jpg 640w" alt="Example" loading="lazy">`,
  120. },
  121. {
  122. name: "srcset-attribute-with-blocked-candidate",
  123. input: `<img srcset="https://stats.wordpress.com/tracker.png 1x, /example-640w.jpg 2x" src="/example-640w.jpg" alt="Example">`,
  124. expected: `<img srcset="http://example.org/example-640w.jpg 2x" src="http://example.org/example-640w.jpg" alt="Example" loading="lazy">`,
  125. },
  126. {
  127. name: "srcset-attribute-all-candidates-invalid",
  128. input: `<img srcset="javascript:alert(1) 1x, data:text/plain;base64,SGVsbG8= 2x" alt="Example">`,
  129. expected: ``,
  130. },
  131. {
  132. name: "fetchpriority-high",
  133. input: `<img src="https://example.org/image.png" fetchpriority="high">`,
  134. expected: `<img src="https://example.org/image.png" fetchpriority="high" loading="lazy">`,
  135. },
  136. {
  137. name: "fetchpriority-low",
  138. input: `<img src="https://example.org/image.png" fetchpriority="low">`,
  139. expected: `<img src="https://example.org/image.png" fetchpriority="low" loading="lazy">`,
  140. },
  141. {
  142. name: "fetchpriority-auto",
  143. input: `<img src="https://example.org/image.png" fetchpriority="auto">`,
  144. expected: `<img src="https://example.org/image.png" fetchpriority="auto" loading="lazy">`,
  145. },
  146. {
  147. name: "fetchpriority-invalid",
  148. input: `<img src="https://example.org/image.png" fetchpriority="invalid">`,
  149. expected: `<img src="https://example.org/image.png" loading="lazy">`,
  150. },
  151. {
  152. name: "decoding-sync",
  153. input: `<img src="https://example.org/image.png" decoding="sync">`,
  154. expected: `<img src="https://example.org/image.png" decoding="sync" loading="lazy">`,
  155. },
  156. {
  157. name: "decoding-async",
  158. input: `<img src="https://example.org/image.png" decoding="async">`,
  159. expected: `<img src="https://example.org/image.png" decoding="async" loading="lazy">`,
  160. },
  161. {
  162. name: "decoding-auto",
  163. input: `<img src="https://example.org/image.png" decoding="auto">`,
  164. expected: `<img src="https://example.org/image.png" decoding="auto" loading="lazy">`,
  165. },
  166. {
  167. name: "decoding-invalid",
  168. input: `<img src="https://example.org/image.png" decoding="invalid">`,
  169. expected: `<img src="https://example.org/image.png" loading="lazy">`,
  170. },
  171. }
  172. for _, tc := range testCases {
  173. t.Run(tc.name, func(t *testing.T) {
  174. output := sanitizeHTMLWithDefaultOptions(baseURL, tc.input)
  175. if output != tc.expected {
  176. t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
  177. }
  178. })
  179. }
  180. }
  181. func TestNonImgWithFetchPriorityAttribute(t *testing.T) {
  182. input := `<p fetchpriority="high">Text</p>`
  183. expected := `<p>Text</p>`
  184. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  185. if output != expected {
  186. t.Errorf(`Wrong output: expected %q, got %q`, expected, output)
  187. }
  188. }
  189. func TestNonImgWithDecodingAttribute(t *testing.T) {
  190. input := `<p decoding="async">Text</p>`
  191. expected := `<p>Text</p>`
  192. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  193. if output != expected {
  194. t.Errorf(`Wrong output: expected %q, got %q`, expected, output)
  195. }
  196. }
  197. func TestMediumImgWithSrcset(t *testing.T) {
  198. input := `<img alt="Image for post" class="t u v ef aj" src="https://miro.medium.com/max/5460/1*aJ9JibWDqO81qMfNtqgqrw.jpeg" srcset="https://miro.medium.com/max/552/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 276w, https://miro.medium.com/max/1000/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 500w" sizes="500px" width="2730" height="3407">`
  199. expected := `<img alt="Image for post" src="https://miro.medium.com/max/5460/1*aJ9JibWDqO81qMfNtqgqrw.jpeg" srcset="https://miro.medium.com/max/552/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 276w, https://miro.medium.com/max/1000/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 500w" sizes="500px" width="2730" height="3407" loading="lazy">`
  200. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  201. if output != expected {
  202. t.Errorf(`Wrong output: %s`, output)
  203. }
  204. }
  205. func TestSelfClosingTags(t *testing.T) {
  206. baseURL := "http://example.org/"
  207. testCases := []struct {
  208. name string
  209. input string
  210. expected string
  211. }{
  212. {
  213. name: "br",
  214. input: `<p>Line<br>Break</p>`,
  215. expected: `<p>Line<br>Break</p>`,
  216. },
  217. {
  218. name: "hr",
  219. input: `<p>Before</p><hr><p>After</p>`,
  220. expected: `<p>Before</p><hr><p>After</p>`,
  221. },
  222. {
  223. name: "img",
  224. input: `<p>Image <img src="http://example.org/image.png" alt="Test"></p>`,
  225. expected: `<p>Image <img src="http://example.org/image.png" alt="Test" loading="lazy"></p>`,
  226. },
  227. {
  228. name: "source",
  229. input: `<picture><source src="http://example.org/video.mp4" type="video/mp4"></picture>`,
  230. expected: `<picture><source src="http://example.org/video.mp4" type="video/mp4"></picture>`,
  231. },
  232. {
  233. name: "wbr",
  234. input: `<p>soft<wbr>break</p>`,
  235. expected: `<p>soft<wbr>break</p>`,
  236. },
  237. }
  238. for _, tc := range testCases {
  239. t.Run(tc.name, func(t *testing.T) {
  240. output := sanitizeHTMLWithDefaultOptions(baseURL, tc.input)
  241. if output != tc.expected {
  242. t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
  243. }
  244. })
  245. }
  246. }
  247. func TestTable(t *testing.T) {
  248. input := `<table><tr><th>A</th><th colspan="2">B</th></tr><tr><td>C</td><td>D</td><td>E</td></tr></table>`
  249. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  250. if input != output {
  251. t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
  252. }
  253. }
  254. func TestRelativeURL(t *testing.T) {
  255. input := `This <a href="/test.html">link is relative</a> and this image: <img src="../folder/image.png">`
  256. expected := `This <a href="http://example.org/test.html" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">link is relative</a> and this image: <img src="http://example.org/folder/image.png" loading="lazy">`
  257. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  258. if expected != output {
  259. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  260. }
  261. }
  262. func TestProtocolRelativeURL(t *testing.T) {
  263. input := `This <a href="//static.example.org/index.html">link is relative</a>.`
  264. expected := `This <a href="https://static.example.org/index.html" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">link is relative</a>.`
  265. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  266. if expected != output {
  267. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  268. }
  269. }
  270. func TestInvalidTag(t *testing.T) {
  271. input := `<p>My invalid <z>tag</z>.</p>`
  272. expected := `<p>My invalid tag.</p>`
  273. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  274. if expected != output {
  275. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  276. }
  277. }
  278. func TestSourceSanitization(t *testing.T) {
  279. baseURL := "http://example.org/"
  280. testCases := []struct {
  281. name string
  282. input string
  283. expected string
  284. }{
  285. {
  286. name: "srcset-and-media",
  287. input: `<picture><source media="(min-width: 800px)" srcset="elva-800w.jpg"></picture>`,
  288. expected: `<picture><source media="(min-width: 800px)" srcset="http://example.org/elva-800w.jpg"></picture>`,
  289. },
  290. {
  291. name: "src-attribute",
  292. input: `<picture><source src="video.mp4" type="video/mp4"></picture>`,
  293. expected: `<picture><source src="http://example.org/video.mp4" type="video/mp4"></picture>`,
  294. },
  295. {
  296. name: "srcset-with-blocked-candidate",
  297. input: `<picture><source srcset="https://stats.wordpress.com/tracker.png 1x, /elva-800w.jpg 2x"></picture>`,
  298. expected: `<picture><source srcset="http://example.org/elva-800w.jpg 2x"></picture>`,
  299. },
  300. {
  301. name: "srcset-all-invalid",
  302. input: `<picture><source srcset="javascript:alert(1) 1x, data:text/plain;base64,SGVsbG8= 2x"></picture>`,
  303. expected: `<picture></picture>`,
  304. },
  305. }
  306. for _, tc := range testCases {
  307. t.Run(tc.name, func(t *testing.T) {
  308. output := sanitizeHTMLWithDefaultOptions(baseURL, tc.input)
  309. if output != tc.expected {
  310. t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
  311. }
  312. })
  313. }
  314. }
  315. func TestVideoTag(t *testing.T) {
  316. input := `<p>My valid <video src="videofile.webm" autoplay poster="posterimage.jpg">fallback</video>.</p>`
  317. expected := `<p>My valid <video src="http://example.org/videofile.webm" poster="http://example.org/posterimage.jpg" controls>fallback</video>.</p>`
  318. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  319. if expected != output {
  320. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  321. }
  322. }
  323. func TestAudioAndSourceTag(t *testing.T) {
  324. input := `<p>My music <audio controls="controls"><source src="foo.wav" type="audio/wav"></audio>.</p>`
  325. expected := `<p>My music <audio controls><source src="http://example.org/foo.wav" type="audio/wav"></audio>.</p>`
  326. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  327. if expected != output {
  328. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  329. }
  330. }
  331. func TestUnknownTag(t *testing.T) {
  332. input := `<p>My invalid <unknown>tag</unknown>.</p>`
  333. expected := `<p>My invalid tag.</p>`
  334. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  335. if expected != output {
  336. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  337. }
  338. }
  339. func TestInvalidNestedTag(t *testing.T) {
  340. input := `<p>My invalid <z>tag with some <em>valid</em> tag</z>.</p>`
  341. expected := `<p>My invalid tag with some <em>valid</em> tag.</p>`
  342. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  343. if expected != output {
  344. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  345. }
  346. }
  347. func TestInvalidIFrame(t *testing.T) {
  348. config.Opts = config.NewConfigOptions()
  349. input := `<iframe src="http://example.org/"></iframe>`
  350. expected := ``
  351. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  352. if expected != output {
  353. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  354. }
  355. }
  356. func TestSameDomainIFrame(t *testing.T) {
  357. config.Opts = config.NewConfigOptions()
  358. input := `<iframe src="http://example.com/test"></iframe>`
  359. expected := ``
  360. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  361. if expected != output {
  362. t.Errorf(`Wrong output: %q != %q`, expected, output)
  363. }
  364. }
  365. func TestInvidiousIFrame(t *testing.T) {
  366. config.Opts = config.NewConfigOptions()
  367. input := `<iframe src="https://yewtu.be/watch?v=video_id"></iframe>`
  368. expected := `<iframe src="https://yewtu.be/watch?v=video_id" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  369. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  370. if expected != output {
  371. t.Errorf(`Wrong output: %q != %q`, expected, output)
  372. }
  373. }
  374. func TestCustomYoutubeEmbedURL(t *testing.T) {
  375. os.Setenv("YOUTUBE_EMBED_URL_OVERRIDE", "https://www.invidious.custom/embed/")
  376. defer os.Clearenv()
  377. var err error
  378. if config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables(); err != nil {
  379. t.Fatalf(`Parsing failure: %v`, err)
  380. }
  381. input := `<iframe src="https://www.invidious.custom/embed/1234"></iframe>`
  382. expected := `<iframe src="https://www.invidious.custom/embed/1234" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  383. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  384. if expected != output {
  385. t.Errorf(`Wrong output: %q != %q`, expected, output)
  386. }
  387. }
  388. func TestIFrameWithChildElements(t *testing.T) {
  389. config.Opts = config.NewConfigOptions()
  390. input := `<iframe src="https://www.youtube.com/"><p>test</p></iframe>`
  391. expected := `<iframe src="https://www.youtube.com/" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  392. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  393. if expected != output {
  394. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  395. }
  396. }
  397. func TestIFrameWithReferrerPolicy(t *testing.T) {
  398. config.Opts = config.NewConfigOptions()
  399. input := `<iframe src="https://www.youtube.com/embed/test123" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  400. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  401. output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
  402. if expected != output {
  403. t.Errorf(`Wrong output: %q != %q`, expected, output)
  404. }
  405. }
  406. func TestLinkWithTarget(t *testing.T) {
  407. input := `<p>This link is <a href="http://example.org/index.html">an anchor</a></p>`
  408. expected := `<p>This link is <a href="http://example.org/index.html" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">an anchor</a></p>`
  409. output := SanitizeHTML("http://example.org/", input, &SanitizerOptions{OpenLinksInNewTab: true})
  410. if expected != output {
  411. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  412. }
  413. }
  414. func TestLinkWithNoTarget(t *testing.T) {
  415. input := `<p>This link is <a href="http://example.org/index.html">an anchor</a></p>`
  416. expected := `<p>This link is <a href="http://example.org/index.html" rel="noopener noreferrer" referrerpolicy="no-referrer">an anchor</a></p>`
  417. output := SanitizeHTML("http://example.org/", input, &SanitizerOptions{OpenLinksInNewTab: false})
  418. if expected != output {
  419. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  420. }
  421. }
  422. func TestAnchorLink(t *testing.T) {
  423. input := `<p>This link is <a href="#some-anchor">an anchor</a></p>`
  424. expected := `<p>This link is <a href="#some-anchor">an anchor</a></p>`
  425. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  426. if expected != output {
  427. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  428. }
  429. }
  430. func TestInvalidURLScheme(t *testing.T) {
  431. input := `<p>This link is <a src="file:///etc/passwd">not valid</a></p>`
  432. expected := `<p>This link is not valid</p>`
  433. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  434. if expected != output {
  435. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  436. }
  437. }
  438. func TestURISchemes(t *testing.T) {
  439. baseURL := "http://example.org/"
  440. testCases := []struct {
  441. name string
  442. input string
  443. expected string
  444. }{
  445. {
  446. name: "apt",
  447. input: `<p>This link is <a href="apt:some-package?channel=test">valid</a></p>`,
  448. expected: `<p>This link is <a href="apt:some-package?channel=test" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  449. },
  450. {
  451. name: "bitcoin",
  452. input: `<p>This link is <a href="bitcoin:175tWpb8K1S7NmH4Zx6rewF9WQrcZv245W">valid</a></p>`,
  453. expected: `<p>This link is <a href="bitcoin:175tWpb8K1S7NmH4Zx6rewF9WQrcZv245W" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  454. },
  455. {
  456. name: "callto",
  457. input: `<p>This link is <a href="callto:12345679">valid</a></p>`,
  458. expected: `<p>This link is <a href="callto:12345679" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  459. },
  460. {
  461. name: "feed-double-slash",
  462. input: `<p>This link is <a href="feed://example.com/rss.xml">valid</a></p>`,
  463. expected: `<p>This link is <a href="feed://example.com/rss.xml" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  464. },
  465. {
  466. name: "feed-https",
  467. input: `<p>This link is <a href="feed:https://example.com/rss.xml">valid</a></p>`,
  468. expected: `<p>This link is <a href="feed:https://example.com/rss.xml" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  469. },
  470. {
  471. name: "geo",
  472. input: `<p>This link is <a href="geo:13.4125,103.8667">valid</a></p>`,
  473. expected: `<p>This link is <a href="geo:13.4125,103.8667" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  474. },
  475. {
  476. name: "itms",
  477. input: `<p>This link is <a href="itms://itunes.com/apps/my-app-name">valid</a></p>`,
  478. expected: `<p>This link is <a href="itms://itunes.com/apps/my-app-name" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  479. },
  480. {
  481. name: "itms-apps",
  482. input: `<p>This link is <a href="itms-apps://itunes.com/apps/my-app-name">valid</a></p>`,
  483. expected: `<p>This link is <a href="itms-apps://itunes.com/apps/my-app-name" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  484. },
  485. {
  486. name: "magnet",
  487. input: `<p>This link is <a href="magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&amp;xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7">valid</a></p>`,
  488. expected: `<p>This link is <a href="magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&amp;xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  489. },
  490. {
  491. name: "mailto",
  492. input: `<p>This link is <a href="mailto:jsmith@example.com?subject=A%20Test&amp;body=My%20idea%20is%3A%20%0A">valid</a></p>`,
  493. expected: `<p>This link is <a href="mailto:jsmith@example.com?subject=A%20Test&amp;body=My%20idea%20is%3A%20%0A" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  494. },
  495. {
  496. name: "news-double-slash",
  497. input: `<p>This link is <a href="news://news.server.example/*">valid</a></p>`,
  498. expected: `<p>This link is <a href="news://news.server.example/*" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  499. },
  500. {
  501. name: "news-single-colon",
  502. input: `<p>This link is <a href="news:example.group.this">valid</a></p>`,
  503. expected: `<p>This link is <a href="news:example.group.this" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  504. },
  505. {
  506. name: "nntp",
  507. input: `<p>This link is <a href="nntp://news.server.example/example.group.this">valid</a></p>`,
  508. expected: `<p>This link is <a href="nntp://news.server.example/example.group.this" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  509. },
  510. {
  511. name: "rtmp",
  512. input: `<p>This link is <a href="rtmp://mycompany.com/vod/mp4:mycoolvideo.mov">valid</a></p>`,
  513. expected: `<p>This link is <a href="rtmp://mycompany.com/vod/mp4:mycoolvideo.mov" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  514. },
  515. {
  516. name: "sip",
  517. input: `<p>This link is <a href="sip:+1-212-555-1212:1234@gateway.com;user=phone">valid</a></p>`,
  518. expected: `<p>This link is <a href="sip:+1-212-555-1212:1234@gateway.com;user=phone" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  519. },
  520. {
  521. name: "sips",
  522. input: `<p>This link is <a href="sips:alice@atlanta.com?subject=project%20x&amp;priority=urgent">valid</a></p>`,
  523. expected: `<p>This link is <a href="sips:alice@atlanta.com?subject=project%20x&amp;priority=urgent" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  524. },
  525. {
  526. name: "skype",
  527. input: `<p>This link is <a href="skype:echo123?call">valid</a></p>`,
  528. expected: `<p>This link is <a href="skype:echo123?call" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  529. },
  530. {
  531. name: "spotify",
  532. input: `<p>This link is <a href="spotify:track:2jCnn1QPQ3E8ExtLe6INsx">valid</a></p>`,
  533. expected: `<p>This link is <a href="spotify:track:2jCnn1QPQ3E8ExtLe6INsx" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  534. },
  535. {
  536. name: "steam",
  537. input: `<p>This link is <a href="steam://settings/account">valid</a></p>`,
  538. expected: `<p>This link is <a href="steam://settings/account" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  539. },
  540. {
  541. name: "svn",
  542. input: `<p>This link is <a href="svn://example.org">valid</a></p>`,
  543. expected: `<p>This link is <a href="svn://example.org" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  544. },
  545. {
  546. name: "svn-ssh",
  547. input: `<p>This link is <a href="svn+ssh://example.org">valid</a></p>`,
  548. expected: `<p>This link is <a href="svn+ssh://example.org" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  549. },
  550. {
  551. name: "tel",
  552. input: `<p>This link is <a href="tel:+1-201-555-0123">valid</a></p>`,
  553. expected: `<p>This link is <a href="tel:+1-201-555-0123" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  554. },
  555. {
  556. name: "webcal",
  557. input: `<p>This link is <a href="webcal://example.com/calendar.ics">valid</a></p>`,
  558. expected: `<p>This link is <a href="webcal://example.com/calendar.ics" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  559. },
  560. {
  561. name: "xmpp",
  562. input: `<p>This link is <a href="xmpp:user@host?subscribe&amp;type=subscribed">valid</a></p>`,
  563. expected: `<p>This link is <a href="xmpp:user@host?subscribe&amp;type=subscribed" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
  564. },
  565. }
  566. for _, tc := range testCases {
  567. t.Run(tc.name, func(t *testing.T) {
  568. output := sanitizeHTMLWithDefaultOptions(baseURL, tc.input)
  569. if tc.expected != output {
  570. t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
  571. }
  572. })
  573. }
  574. }
  575. func TestBlacklistedLink(t *testing.T) {
  576. input := `<p>This image is not valid <img src="https://stats.wordpress.com/some-tracker"></p>`
  577. expected := `<p>This image is not valid </p>`
  578. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  579. if expected != output {
  580. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  581. }
  582. }
  583. func TestLinkWithTrackers(t *testing.T) {
  584. input := `<p>This link has trackers <a href="https://example.com/page?utm_source=newsletter">Test</a></p>`
  585. expected := `<p>This link has trackers <a href="https://example.com/page" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">Test</a></p>`
  586. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  587. if expected != output {
  588. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  589. }
  590. }
  591. func TestImageSrcWithTrackers(t *testing.T) {
  592. input := `<p>This image has trackers <img src="https://example.org/?id=123&utm_source=newsletter&utm_medium=email&fbclid=abc123"></p>`
  593. expected := `<p>This image has trackers <img src="https://example.org/?id=123" loading="lazy"></p>`
  594. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  595. if expected != output {
  596. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  597. }
  598. }
  599. func Test1x1PixelTracker(t *testing.T) {
  600. input := `<p><img src="https://tracker1.example.org/" height="1" width="1"> and <img src="https://tracker2.example.org/" height="1" width="1"/></p>`
  601. expected := `<p> and </p>`
  602. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  603. if expected != output {
  604. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  605. }
  606. }
  607. func Test0x0PixelTracker(t *testing.T) {
  608. input := `<p><img src="https://tracker1.example.org/" height="0" width="0"> and <img src="https://tracker2.example.org/" height="0" width="0"/></p>`
  609. expected := `<p> and </p>`
  610. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  611. if expected != output {
  612. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  613. }
  614. }
  615. func TestXmlEntities(t *testing.T) {
  616. input := `<pre>echo "test" &gt; /etc/hosts</pre>`
  617. expected := `<pre>echo &#34;test&#34; &gt; /etc/hosts</pre>`
  618. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  619. if expected != output {
  620. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  621. }
  622. }
  623. func TestEspaceAttributes(t *testing.T) {
  624. input := `<td rowspan="<b>injection</b>">text</td>`
  625. expected := `text`
  626. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  627. if expected != output {
  628. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  629. }
  630. }
  631. func TestReplaceYoutubeURL(t *testing.T) {
  632. os.Clearenv()
  633. var err error
  634. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  635. if err != nil {
  636. t.Fatalf(`Parsing failure: %v`, err)
  637. }
  638. input := `<iframe src="http://www.youtube.com/embed/test123?version=3&#038;rel=1&#038;fs=1&#038;autohide=2&#038;showsearch=0&#038;showinfo=1&#038;iv_load_policy=1&#038;wmode=transparent"></iframe>`
  639. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?version=3&amp;rel=1&amp;fs=1&amp;autohide=2&amp;showsearch=0&amp;showinfo=1&amp;iv_load_policy=1&amp;wmode=transparent" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  640. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  641. if expected != output {
  642. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  643. }
  644. }
  645. func TestReplaceSecureYoutubeURL(t *testing.T) {
  646. os.Clearenv()
  647. var err error
  648. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  649. if err != nil {
  650. t.Fatalf(`Parsing failure: %v`, err)
  651. }
  652. input := `<iframe src="https://www.youtube.com/embed/test123"></iframe>`
  653. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  654. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  655. if expected != output {
  656. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  657. }
  658. }
  659. func TestReplaceSecureYoutubeURLWithParameters(t *testing.T) {
  660. os.Clearenv()
  661. var err error
  662. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  663. if err != nil {
  664. t.Fatalf(`Parsing failure: %v`, err)
  665. }
  666. input := `<iframe src="https://www.youtube.com/embed/test123?rel=0&amp;controls=0"></iframe>`
  667. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  668. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  669. if expected != output {
  670. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  671. }
  672. }
  673. func TestReplaceYoutubeURLAlreadyReplaced(t *testing.T) {
  674. os.Clearenv()
  675. var err error
  676. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  677. if err != nil {
  678. t.Fatalf(`Parsing failure: %v`, err)
  679. }
  680. input := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin"></iframe>`
  681. expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  682. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  683. if expected != output {
  684. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  685. }
  686. }
  687. func TestReplaceProtocolRelativeYoutubeURL(t *testing.T) {
  688. os.Clearenv()
  689. var err error
  690. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  691. if err != nil {
  692. t.Fatalf(`Parsing failure: %v`, err)
  693. }
  694. input := `<iframe src="//www.youtube.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen"></iframe>`
  695. expected := `<iframe src="https://www.youtube-nocookie.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  696. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  697. if expected != output {
  698. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  699. }
  700. }
  701. func TestReplaceYoutubeURLWithCustomURL(t *testing.T) {
  702. defer os.Clearenv()
  703. os.Setenv("YOUTUBE_EMBED_URL_OVERRIDE", "https://invidious.custom/embed/")
  704. var err error
  705. config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
  706. if err != nil {
  707. t.Fatalf(`Parsing failure: %v`, err)
  708. }
  709. input := `<iframe src="https://www.youtube.com/embed/test123?version=3&#038;rel=1&#038;fs=1&#038;autohide=2&#038;showsearch=0&#038;showinfo=1&#038;iv_load_policy=1&#038;wmode=transparent"></iframe>`
  710. expected := `<iframe src="https://invidious.custom/embed/test123?version=3&amp;rel=1&amp;fs=1&amp;autohide=2&amp;showsearch=0&amp;showinfo=1&amp;iv_load_policy=1&amp;wmode=transparent" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
  711. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  712. if expected != output {
  713. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  714. }
  715. }
  716. func TestVimeoIframeRewriteWithQueryString(t *testing.T) {
  717. input := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0"></iframe>`
  718. expected := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0&amp;dnt=1" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  719. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  720. if expected != output {
  721. t.Errorf(`Wrong output: %q != %q`, expected, output)
  722. }
  723. }
  724. func TestVimeoIframeRewriteWithoutQueryString(t *testing.T) {
  725. input := `<iframe src="https://player.vimeo.com/video/123456"></iframe>`
  726. expected := `<iframe src="https://player.vimeo.com/video/123456?dnt=1" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
  727. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  728. if expected != output {
  729. t.Errorf(`Wrong output: %q != %q`, expected, output)
  730. }
  731. }
  732. func TestReplaceNoScript(t *testing.T) {
  733. input := `<p>Before paragraph.</p><noscript>Inside <code>noscript</code> tag with an image: <img src="http://example.org/" alt="Test" loading="lazy"></noscript><p>After paragraph.</p>`
  734. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  735. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  736. if expected != output {
  737. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  738. }
  739. }
  740. func TestReplaceScript(t *testing.T) {
  741. input := `<p>Before paragraph.</p><script type="text/javascript">alert("1");</script><p>After paragraph.</p>`
  742. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  743. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  744. if expected != output {
  745. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  746. }
  747. }
  748. func TestReplaceStyle(t *testing.T) {
  749. input := `<p>Before paragraph.</p><style>body { background-color: #ff0000; }</style><p>After paragraph.</p>`
  750. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  751. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  752. if expected != output {
  753. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  754. }
  755. }
  756. func TestHiddenParagraph(t *testing.T) {
  757. input := `<p>Before paragraph.</p><p hidden>This should <em>not</em> appear in the <strong>output</strong></p><p>After paragraph.</p>`
  758. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  759. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  760. if expected != output {
  761. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  762. }
  763. }
  764. func TestAttributesAreStripped(t *testing.T) {
  765. input := `<p style="color: red;">Some text.<hr style="color: blue"/>Test.</p>`
  766. expected := `<p>Some text.</p><hr>Test.<p></p>`
  767. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  768. if expected != output {
  769. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  770. }
  771. }
  772. func TestMathML(t *testing.T) {
  773. input := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
  774. expected := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
  775. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  776. if expected != output {
  777. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  778. }
  779. }
  780. func TestInvalidMathMLXMLNamespace(t *testing.T) {
  781. input := `<math xmlns="http://example.org"><msup><mi>x</mi><mn>2</mn></msup></math>`
  782. expected := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
  783. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  784. if expected != output {
  785. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  786. }
  787. }
  788. func TestBlockedResourcesSubstrings(t *testing.T) {
  789. input := `<p>Before paragraph.</p><img src="http://stats.wordpress.com/something.php" alt="Blocked Resource"><p>After paragraph.</p>`
  790. expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
  791. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  792. if expected != output {
  793. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  794. }
  795. input = `<p>Before paragraph.</p><img src="http://twitter.com/share?text=This+is+google+a+search+engine&url=https%3A%2F%2Fwww.google.com" alt="Blocked Resource"><p>After paragraph.</p>`
  796. expected = `<p>Before paragraph.</p><p>After paragraph.</p>`
  797. output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  798. if expected != output {
  799. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  800. }
  801. input = `<p>Before paragraph.</p><img src="http://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.google.com%[title]=This+Is%2C+Google+a+search+engine" alt="Blocked Resource"><p>After paragraph.</p>`
  802. expected = `<p>Before paragraph.</p><p>After paragraph.</p>`
  803. output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  804. if expected != output {
  805. t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
  806. }
  807. }
  808. func TestAttrLowerCase(t *testing.T) {
  809. baseURL := "http://example.org/"
  810. testCases := []struct {
  811. name string
  812. input string
  813. expected string
  814. }{
  815. {
  816. name: "href-and-hidden-mixed-case",
  817. input: `<a HrEF="http://example.com" HIddEN>test</a>`,
  818. expected: ``,
  819. },
  820. {
  821. name: "href-mixed-case",
  822. input: `<a HrEF="http://example.com">test</a>`,
  823. expected: `<a href="http://example.com" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">test</a>`,
  824. },
  825. }
  826. for _, tc := range testCases {
  827. tc := tc
  828. t.Run(tc.name, func(t *testing.T) {
  829. output := sanitizeHTMLWithDefaultOptions(baseURL, tc.input)
  830. if tc.expected != output {
  831. t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
  832. }
  833. })
  834. }
  835. }
  836. func TestDeeplyNestedpage(t *testing.T) {
  837. input := "test"
  838. // -3 instead of -1 because <html><body> is automatically added.
  839. for range maxDepth - 3 {
  840. input = "<div>" + input + "</div>"
  841. }
  842. output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  843. want := "test"
  844. if output != want {
  845. t.Errorf(`Wrong output: "%s" != "%s"`, want, output)
  846. }
  847. input = "test"
  848. for range maxDepth - 2 {
  849. input = "<div>" + input + "</div>"
  850. }
  851. output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
  852. if output != "" {
  853. t.Errorf(`Wrong output: "%s" != "%s"`, "", output)
  854. }
  855. }