| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036 |
- // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
- // SPDX-License-Identifier: Apache-2.0
- package sanitizer // import "miniflux.app/v2/internal/reader/sanitizer"
- import (
- "fmt"
- "os"
- "strings"
- "testing"
- "golang.org/x/net/html"
- "miniflux.app/v2/internal/config"
- )
- func sanitizeHTMLWithDefaultOptions(baseURL, rawHTML string) string {
- return SanitizeHTML(baseURL, rawHTML, &SanitizerOptions{
- OpenLinksInNewTab: true,
- })
- }
- func BenchmarkSanitize(b *testing.B) {
- var testCases = map[string][]string{
- "miniflux_github.html": {"https://github.com/miniflux/v2", ""},
- "miniflux_wikipedia.html": {"https://fr.wikipedia.org/wiki/Miniflux", ""},
- }
- for filename := range testCases {
- data, err := os.ReadFile("testdata/" + filename)
- if err != nil {
- b.Fatalf(`Unable to read file %q: %v`, filename, err)
- }
- testCases[filename][1] = string(data)
- }
- for b.Loop() {
- for _, v := range testCases {
- sanitizeHTMLWithDefaultOptions(v[0], v[1])
- }
- }
- }
- func FuzzSanitizer(f *testing.F) {
- f.Fuzz(func(t *testing.T, orig string) {
- tok := html.NewTokenizer(strings.NewReader(orig))
- i := 0
- for tok.Next() != html.ErrorToken {
- i++
- }
- out := sanitizeHTMLWithDefaultOptions("", orig)
- tok = html.NewTokenizer(strings.NewReader(out))
- j := 0
- for tok.Next() != html.ErrorToken {
- j++
- }
- if j > i {
- t.Errorf("Got more html tokens in the sanitized html.")
- }
- })
- }
- func TestValidInput(t *testing.T) {
- input := `<p>This is a <strong>text</strong> with an image: <img src="http://example.org/" alt="Test" loading="lazy">.</p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if input != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
- }
- }
- func TestImgSanitization(t *testing.T) {
- baseURL := "http://example.org/"
- testCases := []struct {
- name string
- input string
- expected string
- }{
- {
- name: "width-and-height-attributes",
- input: `<img src="https://example.org/image.png" width="10" height="20">`,
- expected: `<img src="https://example.org/image.png" width="10" height="20" loading="lazy">`,
- },
- {
- name: "invalid-width-and-height-attributes",
- input: `<img src="https://example.org/image.png" width="10px" height="20px">`,
- expected: `<img src="https://example.org/image.png" loading="lazy">`,
- },
- {
- name: "invalid-width-attribute",
- input: `<img src="https://example.org/image.png" width="10px" height="20">`,
- expected: `<img src="https://example.org/image.png" height="20" loading="lazy">`,
- },
- {
- name: "empty-width-and-height-attributes",
- input: `<img src="https://example.org/image.png" width="" height="">`,
- expected: `<img src="https://example.org/image.png" loading="lazy">`,
- },
- {
- name: "invalid-height-attribute",
- input: `<img src="https://example.org/image.png" width="10" height="20px">`,
- expected: `<img src="https://example.org/image.png" width="10" loading="lazy">`,
- },
- {
- name: "negative-width-attribute",
- input: `<img src="https://example.org/image.png" width="-10" height="20">`,
- expected: `<img src="https://example.org/image.png" height="20" loading="lazy">`,
- },
- {
- name: "negative-height-attribute",
- input: `<img src="https://example.org/image.png" width="10" height="-20">`,
- expected: `<img src="https://example.org/image.png" width="10" loading="lazy">`,
- },
- {
- name: "text-data-url",
- input: `<img src="data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==" alt="Example">`,
- expected: ``,
- },
- {
- name: "image-data-url",
- input: `<img src="data:image/gif;base64,test" alt="Example">`,
- expected: `<img src="data:image/gif;base64,test" alt="Example" loading="lazy">`,
- },
- {
- name: "srcset-attribute",
- input: `<img srcset="example-320w.jpg, example-480w.jpg 1.5x, example-640w.jpg 2x, example-640w.jpg 640w" src="example-640w.jpg" alt="Example">`,
- expected: `<img srcset="http://example.org/example-320w.jpg, http://example.org/example-480w.jpg 1.5x, http://example.org/example-640w.jpg 2x, http://example.org/example-640w.jpg 640w" src="http://example.org/example-640w.jpg" alt="Example" loading="lazy">`,
- },
- {
- name: "srcset-attribute-without-src",
- input: `<img srcset="example-320w.jpg, example-480w.jpg 1.5x, example-640w.jpg 2x, example-640w.jpg 640w" alt="Example">`,
- expected: `<img srcset="http://example.org/example-320w.jpg, http://example.org/example-480w.jpg 1.5x, http://example.org/example-640w.jpg 2x, http://example.org/example-640w.jpg 640w" alt="Example" loading="lazy">`,
- },
- {
- name: "srcset-attribute-with-blocked-candidate",
- input: `<img srcset="https://stats.wordpress.com/tracker.png 1x, /example-640w.jpg 2x" src="/example-640w.jpg" alt="Example">`,
- expected: `<img srcset="http://example.org/example-640w.jpg 2x" src="http://example.org/example-640w.jpg" alt="Example" loading="lazy">`,
- },
- {
- name: "srcset-attribute-all-candidates-invalid",
- input: `<img srcset="javascript:alert(1) 1x, data:text/plain;base64,SGVsbG8= 2x" alt="Example">`,
- expected: ``,
- },
- {
- name: "fetchpriority-high",
- input: `<img src="https://example.org/image.png" fetchpriority="high">`,
- expected: `<img src="https://example.org/image.png" fetchpriority="high" loading="lazy">`,
- },
- {
- name: "fetchpriority-low",
- input: `<img src="https://example.org/image.png" fetchpriority="low">`,
- expected: `<img src="https://example.org/image.png" fetchpriority="low" loading="lazy">`,
- },
- {
- name: "fetchpriority-auto",
- input: `<img src="https://example.org/image.png" fetchpriority="auto">`,
- expected: `<img src="https://example.org/image.png" fetchpriority="auto" loading="lazy">`,
- },
- {
- name: "fetchpriority-invalid",
- input: `<img src="https://example.org/image.png" fetchpriority="invalid">`,
- expected: `<img src="https://example.org/image.png" loading="lazy">`,
- },
- {
- name: "decoding-sync",
- input: `<img src="https://example.org/image.png" decoding="sync">`,
- expected: `<img src="https://example.org/image.png" decoding="sync" loading="lazy">`,
- },
- {
- name: "decoding-async",
- input: `<img src="https://example.org/image.png" decoding="async">`,
- expected: `<img src="https://example.org/image.png" decoding="async" loading="lazy">`,
- },
- {
- name: "decoding-auto",
- input: `<img src="https://example.org/image.png" decoding="auto">`,
- expected: `<img src="https://example.org/image.png" decoding="auto" loading="lazy">`,
- },
- {
- name: "decoding-invalid",
- input: `<img src="https://example.org/image.png" decoding="invalid">`,
- expected: `<img src="https://example.org/image.png" loading="lazy">`,
- },
- }
- for _, tc := range testCases {
- t.Run(tc.name, func(t *testing.T) {
- output := sanitizeHTMLWithDefaultOptions(baseURL, tc.input)
- if output != tc.expected {
- t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
- }
- })
- }
- }
- func TestNonImgWithFetchPriorityAttribute(t *testing.T) {
- input := `<p fetchpriority="high">Text</p>`
- expected := `<p>Text</p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if output != expected {
- t.Errorf(`Wrong output: expected %q, got %q`, expected, output)
- }
- }
- func TestNonImgWithDecodingAttribute(t *testing.T) {
- input := `<p decoding="async">Text</p>`
- expected := `<p>Text</p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if output != expected {
- t.Errorf(`Wrong output: expected %q, got %q`, expected, output)
- }
- }
- func TestMediumImgWithSrcset(t *testing.T) {
- input := `<img alt="Image for post" class="t u v ef aj" src="https://miro.medium.com/max/5460/1*aJ9JibWDqO81qMfNtqgqrw.jpeg" srcset="https://miro.medium.com/max/552/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 276w, https://miro.medium.com/max/1000/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 500w" sizes="500px" width="2730" height="3407">`
- expected := `<img alt="Image for post" src="https://miro.medium.com/max/5460/1*aJ9JibWDqO81qMfNtqgqrw.jpeg" srcset="https://miro.medium.com/max/552/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 276w, https://miro.medium.com/max/1000/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 500w" sizes="500px" width="2730" height="3407" loading="lazy">`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if output != expected {
- t.Errorf(`Wrong output: %s`, output)
- }
- }
- func TestSelfClosingTags(t *testing.T) {
- baseURL := "http://example.org/"
- testCases := []struct {
- name string
- input string
- expected string
- }{
- {
- name: "br",
- input: `<p>Line<br>Break</p>`,
- expected: `<p>Line<br>Break</p>`,
- },
- {
- name: "hr",
- input: `<p>Before</p><hr><p>After</p>`,
- expected: `<p>Before</p><hr><p>After</p>`,
- },
- {
- name: "img",
- input: `<p>Image <img src="http://example.org/image.png" alt="Test"></p>`,
- expected: `<p>Image <img src="http://example.org/image.png" alt="Test" loading="lazy"></p>`,
- },
- {
- name: "source",
- input: `<picture><source src="http://example.org/video.mp4" type="video/mp4"></picture>`,
- expected: `<picture><source src="http://example.org/video.mp4" type="video/mp4"></picture>`,
- },
- {
- name: "wbr",
- input: `<p>soft<wbr>break</p>`,
- expected: `<p>soft<wbr>break</p>`,
- },
- }
- for _, tc := range testCases {
- t.Run(tc.name, func(t *testing.T) {
- output := sanitizeHTMLWithDefaultOptions(baseURL, tc.input)
- if output != tc.expected {
- t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
- }
- })
- }
- }
- func TestTable(t *testing.T) {
- input := `<table><tr><th>A</th><th colspan="2">B</th></tr><tr><td>C</td><td>D</td><td>E</td></tr></table>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if input != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
- }
- }
- func TestRelativeURL(t *testing.T) {
- input := `This <a href="/test.html">link is relative</a> and this image: <img src="../folder/image.png">`
- expected := `This <a href="http://example.org/test.html" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">link is relative</a> and this image: <img src="http://example.org/folder/image.png" loading="lazy">`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestProtocolRelativeURL(t *testing.T) {
- input := `This <a href="//static.example.org/index.html">link is relative</a>.`
- expected := `This <a href="https://static.example.org/index.html" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">link is relative</a>.`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestInvalidTag(t *testing.T) {
- input := `<p>My invalid <z>tag</z>.</p>`
- expected := `<p>My invalid tag.</p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestSourceSanitization(t *testing.T) {
- baseURL := "http://example.org/"
- testCases := []struct {
- name string
- input string
- expected string
- }{
- {
- name: "srcset-and-media",
- input: `<picture><source media="(min-width: 800px)" srcset="elva-800w.jpg"></picture>`,
- expected: `<picture><source media="(min-width: 800px)" srcset="http://example.org/elva-800w.jpg"></picture>`,
- },
- {
- name: "src-attribute",
- input: `<picture><source src="video.mp4" type="video/mp4"></picture>`,
- expected: `<picture><source src="http://example.org/video.mp4" type="video/mp4"></picture>`,
- },
- {
- name: "srcset-with-blocked-candidate",
- input: `<picture><source srcset="https://stats.wordpress.com/tracker.png 1x, /elva-800w.jpg 2x"></picture>`,
- expected: `<picture><source srcset="http://example.org/elva-800w.jpg 2x"></picture>`,
- },
- {
- name: "srcset-all-invalid",
- input: `<picture><source srcset="javascript:alert(1) 1x, data:text/plain;base64,SGVsbG8= 2x"></picture>`,
- expected: `<picture></picture>`,
- },
- }
- for _, tc := range testCases {
- t.Run(tc.name, func(t *testing.T) {
- output := sanitizeHTMLWithDefaultOptions(baseURL, tc.input)
- if output != tc.expected {
- t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
- }
- })
- }
- }
- func TestVideoTag(t *testing.T) {
- input := `<p>My valid <video src="videofile.webm" autoplay poster="posterimage.jpg">fallback</video>.</p>`
- expected := `<p>My valid <video src="http://example.org/videofile.webm" poster="http://example.org/posterimage.jpg" controls>fallback</video>.</p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestAudioAndSourceTag(t *testing.T) {
- input := `<p>My music <audio controls="controls"><source src="foo.wav" type="audio/wav"></audio>.</p>`
- expected := `<p>My music <audio controls><source src="http://example.org/foo.wav" type="audio/wav"></audio>.</p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestUnknownTag(t *testing.T) {
- input := `<p>My invalid <unknown>tag</unknown>.</p>`
- expected := `<p>My invalid tag.</p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestInvalidNestedTag(t *testing.T) {
- input := `<p>My invalid <z>tag with some <em>valid</em> tag</z>.</p>`
- expected := `<p>My invalid tag with some <em>valid</em> tag.</p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestInvalidIFrame(t *testing.T) {
- config.Opts = config.NewConfigOptions()
- input := `<iframe src="http://example.org/"></iframe>`
- expected := ``
- output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestBlockedIFrameWithChildElements(t *testing.T) {
- config.Opts = config.NewConfigOptions()
- input := `<iframe src="http://example.org/"><p>test</p></iframe>`
- expected := ``
- output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
- if expected != output {
- t.Errorf(`Wrong output: %q != %q`, expected, output)
- }
- }
- func TestSameDomainIFrame(t *testing.T) {
- config.Opts = config.NewConfigOptions()
- input := `<iframe src="http://example.com/test"></iframe>`
- expected := ``
- output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
- if expected != output {
- t.Errorf(`Wrong output: %q != %q`, expected, output)
- }
- }
- func TestInvidiousIFrame(t *testing.T) {
- config.Opts = config.NewConfigOptions()
- input := `<iframe src="https://yewtu.be/watch?v=video_id"></iframe>`
- expected := `<iframe src="https://yewtu.be/watch?v=video_id" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
- output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
- if expected != output {
- t.Errorf(`Wrong output: %q != %q`, expected, output)
- }
- }
- func TestIFrameAllowList(t *testing.T) {
- config.Opts = config.NewConfigOptions()
- allowedDomains := []string{
- "bandcamp.com",
- "cdn.embedly.com",
- "dailymotion.com",
- "framatube.org",
- "open.spotify.com",
- "player.bilibili.com",
- "player.twitch.tv",
- "player.vimeo.com",
- "soundcloud.com",
- "vk.com",
- "w.soundcloud.com",
- "youtube-nocookie.com",
- "youtube.com",
- }
- for _, domain := range allowedDomains {
- t.Run(domain, func(t *testing.T) {
- input := fmt.Sprintf(`<iframe src="https://%s/video/test"></iframe>`, domain)
- output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
- if !strings.Contains(output, "<iframe") {
- t.Errorf(`iframe from %q should be allowed, got: %q`, domain, output)
- }
- })
- }
- }
- func TestCustomYoutubeEmbedURL(t *testing.T) {
- os.Setenv("YOUTUBE_EMBED_URL_OVERRIDE", "https://www.invidious.custom/embed/")
- defer os.Clearenv()
- var err error
- if config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables(); err != nil {
- t.Fatalf(`Parsing failure: %v`, err)
- }
- input := `<iframe src="https://www.invidious.custom/embed/1234"></iframe>`
- expected := `<iframe src="https://www.invidious.custom/embed/1234" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
- output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
- if expected != output {
- t.Errorf(`Wrong output: %q != %q`, expected, output)
- }
- }
- func TestIFrameWithChildElements(t *testing.T) {
- config.Opts = config.NewConfigOptions()
- input := `<iframe src="https://www.youtube.com/"><p>test</p></iframe>`
- expected := `<iframe src="https://www.youtube.com/" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
- output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestIFrameWithReferrerPolicy(t *testing.T) {
- config.Opts = config.NewConfigOptions()
- input := `<iframe src="https://www.youtube.com/embed/test123" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
- expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
- output := sanitizeHTMLWithDefaultOptions("http://example.com/", input)
- if expected != output {
- t.Errorf(`Wrong output: %q != %q`, expected, output)
- }
- }
- func TestLinkWithTarget(t *testing.T) {
- input := `<p>This link is <a href="http://example.org/index.html">an anchor</a></p>`
- expected := `<p>This link is <a href="http://example.org/index.html" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">an anchor</a></p>`
- output := SanitizeHTML("http://example.org/", input, &SanitizerOptions{OpenLinksInNewTab: true})
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestLinkWithNoTarget(t *testing.T) {
- input := `<p>This link is <a href="http://example.org/index.html">an anchor</a></p>`
- expected := `<p>This link is <a href="http://example.org/index.html" rel="noopener noreferrer" referrerpolicy="no-referrer">an anchor</a></p>`
- output := SanitizeHTML("http://example.org/", input, &SanitizerOptions{OpenLinksInNewTab: false})
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestAnchorLink(t *testing.T) {
- input := `<p>This link is <a href="#some-anchor">an anchor</a></p>`
- expected := `<p>This link is <a href="#some-anchor">an anchor</a></p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestInvalidURLScheme(t *testing.T) {
- input := `<p>This link is <a src="file:///etc/passwd">not valid</a></p>`
- expected := `<p>This link is not valid</p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestURISchemes(t *testing.T) {
- baseURL := "http://example.org/"
- testCases := []struct {
- name string
- input string
- expected string
- }{
- {
- name: "apt",
- input: `<p>This link is <a href="apt:some-package?channel=test">valid</a></p>`,
- expected: `<p>This link is <a href="apt:some-package?channel=test" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "bitcoin",
- input: `<p>This link is <a href="bitcoin:175tWpb8K1S7NmH4Zx6rewF9WQrcZv245W">valid</a></p>`,
- expected: `<p>This link is <a href="bitcoin:175tWpb8K1S7NmH4Zx6rewF9WQrcZv245W" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "callto",
- input: `<p>This link is <a href="callto:12345679">valid</a></p>`,
- expected: `<p>This link is <a href="callto:12345679" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "feed-double-slash",
- input: `<p>This link is <a href="feed://example.com/rss.xml">valid</a></p>`,
- expected: `<p>This link is <a href="feed://example.com/rss.xml" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "feed-https",
- input: `<p>This link is <a href="feed:https://example.com/rss.xml">valid</a></p>`,
- expected: `<p>This link is <a href="feed:https://example.com/rss.xml" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "geo",
- input: `<p>This link is <a href="geo:13.4125,103.8667">valid</a></p>`,
- expected: `<p>This link is <a href="geo:13.4125,103.8667" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "itms",
- input: `<p>This link is <a href="itms://itunes.com/apps/my-app-name">valid</a></p>`,
- expected: `<p>This link is <a href="itms://itunes.com/apps/my-app-name" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "itms-apps",
- input: `<p>This link is <a href="itms-apps://itunes.com/apps/my-app-name">valid</a></p>`,
- expected: `<p>This link is <a href="itms-apps://itunes.com/apps/my-app-name" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "magnet",
- input: `<p>This link is <a href="magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7">valid</a></p>`,
- expected: `<p>This link is <a href="magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "mailto",
- input: `<p>This link is <a href="mailto:jsmith@example.com?subject=A%20Test&body=My%20idea%20is%3A%20%0A">valid</a></p>`,
- expected: `<p>This link is <a href="mailto:jsmith@example.com?subject=A%20Test&body=My%20idea%20is%3A%20%0A" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "news-double-slash",
- input: `<p>This link is <a href="news://news.server.example/*">valid</a></p>`,
- expected: `<p>This link is <a href="news://news.server.example/*" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "news-single-colon",
- input: `<p>This link is <a href="news:example.group.this">valid</a></p>`,
- expected: `<p>This link is <a href="news:example.group.this" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "nntp",
- input: `<p>This link is <a href="nntp://news.server.example/example.group.this">valid</a></p>`,
- expected: `<p>This link is <a href="nntp://news.server.example/example.group.this" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "rtmp",
- input: `<p>This link is <a href="rtmp://mycompany.com/vod/mp4:mycoolvideo.mov">valid</a></p>`,
- expected: `<p>This link is <a href="rtmp://mycompany.com/vod/mp4:mycoolvideo.mov" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "sip",
- input: `<p>This link is <a href="sip:+1-212-555-1212:1234@gateway.com;user=phone">valid</a></p>`,
- expected: `<p>This link is <a href="sip:+1-212-555-1212:1234@gateway.com;user=phone" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "sips",
- input: `<p>This link is <a href="sips:alice@atlanta.com?subject=project%20x&priority=urgent">valid</a></p>`,
- expected: `<p>This link is <a href="sips:alice@atlanta.com?subject=project%20x&priority=urgent" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "skype",
- input: `<p>This link is <a href="skype:echo123?call">valid</a></p>`,
- expected: `<p>This link is <a href="skype:echo123?call" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "spotify",
- input: `<p>This link is <a href="spotify:track:2jCnn1QPQ3E8ExtLe6INsx">valid</a></p>`,
- expected: `<p>This link is <a href="spotify:track:2jCnn1QPQ3E8ExtLe6INsx" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "steam",
- input: `<p>This link is <a href="steam://settings/account">valid</a></p>`,
- expected: `<p>This link is <a href="steam://settings/account" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "svn",
- input: `<p>This link is <a href="svn://example.org">valid</a></p>`,
- expected: `<p>This link is <a href="svn://example.org" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "svn-ssh",
- input: `<p>This link is <a href="svn+ssh://example.org">valid</a></p>`,
- expected: `<p>This link is <a href="svn+ssh://example.org" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "tel",
- input: `<p>This link is <a href="tel:+1-201-555-0123">valid</a></p>`,
- expected: `<p>This link is <a href="tel:+1-201-555-0123" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "webcal",
- input: `<p>This link is <a href="webcal://example.com/calendar.ics">valid</a></p>`,
- expected: `<p>This link is <a href="webcal://example.com/calendar.ics" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- {
- name: "xmpp",
- input: `<p>This link is <a href="xmpp:user@host?subscribe&type=subscribed">valid</a></p>`,
- expected: `<p>This link is <a href="xmpp:user@host?subscribe&type=subscribed" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">valid</a></p>`,
- },
- }
- for _, tc := range testCases {
- t.Run(tc.name, func(t *testing.T) {
- output := sanitizeHTMLWithDefaultOptions(baseURL, tc.input)
- if tc.expected != output {
- t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
- }
- })
- }
- }
- func TestBlacklistedLink(t *testing.T) {
- input := `<p>This image is not valid <img src="https://stats.wordpress.com/some-tracker"></p>`
- expected := `<p>This image is not valid </p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestLinkWithTrackers(t *testing.T) {
- input := `<p>This link has trackers <a href="https://example.com/page?utm_source=newsletter">Test</a></p>`
- expected := `<p>This link has trackers <a href="https://example.com/page" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">Test</a></p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestImageSrcWithTrackers(t *testing.T) {
- input := `<p>This image has trackers <img src="https://example.org/?id=123&utm_source=newsletter&utm_medium=email&fbclid=abc123"></p>`
- expected := `<p>This image has trackers <img src="https://example.org/?id=123" loading="lazy"></p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func Test1x1PixelTracker(t *testing.T) {
- input := `<p><img src="https://tracker1.example.org/" height="1" width="1"> and <img src="https://tracker2.example.org/" height="1" width="1"/></p>`
- expected := `<p> and </p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func Test0x0PixelTracker(t *testing.T) {
- input := `<p><img src="https://tracker1.example.org/" height="0" width="0"> and <img src="https://tracker2.example.org/" height="0" width="0"/></p>`
- expected := `<p> and </p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestXmlEntities(t *testing.T) {
- input := `<pre>echo "test" > /etc/hosts</pre>`
- expected := `<pre>echo "test" > /etc/hosts</pre>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestEspaceAttributes(t *testing.T) {
- input := `<td rowspan="<b>injection</b>">text</td>`
- expected := `text`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestReplaceYoutubeURL(t *testing.T) {
- os.Clearenv()
- var err error
- config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
- if err != nil {
- t.Fatalf(`Parsing failure: %v`, err)
- }
- input := `<iframe src="http://www.youtube.com/embed/test123?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent"></iframe>`
- expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestReplaceSecureYoutubeURL(t *testing.T) {
- os.Clearenv()
- var err error
- config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
- if err != nil {
- t.Fatalf(`Parsing failure: %v`, err)
- }
- input := `<iframe src="https://www.youtube.com/embed/test123"></iframe>`
- expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestReplaceSecureYoutubeURLWithParameters(t *testing.T) {
- os.Clearenv()
- var err error
- config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
- if err != nil {
- t.Fatalf(`Parsing failure: %v`, err)
- }
- input := `<iframe src="https://www.youtube.com/embed/test123?rel=0&controls=0"></iframe>`
- expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&controls=0" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestReplaceYoutubeURLAlreadyReplaced(t *testing.T) {
- os.Clearenv()
- var err error
- config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
- if err != nil {
- t.Fatalf(`Parsing failure: %v`, err)
- }
- input := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&controls=0" sandbox="allow-scripts allow-same-origin"></iframe>`
- expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&controls=0" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestReplaceProtocolRelativeYoutubeURL(t *testing.T) {
- os.Clearenv()
- var err error
- config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
- if err != nil {
- t.Fatalf(`Parsing failure: %v`, err)
- }
- input := `<iframe src="//www.youtube.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen"></iframe>`
- expected := `<iframe src="https://www.youtube-nocookie.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestReplaceYoutubeURLWithCustomURL(t *testing.T) {
- defer os.Clearenv()
- os.Setenv("YOUTUBE_EMBED_URL_OVERRIDE", "https://invidious.custom/embed/")
- var err error
- config.Opts, err = config.NewConfigParser().ParseEnvironmentVariables()
- if err != nil {
- t.Fatalf(`Parsing failure: %v`, err)
- }
- input := `<iframe src="https://www.youtube.com/embed/test123?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent"></iframe>`
- expected := `<iframe src="https://invidious.custom/embed/test123?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy" referrerpolicy="strict-origin-when-cross-origin"></iframe>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestVimeoIframeRewriteWithQueryString(t *testing.T) {
- input := `<iframe src="https://player.vimeo.com/video/123456?title=0&byline=0"></iframe>`
- expected := `<iframe src="https://player.vimeo.com/video/123456?title=0&byline=0&dnt=1" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: %q != %q`, expected, output)
- }
- }
- func TestVimeoIframeRewriteWithoutQueryString(t *testing.T) {
- input := `<iframe src="https://player.vimeo.com/video/123456"></iframe>`
- expected := `<iframe src="https://player.vimeo.com/video/123456?dnt=1" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: %q != %q`, expected, output)
- }
- }
- func TestReplaceNoScript(t *testing.T) {
- input := `<p>Before paragraph.</p><noscript>Inside <code>noscript</code> tag with an image: <img src="http://example.org/" alt="Test" loading="lazy"></noscript><p>After paragraph.</p>`
- expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestReplaceScript(t *testing.T) {
- input := `<p>Before paragraph.</p><script type="text/javascript">alert("1");</script><p>After paragraph.</p>`
- expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestReplaceStyle(t *testing.T) {
- input := `<p>Before paragraph.</p><style>body { background-color: #ff0000; }</style><p>After paragraph.</p>`
- expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestHiddenParagraph(t *testing.T) {
- input := `<p>Before paragraph.</p><p hidden>This should <em>not</em> appear in the <strong>output</strong></p><p>After paragraph.</p>`
- expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestAttributesAreStripped(t *testing.T) {
- input := `<p style="color: red;">Some text.<hr style="color: blue"/>Test.</p>`
- expected := `<p>Some text.</p><hr>Test.<p></p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestMathML(t *testing.T) {
- input := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
- expected := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestInvalidMathMLXMLNamespace(t *testing.T) {
- input := `<math xmlns="http://example.org"><msup><mi>x</mi><mn>2</mn></msup></math>`
- expected := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestBlockedResourcesSubstrings(t *testing.T) {
- input := `<p>Before paragraph.</p><img src="http://stats.wordpress.com/something.php" alt="Blocked Resource"><p>After paragraph.</p>`
- expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- input = `<p>Before paragraph.</p><img src="http://twitter.com/share?text=This+is+google+a+search+engine&url=https%3A%2F%2Fwww.google.com" alt="Blocked Resource"><p>After paragraph.</p>`
- expected = `<p>Before paragraph.</p><p>After paragraph.</p>`
- output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- input = `<p>Before paragraph.</p><img src="http://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.google.com%[title]=This+Is%2C+Google+a+search+engine" alt="Blocked Resource"><p>After paragraph.</p>`
- expected = `<p>Before paragraph.</p><p>After paragraph.</p>`
- output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if expected != output {
- t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
- }
- }
- func TestAttrLowerCase(t *testing.T) {
- baseURL := "http://example.org/"
- testCases := []struct {
- name string
- input string
- expected string
- }{
- {
- name: "href-and-hidden-mixed-case",
- input: `<a HrEF="http://example.com" HIddEN>test</a>`,
- expected: ``,
- },
- {
- name: "href-mixed-case",
- input: `<a HrEF="http://example.com">test</a>`,
- expected: `<a href="http://example.com" rel="noopener noreferrer" referrerpolicy="no-referrer" target="_blank">test</a>`,
- },
- }
- for _, tc := range testCases {
- t.Run(tc.name, func(t *testing.T) {
- output := sanitizeHTMLWithDefaultOptions(baseURL, tc.input)
- if tc.expected != output {
- t.Errorf(`Wrong output for input %q: expected %q, got %q`, tc.input, tc.expected, output)
- }
- })
- }
- }
- func TestDeeplyNestedpage(t *testing.T) {
- maxDepth := 512 // html.Parse has a maximum depth of 512
- input := "test"
- // -2 instead of -1 because <html><body> is automatically added.
- for range maxDepth - 2 {
- input = "<div>" + input + "</div>"
- }
- output := sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- want := "test"
- if output != want {
- t.Errorf(`Wrong output: "%s" != "%s"`, want, output)
- }
- input = "test"
- for range maxDepth - 1 {
- input = "<div>" + input + "</div>"
- }
- output = sanitizeHTMLWithDefaultOptions("http://example.org/", input)
- if output != "" {
- t.Errorf(`Wrong output: "%s" != "%s"`, "", output)
- }
- }
|