html_test.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. package html // import "github.com/tdewolff/minify/html"
  2. import (
  3. "bytes"
  4. "fmt"
  5. "io"
  6. "io/ioutil"
  7. "net/url"
  8. "os"
  9. "regexp"
  10. "testing"
  11. "github.com/tdewolff/minify"
  12. "github.com/tdewolff/minify/css"
  13. "github.com/tdewolff/minify/js"
  14. "github.com/tdewolff/minify/json"
  15. "github.com/tdewolff/minify/svg"
  16. "github.com/tdewolff/minify/xml"
  17. "github.com/tdewolff/test"
  18. )
  19. func TestHTML(t *testing.T) {
  20. htmlTests := []struct {
  21. html string
  22. expected string
  23. }{
  24. {`html`, `html`},
  25. {`<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.0//EN" "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd">`, `<!doctype html>`},
  26. {`<!-- comment -->`, ``},
  27. {`<style><!--\ncss\n--></style>`, `<style><!--\ncss\n--></style>`},
  28. {`<style>&</style>`, `<style>&</style>`},
  29. {`<html><head></head><body>x</body></html>`, `x`},
  30. {`<meta http-equiv="content-type" content="text/html; charset=utf-8">`, `<meta charset=utf-8>`},
  31. {`<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />`, `<meta charset=utf-8>`},
  32. {`<meta http-equiv="Content-Security-Policy" content="default-src 'self'; img-src https://*; child-src 'none';">`, `<meta http-equiv=content-security-policy content="default-src 'self'; img-src https://*; child-src 'none';">`},
  33. {`<meta name="keywords" content="a, b">`, `<meta name=keywords content=a,b>`},
  34. {`<meta name="viewport" content="width = 996" />`, `<meta name=viewport content="width=996">`},
  35. {`<span attr="test"></span>`, `<span attr=test></span>`},
  36. {`<span attr='test&apos;test'></span>`, `<span attr="test'test"></span>`},
  37. {`<span attr="test&quot;test"></span>`, `<span attr='test"test'></span>`},
  38. {`<span attr='test""&apos;&amp;test'></span>`, `<span attr='test""&#39;&amp;test'></span>`},
  39. {`<span attr="test/test"></span>`, `<span attr=test/test></span>`},
  40. {`<span>&amp;</span>`, `<span>&amp;</span>`},
  41. {`<span clear=none method=GET></span>`, `<span></span>`},
  42. {`<span onload="javascript:x;"></span>`, `<span onload=x;></span>`},
  43. {`<span selected="selected"></span>`, `<span selected></span>`},
  44. {`<noscript><html><img id="x"></noscript>`, `<noscript><img id=x></noscript>`},
  45. {`<body id="main"></body>`, `<body id=main>`},
  46. {`<link href="data:text/plain, data">`, `<link href=data:,+data>`},
  47. {`<svg width="100" height="100"><circle cx="50" cy="50" r="40" stroke="green" stroke-width="4" fill="yellow" /></svg>`, `<svg width="100" height="100"><circle cx="50" cy="50" r="40" stroke="green" stroke-width="4" fill="yellow" /></svg>`},
  48. {`</span >`, `</span>`},
  49. {`<meta name=viewport content="width=0.1, initial-scale=1.0 , maximum-scale=1000">`, `<meta name=viewport content="width=.1,initial-scale=1,maximum-scale=1e3">`},
  50. {`<br/>`, `<br>`},
  51. // increase coverage
  52. {`<script style="css">js</script>`, `<script style=css>js</script>`},
  53. {`<script type="application/javascript">js</script>`, `<script type=application/javascript>js</script>`},
  54. {`<meta http-equiv="content-type" content="text/plain, text/html">`, `<meta http-equiv=content-type content=text/plain,text/html>`},
  55. {`<meta http-equiv="content-style-type" content="text/less">`, `<meta http-equiv=content-style-type content=text/less>`},
  56. {`<meta http-equiv="content-style-type" content="text/less; charset=utf-8">`, `<meta http-equiv=content-style-type content="text/less;charset=utf-8">`},
  57. {`<meta http-equiv="content-script-type" content="application/js">`, `<meta http-equiv=content-script-type content=application/js>`},
  58. {`<span attr=""></span>`, `<span attr></span>`},
  59. {`<code>x</code>`, `<code>x</code>`},
  60. {`<p></p><p></p>`, `<p><p>`},
  61. {`<ul><li></li> <li></li></ul>`, `<ul><li><li></ul>`},
  62. {`<p></p><a></a>`, `<p></p><a></a>`},
  63. {`<p></p>x<a></a>`, `<p></p>x<a></a>`},
  64. {`<span style=>`, `<span>`},
  65. {`<button onclick=>`, `<button>`},
  66. // whitespace
  67. {`cats and dogs `, `cats and dogs`},
  68. {` <div> <i> test </i> <b> test </b> </div> `, `<div><i>test</i> <b>test</b></div>`},
  69. {`<strong>x </strong>y`, `<strong>x </strong>y`},
  70. {`<strong>x </strong> y`, `<strong>x</strong> y`},
  71. {"<strong>x </strong>\ny", "<strong>x</strong>\ny"},
  72. {`<p>x </p>y`, `<p>x</p>y`},
  73. {`x <p>y</p>`, `x<p>y`},
  74. {` <!doctype html> <!--comment--> <html> <body><p></p></body></html> `, `<!doctype html><p>`}, // spaces before html and at the start of html are dropped
  75. {`<p>x<br> y`, `<p>x<br>y`},
  76. {`<p>x </b> <b> y`, `<p>x</b> <b>y`},
  77. {`a <code></code> b`, `a <code></code>b`},
  78. {`a <code>code</code> b`, `a <code>code</code> b`},
  79. {`a <code> code </code> b`, `a <code>code</code> b`},
  80. {`a <script>script</script> b`, `a <script>script</script>b`},
  81. {"text\n<!--comment-->\ntext", "text\ntext"},
  82. {"abc\n</body>\ndef", "abc\ndef"},
  83. {"<x>\n<!--y-->\n</x>", "<x></x>"},
  84. {"a <template> b </template> c", "a <template>b</template>c"},
  85. // from HTML Minifier
  86. {`<DIV TITLE="blah">boo</DIV>`, `<div title=blah>boo</div>`},
  87. {"<p title\n\n\t =\n \"bar\">foo</p>", `<p title=bar>foo`},
  88. {`<p class=" foo ">foo bar baz</p>`, `<p class=foo>foo bar baz`},
  89. {`<input maxlength=" 5 ">`, `<input maxlength=5>`},
  90. {`<input type="text">`, `<input>`},
  91. {`<form method="get">`, `<form>`},
  92. {`<script language="Javascript">alert(1)</script>`, `<script>alert(1)</script>`},
  93. {`<script></script>`, ``},
  94. {`<p onclick=" JavaScript: x">x</p>`, `<p onclick=" x">x`},
  95. {`<span Selected="selected"></span>`, `<span selected></span>`},
  96. {`<table><thead><tr><th>foo</th><th>bar</th></tr></thead><tfoot><tr><th>baz</th><th>qux</th></tr></tfoot><tbody><tr><td>boo</td><td>moo</td></tr></tbody></table>`,
  97. `<table><thead><tr><th>foo<th>bar<tfoot><tr><th>baz<th>qux<tbody><tr><td>boo<td>moo</table>`},
  98. {`<select><option>foo</option><option>bar</option></select>`, `<select><option>foo<option>bar</select>`},
  99. {`<meta name="keywords" content="A, B">`, `<meta name=keywords content=A,B>`},
  100. {`<iframe><html> <p> x </p> </html></iframe>`, `<iframe><p>x</iframe>`},
  101. {`<math> &int;_a_^b^{f(x)<over>1+x} dx </math>`, `<math> &int;_a_^b^{f(x)<over>1+x} dx </math>`},
  102. {`<script language="x" charset="x" src="y"></script>`, `<script src=y></script>`},
  103. {`<style media="all">x</style>`, `<style>x</style>`},
  104. {`<a id="abc" name="abc">y</a>`, `<a id=abc>y</a>`},
  105. {`<a id="" value="">y</a>`, `<a value>y</a>`},
  106. // from Kangax html-minfier
  107. {`<span style="font-family:&quot;Helvetica Neue&quot;,&quot;Helvetica&quot;,Helvetica,Arial,sans-serif">text</span>`, `<span style='font-family:"Helvetica Neue","Helvetica",Helvetica,Arial,sans-serif'>text</span>`},
  108. // go-fuzz
  109. {`<meta e t n content=ful><a b`, `<meta e t n content=ful><a b>`},
  110. {`<img alt=a'b="">`, `<img alt='a&#39;b=""'>`},
  111. {`</b`, `</b`},
  112. {`<title></`, `<title></`},
  113. {`<svg <`, `<svg <`},
  114. {`<svg "`, `<svg "`},
  115. {`<svg></`, `<svg></`},
  116. {`<script><!--<`, `<script><!--<`},
  117. // bugs
  118. {`<p>text</p><br>text`, `<p>text</p><br>text`}, // #122
  119. {`text <img> text`, `text <img> text`}, // #89
  120. {`text <progress></progress> text`, `text <progress></progress> text`}, // #89
  121. {`<pre> <x> a b </x> </pre>`, `<pre> <x> a b </x> </pre>`}, // #82
  122. {`<svg id="1"></svg>`, `<svg id="1"></svg>`}, // #67
  123. }
  124. m := minify.New()
  125. m.AddFunc("text/html", Minify)
  126. m.AddFunc("text/css", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  127. _, err := io.Copy(w, r)
  128. return err
  129. })
  130. m.AddFunc("text/javascript", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  131. _, err := io.Copy(w, r)
  132. return err
  133. })
  134. for _, tt := range htmlTests {
  135. t.Run(tt.html, func(t *testing.T) {
  136. r := bytes.NewBufferString(tt.html)
  137. w := &bytes.Buffer{}
  138. err := Minify(m, w, r, nil)
  139. test.Minify(t, tt.html, err, w.String(), tt.expected)
  140. })
  141. }
  142. }
  143. func TestHTMLKeepEndTags(t *testing.T) {
  144. htmlTests := []struct {
  145. html string
  146. expected string
  147. }{
  148. {`<p></p><p></p>`, `<p></p><p></p>`},
  149. {`<ul><li></li><li></li></ul>`, `<ul><li></li><li></li></ul>`},
  150. }
  151. m := minify.New()
  152. htmlMinifier := &Minifier{KeepEndTags: true}
  153. for _, tt := range htmlTests {
  154. t.Run(tt.html, func(t *testing.T) {
  155. r := bytes.NewBufferString(tt.html)
  156. w := &bytes.Buffer{}
  157. err := htmlMinifier.Minify(m, w, r, nil)
  158. test.Minify(t, tt.html, err, w.String(), tt.expected)
  159. })
  160. }
  161. }
  162. func TestHTMLKeepConditionalComments(t *testing.T) {
  163. htmlTests := []struct {
  164. html string
  165. expected string
  166. }{
  167. {`<!--[if IE 6]> <b> </b> <![endif]-->`, `<!--[if IE 6]><b></b><![endif]-->`},
  168. {`<![if IE 6]> <b> </b> <![endif]>`, `<![if IE 6]><b></b><![endif]>`},
  169. {`<!--[if !mso]><!--> <b> </b> <!--<![endif]-->`, `<!--[if !mso]><!--><b></b><!--<![endif]-->`},
  170. }
  171. m := minify.New()
  172. htmlMinifier := &Minifier{KeepConditionalComments: true}
  173. for _, tt := range htmlTests {
  174. t.Run(tt.html, func(t *testing.T) {
  175. r := bytes.NewBufferString(tt.html)
  176. w := &bytes.Buffer{}
  177. err := htmlMinifier.Minify(m, w, r, nil)
  178. test.Minify(t, tt.html, err, w.String(), tt.expected)
  179. })
  180. }
  181. }
  182. func TestHTMLKeepWhitespace(t *testing.T) {
  183. htmlTests := []struct {
  184. html string
  185. expected string
  186. }{
  187. {`cats and dogs `, `cats and dogs`},
  188. {` <div> <i> test </i> <b> test </b> </div> `, `<div> <i> test </i> <b> test </b> </div>`},
  189. {`<strong>x </strong>y`, `<strong>x </strong>y`},
  190. {`<strong>x </strong> y`, `<strong>x </strong> y`},
  191. {"<strong>x </strong>\ny", "<strong>x </strong>\ny"},
  192. {`<p>x </p>y`, `<p>x </p>y`},
  193. {`x <p>y</p>`, `x <p>y`},
  194. {` <!doctype html> <!--comment--> <html> <body><p></p></body></html> `, `<!doctype html><p>`}, // spaces before html and at the start of html are dropped
  195. {`<p>x<br> y`, `<p>x<br> y`},
  196. {`<p>x </b> <b> y`, `<p>x </b> <b> y`},
  197. {`a <code>code</code> b`, `a <code>code</code> b`},
  198. {`a <code></code> b`, `a <code></code> b`},
  199. {`a <script>script</script> b`, `a <script>script</script> b`},
  200. {"text\n<!--comment-->\ntext", "text\ntext"},
  201. {"text\n<!--comment-->text<!--comment--> text", "text\ntext text"},
  202. {"abc\n</body>\ndef", "abc\ndef"},
  203. {"<x>\n<!--y-->\n</x>", "<x>\n</x>"},
  204. {"<style>lala{color:red}</style>", "<style>lala{color:red}</style>"},
  205. }
  206. m := minify.New()
  207. htmlMinifier := &Minifier{KeepWhitespace: true}
  208. for _, tt := range htmlTests {
  209. t.Run(tt.html, func(t *testing.T) {
  210. r := bytes.NewBufferString(tt.html)
  211. w := &bytes.Buffer{}
  212. err := htmlMinifier.Minify(m, w, r, nil)
  213. test.Minify(t, tt.html, err, w.String(), tt.expected)
  214. })
  215. }
  216. }
  217. func TestHTMLURL(t *testing.T) {
  218. htmlTests := []struct {
  219. url string
  220. html string
  221. expected string
  222. }{
  223. {`http://example.com/`, `<a href=http://example.com/>link</a>`, `<a href=//example.com/>link</a>`},
  224. {`https://example.com/`, `<a href=http://example.com/>link</a>`, `<a href=http://example.com/>link</a>`},
  225. {`http://example.com/`, `<a href=https://example.com/>link</a>`, `<a href=https://example.com/>link</a>`},
  226. {`https://example.com/`, `<a href=https://example.com/>link</a>`, `<a href=//example.com/>link</a>`},
  227. {`http://example.com/`, `<a href=" http://example.com ">x</a>`, `<a href=//example.com>x</a>`},
  228. {`http://example.com/`, `<link rel="stylesheet" type="text/css" href="http://example.com">`, `<link rel=stylesheet href=//example.com>`},
  229. {`http://example.com/`, `<!doctype html> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <head profile="http://dublincore.org/documents/dcq-html/"> <!-- Barlesque 2.75.0 --> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />`,
  230. `<!doctype html><html xmlns=//www.w3.org/1999/xhtml xml:lang=en><head profile=//dublincore.org/documents/dcq-html/><meta charset=utf-8>`},
  231. {`http://example.com/`, `<html xmlns="http://www.w3.org/1999/xhtml"></html>`, `<html xmlns=//www.w3.org/1999/xhtml>`},
  232. {`https://example.com/`, `<html xmlns="http://www.w3.org/1999/xhtml"></html>`, `<html xmlns=http://www.w3.org/1999/xhtml>`},
  233. {`http://example.com/`, `<html xmlns="https://www.w3.org/1999/xhtml"></html>`, `<html xmlns=https://www.w3.org/1999/xhtml>`},
  234. {`https://example.com/`, `<html xmlns="https://www.w3.org/1999/xhtml"></html>`, `<html xmlns=//www.w3.org/1999/xhtml>`},
  235. }
  236. m := minify.New()
  237. m.AddFunc("text/html", Minify)
  238. for _, tt := range htmlTests {
  239. t.Run(tt.url, func(t *testing.T) {
  240. r := bytes.NewBufferString(tt.html)
  241. w := &bytes.Buffer{}
  242. m.URL, _ = url.Parse(tt.url)
  243. err := Minify(m, w, r, nil)
  244. test.Minify(t, tt.html, err, w.String(), tt.expected)
  245. })
  246. }
  247. }
  248. func TestSpecialTagClosing(t *testing.T) {
  249. m := minify.New()
  250. m.AddFunc("text/html", Minify)
  251. m.AddFunc("text/css", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  252. b, err := ioutil.ReadAll(r)
  253. test.Error(t, err, nil)
  254. test.String(t, string(b), "</script>")
  255. _, err = w.Write(b)
  256. return err
  257. })
  258. html := `<style></script></style>`
  259. r := bytes.NewBufferString(html)
  260. w := &bytes.Buffer{}
  261. err := Minify(m, w, r, nil)
  262. test.Minify(t, html, err, w.String(), html)
  263. }
  264. func TestReaderErrors(t *testing.T) {
  265. r := test.NewErrorReader(0)
  266. w := &bytes.Buffer{}
  267. m := minify.New()
  268. err := Minify(m, w, r, nil)
  269. test.T(t, err, test.ErrPlain, "return error at first read")
  270. }
  271. func TestWriterErrors(t *testing.T) {
  272. errorTests := []struct {
  273. html string
  274. n []int
  275. }{
  276. {`<!doctype>`, []int{0}},
  277. {`text`, []int{0}},
  278. {`<foo attr=val>`, []int{0, 1, 2, 3, 4, 5}},
  279. {`</foo>`, []int{0}},
  280. {`<style>x</style>`, []int{2}},
  281. {`<textarea>x</textarea>`, []int{2}},
  282. {`<code>x</code>`, []int{2}},
  283. {`<pre>x</pre>`, []int{2}},
  284. {`<svg>x</svg>`, []int{0}},
  285. {`<math>x</math>`, []int{0}},
  286. {`<!--[if IE 6]> text <![endif]-->`, []int{0, 1, 2}},
  287. {`<![if IE 6]> text <![endif]>`, []int{0}},
  288. }
  289. m := minify.New()
  290. m.Add("text/html", &Minifier{
  291. KeepConditionalComments: true,
  292. })
  293. for _, tt := range errorTests {
  294. for _, n := range tt.n {
  295. t.Run(fmt.Sprint(tt.html, " ", tt.n), func(t *testing.T) {
  296. r := bytes.NewBufferString(tt.html)
  297. w := test.NewErrorWriter(n)
  298. err := m.Minify("text/html", w, r)
  299. test.T(t, err, test.ErrPlain)
  300. })
  301. }
  302. }
  303. }
  304. func TestMinifyErrors(t *testing.T) {
  305. errorTests := []struct {
  306. html string
  307. err error
  308. }{
  309. {`<style>abc</style>`, test.ErrPlain},
  310. {`<path style="abc"/>`, test.ErrPlain},
  311. {`<path onclick="abc"/>`, test.ErrPlain},
  312. {`<svg></svg>`, test.ErrPlain},
  313. {`<math></math>`, test.ErrPlain},
  314. }
  315. m := minify.New()
  316. m.AddFunc("text/css", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  317. return test.ErrPlain
  318. })
  319. m.AddFunc("text/javascript", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  320. return test.ErrPlain
  321. })
  322. m.AddFunc("image/svg+xml", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  323. return test.ErrPlain
  324. })
  325. m.AddFunc("application/mathml+xml", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  326. return test.ErrPlain
  327. })
  328. for _, tt := range errorTests {
  329. t.Run(tt.html, func(t *testing.T) {
  330. r := bytes.NewBufferString(tt.html)
  331. w := &bytes.Buffer{}
  332. err := Minify(m, w, r, nil)
  333. test.T(t, err, tt.err)
  334. })
  335. }
  336. }
  337. ////////////////////////////////////////////////////////////////
  338. func ExampleMinify() {
  339. m := minify.New()
  340. m.AddFunc("text/html", Minify)
  341. m.AddFunc("text/css", css.Minify)
  342. m.AddFunc("text/javascript", js.Minify)
  343. m.AddFunc("image/svg+xml", svg.Minify)
  344. m.AddFuncRegexp(regexp.MustCompile("[/+]json$"), json.Minify)
  345. m.AddFuncRegexp(regexp.MustCompile("[/+]xml$"), xml.Minify)
  346. // set URL to minify link locations too
  347. m.URL, _ = url.Parse("https://www.example.com/")
  348. if err := m.Minify("text/html", os.Stdout, os.Stdin); err != nil {
  349. panic(err)
  350. }
  351. }
  352. func ExampleMinify_options() {
  353. m := minify.New()
  354. m.Add("text/html", &Minifier{
  355. KeepDefaultAttrVals: true,
  356. KeepWhitespace: true,
  357. })
  358. if err := m.Minify("text/html", os.Stdout, os.Stdin); err != nil {
  359. panic(err)
  360. }
  361. }
  362. func ExampleMinify_reader() {
  363. b := bytes.NewReader([]byte("<html><body><h1>Example</h1></body></html>"))
  364. m := minify.New()
  365. m.Add("text/html", &Minifier{})
  366. r := m.Reader("text/html", b)
  367. if _, err := io.Copy(os.Stdout, r); err != nil {
  368. panic(err)
  369. }
  370. // Output: <h1>Example</h1>
  371. }
  372. func ExampleMinify_writer() {
  373. m := minify.New()
  374. m.Add("text/html", &Minifier{})
  375. w := m.Writer("text/html", os.Stdout)
  376. w.Write([]byte("<html><body><h1>Example</h1></body></html>"))
  377. w.Close()
  378. // Output: <h1>Example</h1>
  379. }