html_test.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. package html // import "github.com/tdewolff/minify/html"
  2. import (
  3. "bytes"
  4. "fmt"
  5. "io"
  6. "io/ioutil"
  7. "net/url"
  8. "os"
  9. "regexp"
  10. "testing"
  11. "github.com/tdewolff/minify"
  12. "github.com/tdewolff/minify/css"
  13. "github.com/tdewolff/minify/js"
  14. "github.com/tdewolff/minify/json"
  15. "github.com/tdewolff/minify/svg"
  16. "github.com/tdewolff/minify/xml"
  17. "github.com/tdewolff/test"
  18. )
  19. func TestHTML(t *testing.T) {
  20. htmlTests := []struct {
  21. html string
  22. expected string
  23. }{
  24. {`html`, `html`},
  25. {`<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.0//EN" "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd">`, `<!doctype html>`},
  26. {`<!-- comment -->`, ``},
  27. {`<style><!--\ncss\n--></style>`, `<style><!--\ncss\n--></style>`},
  28. {`<style>&</style>`, `<style>&</style>`},
  29. {`<html><head></head><body>x</body></html>`, `x`},
  30. {`<meta http-equiv="content-type" content="text/html; charset=utf-8">`, `<meta charset=utf-8>`},
  31. {`<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />`, `<meta charset=utf-8>`},
  32. {`<meta name="keywords" content="a, b">`, `<meta name=keywords content=a,b>`},
  33. {`<meta name="viewport" content="width = 996" />`, `<meta name=viewport content="width=996">`},
  34. {`<span attr="test"></span>`, `<span attr=test></span>`},
  35. {`<span attr='test&apos;test'></span>`, `<span attr="test'test"></span>`},
  36. {`<span attr="test&quot;test"></span>`, `<span attr='test"test'></span>`},
  37. {`<span attr='test""&apos;&amp;test'></span>`, `<span attr='test""&#39;&amp;test'></span>`},
  38. {`<span attr="test/test"></span>`, `<span attr=test/test></span>`},
  39. {`<span>&amp;</span>`, `<span>&amp;</span>`},
  40. {`<span clear=none method=GET></span>`, `<span></span>`},
  41. {`<span onload="javascript:x;"></span>`, `<span onload=x;></span>`},
  42. {`<span selected="selected"></span>`, `<span selected></span>`},
  43. {`<noscript><html><img id="x"></noscript>`, `<noscript><img id=x></noscript>`},
  44. {`<body id="main"></body>`, `<body id=main>`},
  45. {`<link href="data:text/plain, data">`, `<link href=data:,+data>`},
  46. {`<svg width="100" height="100"><circle cx="50" cy="50" r="40" stroke="green" stroke-width="4" fill="yellow" /></svg>`, `<svg width="100" height="100"><circle cx="50" cy="50" r="40" stroke="green" stroke-width="4" fill="yellow" /></svg>`},
  47. {`</span >`, `</span>`},
  48. {`<meta name=viewport content="width=0.1, initial-scale=1.0 , maximum-scale=1000">`, `<meta name=viewport content="width=.1,initial-scale=1,maximum-scale=1e3">`},
  49. {`<br/>`, `<br>`},
  50. // increase coverage
  51. {`<script style="css">js</script>`, `<script style=css>js</script>`},
  52. {`<script type="application/javascript">js</script>`, `<script type=application/javascript>js</script>`},
  53. {`<meta http-equiv="content-type" content="text/plain, text/html">`, `<meta http-equiv=content-type content=text/plain,text/html>`},
  54. {`<meta http-equiv="content-style-type" content="text/less">`, `<meta http-equiv=content-style-type content=text/less>`},
  55. {`<meta http-equiv="content-style-type" content="text/less; charset=utf-8">`, `<meta http-equiv=content-style-type content="text/less;charset=utf-8">`},
  56. {`<meta http-equiv="content-script-type" content="application/js">`, `<meta http-equiv=content-script-type content=application/js>`},
  57. {`<span attr=""></span>`, `<span attr></span>`},
  58. {`<code>x</code>`, `<code>x</code>`},
  59. {`<p></p><p></p>`, `<p><p>`},
  60. {`<ul><li></li> <li></li></ul>`, `<ul><li><li></ul>`},
  61. {`<p></p><a></a>`, `<p></p><a></a>`},
  62. {`<p></p>x<a></a>`, `<p></p>x<a></a>`},
  63. {`<span style=>`, `<span>`},
  64. {`<button onclick=>`, `<button>`},
  65. // whitespace
  66. {`cats and dogs `, `cats and dogs`},
  67. {` <div> <i> test </i> <b> test </b> </div> `, `<div><i>test</i> <b>test</b></div>`},
  68. {`<strong>x </strong>y`, `<strong>x </strong>y`},
  69. {`<strong>x </strong> y`, `<strong>x</strong> y`},
  70. {"<strong>x </strong>\ny", "<strong>x</strong>\ny"},
  71. {`<p>x </p>y`, `<p>x</p>y`},
  72. {`x <p>y</p>`, `x<p>y`},
  73. {` <!doctype html> <!--comment--> <html> <body><p></p></body></html> `, `<!doctype html><p>`}, // spaces before html and at the start of html are dropped
  74. {`<p>x<br> y`, `<p>x<br>y`},
  75. {`<p>x </b> <b> y`, `<p>x</b> <b>y`},
  76. {`a <code></code> b`, `a <code></code>b`},
  77. {`a <code>code</code> b`, `a <code>code</code> b`},
  78. {`a <code> code </code> b`, `a <code>code</code> b`},
  79. {`a <script>script</script> b`, `a <script>script</script>b`},
  80. {"text\n<!--comment-->\ntext", "text\ntext"},
  81. {"abc\n</body>\ndef", "abc\ndef"},
  82. {"<x>\n<!--y-->\n</x>", "<x></x>"},
  83. {"a <template> b </template> c", "a <template>b</template>c"},
  84. // from HTML Minifier
  85. {`<DIV TITLE="blah">boo</DIV>`, `<div title=blah>boo</div>`},
  86. {"<p title\n\n\t =\n \"bar\">foo</p>", `<p title=bar>foo`},
  87. {`<p class=" foo ">foo bar baz</p>`, `<p class=foo>foo bar baz`},
  88. {`<input maxlength=" 5 ">`, `<input maxlength=5>`},
  89. {`<input type="text">`, `<input>`},
  90. {`<form method="get">`, `<form>`},
  91. {`<script language="Javascript">alert(1)</script>`, `<script>alert(1)</script>`},
  92. {`<script></script>`, ``},
  93. {`<p onclick=" JavaScript: x">x</p>`, `<p onclick=" x">x`},
  94. {`<span Selected="selected"></span>`, `<span selected></span>`},
  95. {`<table><thead><tr><th>foo</th><th>bar</th></tr></thead><tfoot><tr><th>baz</th><th>qux</th></tr></tfoot><tbody><tr><td>boo</td><td>moo</td></tr></tbody></table>`,
  96. `<table><thead><tr><th>foo<th>bar<tfoot><tr><th>baz<th>qux<tbody><tr><td>boo<td>moo</table>`},
  97. {`<select><option>foo</option><option>bar</option></select>`, `<select><option>foo<option>bar</select>`},
  98. {`<meta name="keywords" content="A, B">`, `<meta name=keywords content=A,B>`},
  99. {`<iframe><html> <p> x </p> </html></iframe>`, `<iframe><p>x</iframe>`},
  100. {`<math> &int;_a_^b^{f(x)<over>1+x} dx </math>`, `<math> &int;_a_^b^{f(x)<over>1+x} dx </math>`},
  101. {`<script language="x" charset="x" src="y"></script>`, `<script src=y></script>`},
  102. {`<style media="all">x</style>`, `<style>x</style>`},
  103. {`<a id="abc" name="abc">y</a>`, `<a id=abc>y</a>`},
  104. {`<a id="" value="">y</a>`, `<a value>y</a>`},
  105. // from Kangax html-minfier
  106. {`<span style="font-family:&quot;Helvetica Neue&quot;,&quot;Helvetica&quot;,Helvetica,Arial,sans-serif">text</span>`, `<span style='font-family:"Helvetica Neue","Helvetica",Helvetica,Arial,sans-serif'>text</span>`},
  107. // go-fuzz
  108. {`<meta e t n content=ful><a b`, `<meta e t n content=ful><a b>`},
  109. {`<img alt=a'b="">`, `<img alt='a&#39;b=""'>`},
  110. {`</b`, `</b`},
  111. {`<title></`, `<title></`},
  112. {`<svg <`, `<svg <`},
  113. {`<svg "`, `<svg "`},
  114. {`<svg></`, `<svg></`},
  115. {`<script><!--<`, `<script><!--<`},
  116. // bugs
  117. {`<p>text</p><br>text`, `<p>text</p><br>text`}, // #122
  118. {`text <img> text`, `text <img> text`}, // #89
  119. {`text <progress></progress> text`, `text <progress></progress> text`}, // #89
  120. {`<pre> <x> a b </x> </pre>`, `<pre> <x> a b </x> </pre>`}, // #82
  121. {`<svg id="1"></svg>`, `<svg id="1"></svg>`}, // #67
  122. }
  123. m := minify.New()
  124. m.AddFunc("text/html", Minify)
  125. m.AddFunc("text/css", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  126. _, err := io.Copy(w, r)
  127. return err
  128. })
  129. m.AddFunc("text/javascript", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  130. _, err := io.Copy(w, r)
  131. return err
  132. })
  133. for _, tt := range htmlTests {
  134. t.Run(tt.html, func(t *testing.T) {
  135. r := bytes.NewBufferString(tt.html)
  136. w := &bytes.Buffer{}
  137. err := Minify(m, w, r, nil)
  138. test.Minify(t, tt.html, err, w.String(), tt.expected)
  139. })
  140. }
  141. }
  142. func TestHTMLKeepEndTags(t *testing.T) {
  143. htmlTests := []struct {
  144. html string
  145. expected string
  146. }{
  147. {`<p></p><p></p>`, `<p></p><p></p>`},
  148. {`<ul><li></li><li></li></ul>`, `<ul><li></li><li></li></ul>`},
  149. }
  150. m := minify.New()
  151. htmlMinifier := &Minifier{KeepEndTags: true}
  152. for _, tt := range htmlTests {
  153. t.Run(tt.html, func(t *testing.T) {
  154. r := bytes.NewBufferString(tt.html)
  155. w := &bytes.Buffer{}
  156. err := htmlMinifier.Minify(m, w, r, nil)
  157. test.Minify(t, tt.html, err, w.String(), tt.expected)
  158. })
  159. }
  160. }
  161. func TestHTMLKeepConditionalComments(t *testing.T) {
  162. htmlTests := []struct {
  163. html string
  164. expected string
  165. }{
  166. {`<!--[if IE 6]> <b> </b> <![endif]-->`, `<!--[if IE 6]><b></b><![endif]-->`},
  167. {`<![if IE 6]> <b> </b> <![endif]>`, `<![if IE 6]><b></b><![endif]>`},
  168. }
  169. m := minify.New()
  170. htmlMinifier := &Minifier{KeepConditionalComments: true}
  171. for _, tt := range htmlTests {
  172. t.Run(tt.html, func(t *testing.T) {
  173. r := bytes.NewBufferString(tt.html)
  174. w := &bytes.Buffer{}
  175. err := htmlMinifier.Minify(m, w, r, nil)
  176. test.Minify(t, tt.html, err, w.String(), tt.expected)
  177. })
  178. }
  179. }
  180. func TestHTMLKeepWhitespace(t *testing.T) {
  181. htmlTests := []struct {
  182. html string
  183. expected string
  184. }{
  185. {`cats and dogs `, `cats and dogs`},
  186. {` <div> <i> test </i> <b> test </b> </div> `, `<div> <i> test </i> <b> test </b> </div>`},
  187. {`<strong>x </strong>y`, `<strong>x </strong>y`},
  188. {`<strong>x </strong> y`, `<strong>x </strong> y`},
  189. {"<strong>x </strong>\ny", "<strong>x </strong>\ny"},
  190. {`<p>x </p>y`, `<p>x </p>y`},
  191. {`x <p>y</p>`, `x <p>y`},
  192. {` <!doctype html> <!--comment--> <html> <body><p></p></body></html> `, `<!doctype html><p>`}, // spaces before html and at the start of html are dropped
  193. {`<p>x<br> y`, `<p>x<br> y`},
  194. {`<p>x </b> <b> y`, `<p>x </b> <b> y`},
  195. {`a <code>code</code> b`, `a <code>code</code> b`},
  196. {`a <code></code> b`, `a <code></code> b`},
  197. {`a <script>script</script> b`, `a <script>script</script> b`},
  198. {"text\n<!--comment-->\ntext", "text\ntext"},
  199. {"text\n<!--comment-->text<!--comment--> text", "text\ntext text"},
  200. {"abc\n</body>\ndef", "abc\ndef"},
  201. {"<x>\n<!--y-->\n</x>", "<x>\n</x>"},
  202. {"<style>lala{color:red}</style>", "<style>lala{color:red}</style>"},
  203. }
  204. m := minify.New()
  205. htmlMinifier := &Minifier{KeepWhitespace: true}
  206. for _, tt := range htmlTests {
  207. t.Run(tt.html, func(t *testing.T) {
  208. r := bytes.NewBufferString(tt.html)
  209. w := &bytes.Buffer{}
  210. err := htmlMinifier.Minify(m, w, r, nil)
  211. test.Minify(t, tt.html, err, w.String(), tt.expected)
  212. })
  213. }
  214. }
  215. func TestHTMLURL(t *testing.T) {
  216. htmlTests := []struct {
  217. url string
  218. html string
  219. expected string
  220. }{
  221. {`http://example.com/`, `<a href=http://example.com/>link</a>`, `<a href=//example.com/>link</a>`},
  222. {`https://example.com/`, `<a href=http://example.com/>link</a>`, `<a href=http://example.com/>link</a>`},
  223. {`http://example.com/`, `<a href=https://example.com/>link</a>`, `<a href=https://example.com/>link</a>`},
  224. {`https://example.com/`, `<a href=https://example.com/>link</a>`, `<a href=//example.com/>link</a>`},
  225. {`http://example.com/`, `<a href=" http://example.com ">x</a>`, `<a href=//example.com>x</a>`},
  226. {`http://example.com/`, `<link rel="stylesheet" type="text/css" href="http://example.com">`, `<link rel=stylesheet href=//example.com>`},
  227. {`http://example.com/`, `<!doctype html> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <head profile="http://dublincore.org/documents/dcq-html/"> <!-- Barlesque 2.75.0 --> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />`,
  228. `<!doctype html><html xmlns=//www.w3.org/1999/xhtml xml:lang=en><head profile=//dublincore.org/documents/dcq-html/><meta charset=utf-8>`},
  229. {`http://example.com/`, `<html xmlns="http://www.w3.org/1999/xhtml"></html>`, `<html xmlns=//www.w3.org/1999/xhtml>`},
  230. {`https://example.com/`, `<html xmlns="http://www.w3.org/1999/xhtml"></html>`, `<html xmlns=http://www.w3.org/1999/xhtml>`},
  231. {`http://example.com/`, `<html xmlns="https://www.w3.org/1999/xhtml"></html>`, `<html xmlns=https://www.w3.org/1999/xhtml>`},
  232. {`https://example.com/`, `<html xmlns="https://www.w3.org/1999/xhtml"></html>`, `<html xmlns=//www.w3.org/1999/xhtml>`},
  233. }
  234. m := minify.New()
  235. m.AddFunc("text/html", Minify)
  236. for _, tt := range htmlTests {
  237. t.Run(tt.url, func(t *testing.T) {
  238. r := bytes.NewBufferString(tt.html)
  239. w := &bytes.Buffer{}
  240. m.URL, _ = url.Parse(tt.url)
  241. err := Minify(m, w, r, nil)
  242. test.Minify(t, tt.html, err, w.String(), tt.expected)
  243. })
  244. }
  245. }
  246. func TestSpecialTagClosing(t *testing.T) {
  247. m := minify.New()
  248. m.AddFunc("text/html", Minify)
  249. m.AddFunc("text/css", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  250. b, err := ioutil.ReadAll(r)
  251. test.Error(t, err, nil)
  252. test.String(t, string(b), "</script>")
  253. _, err = w.Write(b)
  254. return err
  255. })
  256. html := `<style></script></style>`
  257. r := bytes.NewBufferString(html)
  258. w := &bytes.Buffer{}
  259. err := Minify(m, w, r, nil)
  260. test.Minify(t, html, err, w.String(), html)
  261. }
  262. func TestReaderErrors(t *testing.T) {
  263. r := test.NewErrorReader(0)
  264. w := &bytes.Buffer{}
  265. m := minify.New()
  266. err := Minify(m, w, r, nil)
  267. test.T(t, err, test.ErrPlain, "return error at first read")
  268. }
  269. func TestWriterErrors(t *testing.T) {
  270. errorTests := []struct {
  271. html string
  272. n []int
  273. }{
  274. {`<!doctype>`, []int{0}},
  275. {`text`, []int{0}},
  276. {`<foo attr=val>`, []int{0, 1, 2, 3, 4, 5}},
  277. {`</foo>`, []int{0}},
  278. {`<style>x</style>`, []int{2}},
  279. {`<textarea>x</textarea>`, []int{2}},
  280. {`<code>x</code>`, []int{2}},
  281. {`<pre>x</pre>`, []int{2}},
  282. {`<svg>x</svg>`, []int{0}},
  283. {`<math>x</math>`, []int{0}},
  284. {`<!--[if IE 6]> text <![endif]-->`, []int{0, 1, 2}},
  285. {`<![if IE 6]> text <![endif]>`, []int{0}},
  286. }
  287. m := minify.New()
  288. m.Add("text/html", &Minifier{
  289. KeepConditionalComments: true,
  290. })
  291. for _, tt := range errorTests {
  292. for _, n := range tt.n {
  293. t.Run(fmt.Sprint(tt.html, " ", tt.n), func(t *testing.T) {
  294. r := bytes.NewBufferString(tt.html)
  295. w := test.NewErrorWriter(n)
  296. err := m.Minify("text/html", w, r)
  297. test.T(t, err, test.ErrPlain)
  298. })
  299. }
  300. }
  301. }
  302. func TestMinifyErrors(t *testing.T) {
  303. errorTests := []struct {
  304. html string
  305. err error
  306. }{
  307. {`<style>abc</style>`, test.ErrPlain},
  308. {`<path style="abc"/>`, test.ErrPlain},
  309. {`<path onclick="abc"/>`, test.ErrPlain},
  310. {`<svg></svg>`, test.ErrPlain},
  311. {`<math></math>`, test.ErrPlain},
  312. }
  313. m := minify.New()
  314. m.AddFunc("text/css", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  315. return test.ErrPlain
  316. })
  317. m.AddFunc("text/javascript", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  318. return test.ErrPlain
  319. })
  320. m.AddFunc("image/svg+xml", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  321. return test.ErrPlain
  322. })
  323. m.AddFunc("application/mathml+xml", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  324. return test.ErrPlain
  325. })
  326. for _, tt := range errorTests {
  327. t.Run(tt.html, func(t *testing.T) {
  328. r := bytes.NewBufferString(tt.html)
  329. w := &bytes.Buffer{}
  330. err := Minify(m, w, r, nil)
  331. test.T(t, err, tt.err)
  332. })
  333. }
  334. }
  335. ////////////////////////////////////////////////////////////////
  336. func ExampleMinify() {
  337. m := minify.New()
  338. m.AddFunc("text/html", Minify)
  339. m.AddFunc("text/css", css.Minify)
  340. m.AddFunc("text/javascript", js.Minify)
  341. m.AddFunc("image/svg+xml", svg.Minify)
  342. m.AddFuncRegexp(regexp.MustCompile("[/+]json$"), json.Minify)
  343. m.AddFuncRegexp(regexp.MustCompile("[/+]xml$"), xml.Minify)
  344. // set URL to minify link locations too
  345. m.URL, _ = url.Parse("https://www.example.com/")
  346. if err := m.Minify("text/html", os.Stdout, os.Stdin); err != nil {
  347. panic(err)
  348. }
  349. }
  350. func ExampleMinify_options() {
  351. m := minify.New()
  352. m.Add("text/html", &Minifier{
  353. KeepDefaultAttrVals: true,
  354. KeepWhitespace: true,
  355. })
  356. if err := m.Minify("text/html", os.Stdout, os.Stdin); err != nil {
  357. panic(err)
  358. }
  359. }
  360. func ExampleMinify_reader() {
  361. b := bytes.NewReader([]byte("<html><body><h1>Example</h1></body></html>"))
  362. m := minify.New()
  363. m.Add("text/html", &Minifier{})
  364. r := m.Reader("text/html", b)
  365. if _, err := io.Copy(os.Stdout, r); err != nil {
  366. panic(err)
  367. }
  368. // Output: <h1>Example</h1>
  369. }
  370. func ExampleMinify_writer() {
  371. m := minify.New()
  372. m.Add("text/html", &Minifier{})
  373. w := m.Writer("text/html", os.Stdout)
  374. w.Write([]byte("<html><body><h1>Example</h1></body></html>"))
  375. w.Close()
  376. // Output: <h1>Example</h1>
  377. }