xml.go 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. // Package xml minifies XML1.0 following the specifications at http://www.w3.org/TR/xml/.
  2. package xml // import "github.com/tdewolff/minify/xml"
  3. import (
  4. "io"
  5. "github.com/tdewolff/minify"
  6. "github.com/tdewolff/parse"
  7. "github.com/tdewolff/parse/xml"
  8. )
  9. var (
  10. isBytes = []byte("=")
  11. spaceBytes = []byte(" ")
  12. voidBytes = []byte("/>")
  13. )
  14. ////////////////////////////////////////////////////////////////
  15. // DefaultMinifier is the default minifier.
  16. var DefaultMinifier = &Minifier{}
  17. // Minifier is an XML minifier.
  18. type Minifier struct {
  19. KeepWhitespace bool
  20. }
  21. // Minify minifies XML data, it reads from r and writes to w.
  22. func Minify(m *minify.M, w io.Writer, r io.Reader, params map[string]string) error {
  23. return DefaultMinifier.Minify(m, w, r, params)
  24. }
  25. // Minify minifies XML data, it reads from r and writes to w.
  26. func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  27. omitSpace := true // on true the next text token must not start with a space
  28. attrByteBuffer := make([]byte, 0, 64)
  29. l := xml.NewLexer(r)
  30. defer l.Restore()
  31. tb := NewTokenBuffer(l)
  32. for {
  33. t := *tb.Shift()
  34. if t.TokenType == xml.CDATAToken {
  35. if len(t.Text) == 0 {
  36. continue
  37. }
  38. if text, useText := xml.EscapeCDATAVal(&attrByteBuffer, t.Text); useText {
  39. t.TokenType = xml.TextToken
  40. t.Data = text
  41. }
  42. }
  43. switch t.TokenType {
  44. case xml.ErrorToken:
  45. if l.Err() == io.EOF {
  46. return nil
  47. }
  48. return l.Err()
  49. case xml.DOCTYPEToken:
  50. if _, err := w.Write(t.Data); err != nil {
  51. return err
  52. }
  53. case xml.CDATAToken:
  54. if _, err := w.Write(t.Data); err != nil {
  55. return err
  56. }
  57. if len(t.Text) > 0 && parse.IsWhitespace(t.Text[len(t.Text)-1]) {
  58. omitSpace = true
  59. }
  60. case xml.TextToken:
  61. t.Data = parse.ReplaceMultipleWhitespace(t.Data)
  62. // whitespace removal; trim left
  63. if omitSpace && (t.Data[0] == ' ' || t.Data[0] == '\n') {
  64. t.Data = t.Data[1:]
  65. }
  66. // whitespace removal; trim right
  67. omitSpace = false
  68. if len(t.Data) == 0 {
  69. omitSpace = true
  70. } else if t.Data[len(t.Data)-1] == ' ' || t.Data[len(t.Data)-1] == '\n' {
  71. omitSpace = true
  72. i := 0
  73. for {
  74. next := tb.Peek(i)
  75. // trim if EOF, text token with whitespace begin or block token
  76. if next.TokenType == xml.ErrorToken {
  77. t.Data = t.Data[:len(t.Data)-1]
  78. omitSpace = false
  79. break
  80. } else if next.TokenType == xml.TextToken {
  81. // this only happens when a comment, doctype, cdata startpi tag was in between
  82. // remove if the text token starts with a whitespace
  83. if len(next.Data) > 0 && parse.IsWhitespace(next.Data[0]) {
  84. t.Data = t.Data[:len(t.Data)-1]
  85. omitSpace = false
  86. }
  87. break
  88. } else if next.TokenType == xml.CDATAToken {
  89. if len(next.Text) > 0 && parse.IsWhitespace(next.Text[0]) {
  90. t.Data = t.Data[:len(t.Data)-1]
  91. omitSpace = false
  92. }
  93. break
  94. } else if next.TokenType == xml.StartTagToken || next.TokenType == xml.EndTagToken {
  95. if !o.KeepWhitespace {
  96. t.Data = t.Data[:len(t.Data)-1]
  97. omitSpace = false
  98. }
  99. break
  100. }
  101. i++
  102. }
  103. }
  104. if _, err := w.Write(t.Data); err != nil {
  105. return err
  106. }
  107. case xml.StartTagToken:
  108. if o.KeepWhitespace {
  109. omitSpace = false
  110. }
  111. if _, err := w.Write(t.Data); err != nil {
  112. return err
  113. }
  114. case xml.StartTagPIToken:
  115. if _, err := w.Write(t.Data); err != nil {
  116. return err
  117. }
  118. case xml.AttributeToken:
  119. if _, err := w.Write(spaceBytes); err != nil {
  120. return err
  121. }
  122. if _, err := w.Write(t.Text); err != nil {
  123. return err
  124. }
  125. if _, err := w.Write(isBytes); err != nil {
  126. return err
  127. }
  128. if len(t.AttrVal) < 2 {
  129. if _, err := w.Write(t.AttrVal); err != nil {
  130. return err
  131. }
  132. } else {
  133. // prefer single or double quotes depending on what occurs more often in value
  134. val := xml.EscapeAttrVal(&attrByteBuffer, t.AttrVal[1:len(t.AttrVal)-1])
  135. if _, err := w.Write(val); err != nil {
  136. return err
  137. }
  138. }
  139. case xml.StartTagCloseToken:
  140. next := tb.Peek(0)
  141. skipExtra := false
  142. if next.TokenType == xml.TextToken && parse.IsAllWhitespace(next.Data) {
  143. next = tb.Peek(1)
  144. skipExtra = true
  145. }
  146. if next.TokenType == xml.EndTagToken {
  147. // collapse empty tags to single void tag
  148. tb.Shift()
  149. if skipExtra {
  150. tb.Shift()
  151. }
  152. if _, err := w.Write(voidBytes); err != nil {
  153. return err
  154. }
  155. } else {
  156. if _, err := w.Write(t.Text); err != nil {
  157. return err
  158. }
  159. }
  160. case xml.StartTagCloseVoidToken:
  161. if _, err := w.Write(t.Text); err != nil {
  162. return err
  163. }
  164. case xml.StartTagClosePIToken:
  165. if _, err := w.Write(t.Text); err != nil {
  166. return err
  167. }
  168. case xml.EndTagToken:
  169. if o.KeepWhitespace {
  170. omitSpace = false
  171. }
  172. if len(t.Data) > 3+len(t.Text) {
  173. t.Data[2+len(t.Text)] = '>'
  174. t.Data = t.Data[:3+len(t.Text)]
  175. }
  176. if _, err := w.Write(t.Data); err != nil {
  177. return err
  178. }
  179. }
  180. }
  181. }