common.go 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. // Package parse contains a collection of parsers for various formats in its subpackages.
  2. package parse // import "github.com/tdewolff/parse"
  3. import (
  4. "bytes"
  5. "encoding/base64"
  6. "errors"
  7. "net/url"
  8. )
  9. // ErrBadDataURI is returned by DataURI when the byte slice is not longer than the 'data:' prefix, does not start with 'data:', or contains no ',' separating the header from the data.
  10. var ErrBadDataURI = errors.New("not a data URI")
  11. // Number returns the number of bytes that parse as a number of the regex format (+|-)?([0-9]+(\.[0-9]+)?|\.[0-9]+)((e|E)(+|-)?[0-9]+)?.
  12. func Number(b []byte) int {
  13. if len(b) == 0 {
  14. return 0
  15. }
  16. i := 0
  17. if b[i] == '+' || b[i] == '-' {
  18. i++
  19. if i >= len(b) {
  20. return 0
  21. }
  22. }
  23. firstDigit := (b[i] >= '0' && b[i] <= '9')
  24. if firstDigit {
  25. i++
  26. for i < len(b) && b[i] >= '0' && b[i] <= '9' {
  27. i++
  28. }
  29. }
  30. if i < len(b) && b[i] == '.' {
  31. i++
  32. if i < len(b) && b[i] >= '0' && b[i] <= '9' {
  33. i++
  34. for i < len(b) && b[i] >= '0' && b[i] <= '9' {
  35. i++
  36. }
  37. } else if firstDigit {
  38. // . could belong to the next token
  39. i--
  40. return i
  41. } else {
  42. return 0
  43. }
  44. } else if !firstDigit {
  45. return 0
  46. }
  47. iOld := i
  48. if i < len(b) && (b[i] == 'e' || b[i] == 'E') {
  49. i++
  50. if i < len(b) && (b[i] == '+' || b[i] == '-') {
  51. i++
  52. }
  53. if i >= len(b) || b[i] < '0' || b[i] > '9' {
  54. // e could belong to next token
  55. return iOld
  56. }
  57. for i < len(b) && b[i] >= '0' && b[i] <= '9' {
  58. i++
  59. }
  60. }
  61. return i
  62. }
  63. // Dimension parses a byte-slice and returns the length of the number and its unit.
  64. func Dimension(b []byte) (int, int) {
  65. num := Number(b)
  66. if num == 0 || num == len(b) {
  67. return num, 0
  68. } else if b[num] == '%' {
  69. return num, 1
  70. } else if b[num] >= 'a' && b[num] <= 'z' || b[num] >= 'A' && b[num] <= 'Z' {
  71. i := num + 1
  72. for i < len(b) && (b[i] >= 'a' && b[i] <= 'z' || b[i] >= 'A' && b[i] <= 'Z') {
  73. i++
  74. }
  75. return num, i - num
  76. }
  77. return num, 0
  78. }
  79. // Mediatype parses a given mediatype and splits the mimetype from the parameters.
  80. // It works similar to mime.ParseMediaType but is faster.
  81. func Mediatype(b []byte) ([]byte, map[string]string) {
  82. i := 0
  83. for i < len(b) && b[i] == ' ' {
  84. i++
  85. }
  86. b = b[i:]
  87. n := len(b)
  88. mimetype := b
  89. var params map[string]string
  90. for i := 3; i < n; i++ { // mimetype is at least three characters long
  91. if b[i] == ';' || b[i] == ' ' {
  92. mimetype = b[:i]
  93. if b[i] == ' ' {
  94. i++
  95. for i < n && b[i] == ' ' {
  96. i++
  97. }
  98. if i < n && b[i] != ';' {
  99. break
  100. }
  101. }
  102. params = map[string]string{}
  103. s := string(b)
  104. PARAM:
  105. i++
  106. for i < n && s[i] == ' ' {
  107. i++
  108. }
  109. start := i
  110. for i < n && s[i] != '=' && s[i] != ';' && s[i] != ' ' {
  111. i++
  112. }
  113. key := s[start:i]
  114. for i < n && s[i] == ' ' {
  115. i++
  116. }
  117. if i < n && s[i] == '=' {
  118. i++
  119. for i < n && s[i] == ' ' {
  120. i++
  121. }
  122. start = i
  123. for i < n && s[i] != ';' && s[i] != ' ' {
  124. i++
  125. }
  126. } else {
  127. start = i
  128. }
  129. params[key] = s[start:i]
  130. for i < n && s[i] == ' ' {
  131. i++
  132. }
  133. if i < n && s[i] == ';' {
  134. goto PARAM
  135. }
  136. break
  137. }
  138. }
  139. return mimetype, params
  140. }
  141. // DataURI parses the given data URI and returns the mediatype, data and ok.
  142. func DataURI(dataURI []byte) ([]byte, []byte, error) {
  143. if len(dataURI) > 5 && bytes.Equal(dataURI[:5], []byte("data:")) {
  144. dataURI = dataURI[5:]
  145. inBase64 := false
  146. var mediatype []byte
  147. i := 0
  148. for j := 0; j < len(dataURI); j++ {
  149. c := dataURI[j]
  150. if c == '=' || c == ';' || c == ',' {
  151. if c != '=' && bytes.Equal(TrimWhitespace(dataURI[i:j]), []byte("base64")) {
  152. if len(mediatype) > 0 {
  153. mediatype = mediatype[:len(mediatype)-1]
  154. }
  155. inBase64 = true
  156. i = j
  157. } else if c != ',' {
  158. mediatype = append(append(mediatype, TrimWhitespace(dataURI[i:j])...), c)
  159. i = j + 1
  160. } else {
  161. mediatype = append(mediatype, TrimWhitespace(dataURI[i:j])...)
  162. }
  163. if c == ',' {
  164. if len(mediatype) == 0 || mediatype[0] == ';' {
  165. mediatype = []byte("text/plain")
  166. }
  167. data := dataURI[j+1:]
  168. if inBase64 {
  169. decoded := make([]byte, base64.StdEncoding.DecodedLen(len(data)))
  170. n, err := base64.StdEncoding.Decode(decoded, data)
  171. if err != nil {
  172. return nil, nil, err
  173. }
  174. data = decoded[:n]
  175. } else if unescaped, err := url.QueryUnescape(string(data)); err == nil {
  176. data = []byte(unescaped)
  177. }
  178. return mediatype, data, nil
  179. }
  180. }
  181. }
  182. }
  183. return nil, nil, ErrBadDataURI
  184. }
  185. // QuoteEntity parses the given byte slice and returns the quote that got matched (' or ") and its entity length.
  186. func QuoteEntity(b []byte) (quote byte, n int) {
  187. if len(b) < 5 || b[0] != '&' {
  188. return 0, 0
  189. }
  190. if b[1] == '#' {
  191. if b[2] == 'x' {
  192. i := 3
  193. for i < len(b) && b[i] == '0' {
  194. i++
  195. }
  196. if i+2 < len(b) && b[i] == '2' && b[i+2] == ';' {
  197. if b[i+1] == '2' {
  198. return '"', i + 3 // &#x22;
  199. } else if b[i+1] == '7' {
  200. return '\'', i + 3 // &#x27;
  201. }
  202. }
  203. } else {
  204. i := 2
  205. for i < len(b) && b[i] == '0' {
  206. i++
  207. }
  208. if i+2 < len(b) && b[i] == '3' && b[i+2] == ';' {
  209. if b[i+1] == '4' {
  210. return '"', i + 3 // &#34;
  211. } else if b[i+1] == '9' {
  212. return '\'', i + 3 // &#39;
  213. }
  214. }
  215. }
  216. } else if len(b) >= 6 && b[5] == ';' {
  217. if EqualFold(b[1:5], []byte{'q', 'u', 'o', 't'}) {
  218. return '"', 6 // &quot;
  219. } else if EqualFold(b[1:5], []byte{'a', 'p', 'o', 's'}) {
  220. return '\'', 6 // &apos;
  221. }
  222. }
  223. return 0, 0
  224. }