common.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. package minify // import "github.com/tdewolff/minify"
  2. import (
  3. "bytes"
  4. "encoding/base64"
  5. "net/url"
  6. "github.com/tdewolff/parse"
  7. "github.com/tdewolff/parse/strconv"
  8. )
  // Epsilon is the smallest magnitude that is still treated as non-zero;
// anything closer to zero than this is considered to be zero.
var Epsilon = 1e-5
  11. // Mediatype minifies a given mediatype by removing all whitespace.
  12. func Mediatype(b []byte) []byte {
  13. j := 0
  14. start := 0
  15. inString := false
  16. for i, c := range b {
  17. if !inString && parse.IsWhitespace(c) {
  18. if start != 0 {
  19. j += copy(b[j:], b[start:i])
  20. } else {
  21. j += i
  22. }
  23. start = i + 1
  24. } else if c == '"' {
  25. inString = !inString
  26. }
  27. }
  28. if start != 0 {
  29. j += copy(b[j:], b[start:])
  30. return parse.ToLower(b[:j])
  31. }
  32. return parse.ToLower(b)
  33. }
  34. // DataURI minifies a data URI and calls a minifier by the specified mediatype. Specifications: https://www.ietf.org/rfc/rfc2397.txt.
  35. func DataURI(m *M, dataURI []byte) []byte {
  36. if mediatype, data, err := parse.DataURI(dataURI); err == nil {
  37. dataURI, _ = m.Bytes(string(mediatype), data)
  38. base64Len := len(";base64") + base64.StdEncoding.EncodedLen(len(dataURI))
  39. asciiLen := len(dataURI)
  40. for _, c := range dataURI {
  41. if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '-' || c == '_' || c == '.' || c == '~' || c == ' ' {
  42. asciiLen++
  43. } else {
  44. asciiLen += 2
  45. }
  46. if asciiLen > base64Len {
  47. break
  48. }
  49. }
  50. if asciiLen > base64Len {
  51. encoded := make([]byte, base64Len-len(";base64"))
  52. base64.StdEncoding.Encode(encoded, dataURI)
  53. dataURI = encoded
  54. mediatype = append(mediatype, []byte(";base64")...)
  55. } else {
  56. dataURI = []byte(url.QueryEscape(string(dataURI)))
  57. dataURI = bytes.Replace(dataURI, []byte("\""), []byte("\\\""), -1)
  58. }
  59. if len("text/plain") <= len(mediatype) && parse.EqualFold(mediatype[:len("text/plain")], []byte("text/plain")) {
  60. mediatype = mediatype[len("text/plain"):]
  61. }
  62. for i := 0; i+len(";charset=us-ascii") <= len(mediatype); i++ {
  63. // must start with semicolon and be followed by end of mediatype or semicolon
  64. if mediatype[i] == ';' && parse.EqualFold(mediatype[i+1:i+len(";charset=us-ascii")], []byte("charset=us-ascii")) && (i+len(";charset=us-ascii") >= len(mediatype) || mediatype[i+len(";charset=us-ascii")] == ';') {
  65. mediatype = append(mediatype[:i], mediatype[i+len(";charset=us-ascii"):]...)
  66. break
  67. }
  68. }
  69. dataURI = append(append(append([]byte("data:"), mediatype...), ','), dataURI...)
  70. }
  71. return dataURI
  72. }
  // Bounds of the platform-dependent int type.
const (
	// MaxInt is the largest value an int can hold.
	MaxInt = int(^uint(0) >> 1)
	// MinInt is the smallest value an int can hold.
	MinInt = -MaxInt - 1
)
  // Decimal minifies a given byte slice containing a number (see parse.Number) and removes superfluous characters.
// It does not parse or output exponents.
//
// The number is rewritten in place and the result is a sub-slice of num. A
// leading '+', leading zeros and trailing zeros of the fraction are removed.
// When prec is non-negative the fraction is cut to at most prec digits and the
// magnitude is rounded up if the first dropped digit is '5' or higher; a
// negative prec keeps all digits. A value that turns out to be zero is
// returned as "0" without a sign.
func Decimal(num []byte, prec int) []byte {
	// omit first + and register mantissa start and end and whether it's negative
	neg := false
	start := 0
	dot := -1
	end := len(num)
	if 0 < end && (num[0] == '+' || num[0] == '-') {
		if num[0] == '-' {
			neg = true
		}
		start++
	}

	for i, c := range num[start:] {
		if c == '.' {
			dot = start + i
			break
		}
	}
	if dot == -1 {
		// no dot present: place it virtually at the end of the number
		dot = end
	}

	// trim leading zeros but leave at least one digit
	for start < end-1 && num[start] == '0' {
		start++
	}

	// trim trailing zeros
	i := end - 1
	for ; i > dot; i-- {
		if num[i] != '0' {
			end = i + 1
			break
		}
	}
	if i == dot {
		// the fraction is empty or consists of zeros only: drop it with the dot
		end = dot
		if start == end {
			// no integer digits remain either, so the value is zero
			num[start] = '0'
			return num[start : start+1]
		}
	} else if start == end-1 && num[start] == '0' {
		return num[start:end]
	}

	// apply precision
	if prec > -1 && dot+1+prec < end {
		end = dot + 1 + prec
		inc := num[end] >= '5' // the first dropped digit decides whether to round up
		if inc || num[end-1] == '0' {
			// walk backwards to propagate the carry and to drop zeros that are now trailing
			for i := end - 1; i > start; i-- {
				if i == dot {
					end--
				} else if inc {
					if num[i] == '9' {
						if i > dot {
							// 9 in the fraction rolls over to a trailing zero, drop it
							end--
						} else {
							num[i] = '0'
						}
					} else {
						num[i]++
						inc = false
						break
					}
				} else if i > dot && num[i] == '0' {
					end--
				} else {
					// reached a digit that has to stay, so the digits before it
					// (and the dot) must be kept as well
					break
				}
			}
		}

		if dot == start && end == start+1 {
			// only the position of the dot is left
			if !inc {
				// rounded down to zero, which carries no sign
				num[start] = '0'
				return num[start : start+1]
			}
			num[start] = '1'
		} else {
			if dot+1 == end {
				// no fraction digits left behind the dot
				end--
			}
			if inc {
				// the carry ran through all digits after the first one
				if num[start] == '9' {
					num[start] = '0'
					copy(num[start+1:], num[start:end])
					end++
					num[start] = '1'
				} else {
					num[start]++
				}
			}
		}
	}

	if neg {
		// reuse the byte of the original sign in front of the digits
		start--
		num[start] = '-'
	}
	return num[start:end]
}
  172. // Number minifies a given byte slice containing a number (see parse.Number) and removes superfluous characters.
  173. func Number(num []byte, prec int) []byte {
  174. // omit first + and register mantissa start and end, whether it's negative and the exponent
  175. neg := false
  176. start := 0
  177. dot := -1
  178. end := len(num)
  179. origExp := 0
  180. if 0 < end && (num[0] == '+' || num[0] == '-') {
  181. if num[0] == '-' {
  182. neg = true
  183. }
  184. start++
  185. }
  186. for i, c := range num[start:] {
  187. if c == '.' {
  188. dot = start + i
  189. } else if c == 'e' || c == 'E' {
  190. end = start + i
  191. i += start + 1
  192. if i < len(num) && num[i] == '+' {
  193. i++
  194. }
  195. if tmpOrigExp, n := strconv.ParseInt(num[i:]); n > 0 && tmpOrigExp >= int64(MinInt) && tmpOrigExp <= int64(MaxInt) {
  196. // range checks for when int is 32 bit
  197. origExp = int(tmpOrigExp)
  198. } else {
  199. return num
  200. }
  201. break
  202. }
  203. }
  204. if dot == -1 {
  205. dot = end
  206. }
  207. // trim leading zeros but leave at least one digit
  208. for start < end-1 && num[start] == '0' {
  209. start++
  210. }
  211. // trim trailing zeros
  212. i := end - 1
  213. for ; i > dot; i-- {
  214. if num[i] != '0' {
  215. end = i + 1
  216. break
  217. }
  218. }
  219. if i == dot {
  220. end = dot
  221. if start == end {
  222. num[start] = '0'
  223. return num[start : start+1]
  224. }
  225. } else if start == end-1 && num[start] == '0' {
  226. return num[start:end]
  227. }
  228. // n is the number of significant digits
  229. // normExp would be the exponent if it were normalised (0.1 <= f < 1)
  230. n := 0
  231. normExp := 0
  232. if dot == start {
  233. for i = dot + 1; i < end; i++ {
  234. if num[i] != '0' {
  235. n = end - i
  236. normExp = dot - i + 1
  237. break
  238. }
  239. }
  240. } else if dot == end {
  241. normExp = end - start
  242. for i = end - 1; i >= start; i-- {
  243. if num[i] != '0' {
  244. n = i + 1 - start
  245. end = i + 1
  246. break
  247. }
  248. }
  249. } else {
  250. n = end - start - 1
  251. normExp = dot - start
  252. }
  253. if origExp < 0 && (normExp < MinInt-origExp || normExp-n < MinInt-origExp) || origExp > 0 && (normExp > MaxInt-origExp || normExp-n > MaxInt-origExp) {
  254. return num
  255. }
  256. normExp += origExp
  257. // intExp would be the exponent if it were an integer
  258. intExp := normExp - n
  259. lenIntExp := 1
  260. if intExp <= -10 || intExp >= 10 {
  261. lenIntExp = strconv.LenInt(int64(intExp))
  262. }
  263. // there are three cases to consider when printing the number
  264. // case 1: without decimals and with an exponent (large numbers)
  265. // case 2: with decimals and without an exponent (around zero)
  266. // case 3: without decimals and with a negative exponent (small numbers)
  267. if normExp >= n {
  268. // case 1
  269. if dot < end {
  270. if dot == start {
  271. start = end - n
  272. } else {
  273. // TODO: copy the other part if shorter?
  274. copy(num[dot:], num[dot+1:end])
  275. end--
  276. }
  277. }
  278. if normExp >= n+3 {
  279. num[end] = 'e'
  280. end++
  281. for i := end + lenIntExp - 1; i >= end; i-- {
  282. num[i] = byte(intExp%10) + '0'
  283. intExp /= 10
  284. }
  285. end += lenIntExp
  286. } else if normExp == n+2 {
  287. num[end] = '0'
  288. num[end+1] = '0'
  289. end += 2
  290. } else if normExp == n+1 {
  291. num[end] = '0'
  292. end++
  293. }
  294. } else if normExp >= -lenIntExp-1 {
  295. // case 2
  296. zeroes := -normExp
  297. newDot := 0
  298. if zeroes > 0 {
  299. // dot placed at the front and add zeroes
  300. newDot = end - n - zeroes - 1
  301. if newDot != dot {
  302. d := start - newDot
  303. if d > 0 {
  304. if dot < end {
  305. // copy original digits behind the dot backwards
  306. copy(num[dot+1+d:], num[dot+1:end])
  307. if dot > start {
  308. // copy original digits before the dot backwards
  309. copy(num[start+d+1:], num[start:dot])
  310. }
  311. } else if dot > start {
  312. // copy original digits before the dot backwards
  313. copy(num[start+d:], num[start:dot])
  314. }
  315. newDot = start
  316. end += d
  317. } else {
  318. start += -d
  319. }
  320. num[newDot] = '.'
  321. for i := 0; i < zeroes; i++ {
  322. num[newDot+1+i] = '0'
  323. }
  324. }
  325. } else {
  326. // placed in the middle
  327. if dot == start {
  328. // TODO: try if placing at the end reduces copying
  329. // when there are zeroes after the dot
  330. dot = end - n - 1
  331. start = dot
  332. } else if dot >= end {
  333. // TODO: try if placing at the start reduces copying
  334. // when input has no dot in it
  335. dot = end
  336. end++
  337. }
  338. newDot = start + normExp
  339. if newDot > dot {
  340. // copy digits forwards
  341. copy(num[dot:], num[dot+1:newDot+1])
  342. } else if newDot < dot {
  343. // copy digits backwards
  344. copy(num[newDot+1:], num[newDot:dot])
  345. }
  346. num[newDot] = '.'
  347. }
  348. // apply precision
  349. dot = newDot
  350. if prec > -1 && dot+1+prec < end {
  351. end = dot + 1 + prec
  352. inc := num[end] >= '5'
  353. if inc || num[end-1] == '0' {
  354. for i := end - 1; i > start; i-- {
  355. if i == dot {
  356. end--
  357. } else if inc {
  358. if num[i] == '9' {
  359. if i > dot {
  360. end--
  361. } else {
  362. num[i] = '0'
  363. }
  364. } else {
  365. num[i]++
  366. inc = false
  367. break
  368. }
  369. } else if i > dot && num[i] == '0' {
  370. end--
  371. }
  372. }
  373. }
  374. if dot == start && end == start+1 {
  375. if inc {
  376. num[start] = '1'
  377. } else {
  378. num[start] = '0'
  379. }
  380. } else {
  381. if dot+1 == end {
  382. end--
  383. }
  384. if inc {
  385. if num[start] == '9' {
  386. num[start] = '0'
  387. copy(num[start+1:], num[start:end])
  388. end++
  389. num[start] = '1'
  390. } else {
  391. num[start]++
  392. }
  393. }
  394. }
  395. }
  396. } else {
  397. // case 3
  398. // find new end, considering moving numbers to the front, removing the dot and increasing the length of the exponent
  399. newEnd := end
  400. if dot == start {
  401. newEnd = start + n
  402. } else {
  403. newEnd--
  404. }
  405. newEnd += 2 + lenIntExp
  406. exp := intExp
  407. lenExp := lenIntExp
  408. if newEnd < len(num) {
  409. // it saves space to convert the decimal to an integer and decrease the exponent
  410. if dot < end {
  411. if dot == start {
  412. copy(num[start:], num[end-n:end])
  413. end = start + n
  414. } else {
  415. copy(num[dot:], num[dot+1:end])
  416. end--
  417. }
  418. }
  419. } else {
  420. // it does not save space and will panic, so we revert to the original representation
  421. exp = origExp
  422. lenExp = 1
  423. if origExp <= -10 || origExp >= 10 {
  424. lenExp = strconv.LenInt(int64(origExp))
  425. }
  426. }
  427. num[end] = 'e'
  428. num[end+1] = '-'
  429. end += 2
  430. exp = -exp
  431. for i := end + lenExp - 1; i >= end; i-- {
  432. num[i] = byte(exp%10) + '0'
  433. exp /= 10
  434. }
  435. end += lenExp
  436. }
  437. if neg {
  438. start--
  439. num[start] = '-'
  440. }
  441. return num[start:end]
  442. }