4
0

parse.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. package css // import "github.com/tdewolff/parse/css"
  2. import (
  3. "bytes"
  4. "io"
  5. "strconv"
  6. "github.com/tdewolff/parse"
  7. )
// Reusable byte slices for synthesized tokens.
var wsBytes = []byte(" ")   // data for collapsed whitespace tokens pushed into the value buffer
var endBytes = []byte("}")  // data for the right-brace token replayed by Next when prevEnd is set
var emptyBytes = []byte("") // placeholder data used to neutralize p.data (see parseQualifiedRule)
  11. // GrammarType determines the type of grammar.
  12. type GrammarType uint32
  13. // GrammarType values.
  14. const (
  15. ErrorGrammar GrammarType = iota // extra token when errors occur
  16. CommentGrammar
  17. AtRuleGrammar
  18. BeginAtRuleGrammar
  19. EndAtRuleGrammar
  20. QualifiedRuleGrammar
  21. BeginRulesetGrammar
  22. EndRulesetGrammar
  23. DeclarationGrammar
  24. TokenGrammar
  25. CustomPropertyGrammar
  26. )
  27. // String returns the string representation of a GrammarType.
  28. func (tt GrammarType) String() string {
  29. switch tt {
  30. case ErrorGrammar:
  31. return "Error"
  32. case CommentGrammar:
  33. return "Comment"
  34. case AtRuleGrammar:
  35. return "AtRule"
  36. case BeginAtRuleGrammar:
  37. return "BeginAtRule"
  38. case EndAtRuleGrammar:
  39. return "EndAtRule"
  40. case QualifiedRuleGrammar:
  41. return "QualifiedRule"
  42. case BeginRulesetGrammar:
  43. return "BeginRuleset"
  44. case EndRulesetGrammar:
  45. return "EndRuleset"
  46. case DeclarationGrammar:
  47. return "Declaration"
  48. case TokenGrammar:
  49. return "Token"
  50. case CustomPropertyGrammar:
  51. return "CustomProperty"
  52. }
  53. return "Invalid(" + strconv.Itoa(int(tt)) + ")"
  54. }
  55. ////////////////////////////////////////////////////////////////
// State is the state function the parser currently is in. States are kept on
// a stack (Parser.state) so that nested blocks can return control to their
// enclosing context when they end.
type State func(*Parser) GrammarType
// Token is a single TokenType and its associated data.
type Token struct {
	TokenType
	Data []byte
}
// Parser is the state for the parser.
type Parser struct {
	l       *Lexer    // underlying token source
	state   []State   // stack of state functions; the top entry parses the next grammar
	err     error     // parser-level error; lexer errors are surfaced via Err
	buf     []Token   // token values of the last grammar, exposed by Values
	level   int       // nesting depth of (, {, [ and function tokens
	tt      TokenType // current token type
	data    []byte    // current token data
	prevWS  bool      // whether whitespace immediately preceded the current token
	prevEnd bool      // whether a '}' token must be replayed on the next call to Next
}
  75. // NewParser returns a new CSS parser from an io.Reader. isInline specifies whether this is an inline style attribute.
  76. func NewParser(r io.Reader, isInline bool) *Parser {
  77. l := NewLexer(r)
  78. p := &Parser{
  79. l: l,
  80. state: make([]State, 0, 4),
  81. }
  82. if isInline {
  83. p.state = append(p.state, (*Parser).parseDeclarationList)
  84. } else {
  85. p.state = append(p.state, (*Parser).parseStylesheet)
  86. }
  87. return p
  88. }
  89. // Err returns the error encountered during parsing, this is often io.EOF but also other errors can be returned.
  90. func (p *Parser) Err() error {
  91. if p.err != nil {
  92. return p.err
  93. }
  94. return p.l.Err()
  95. }
// Restore restores the NULL byte at the end of the buffer.
// It simply delegates to the underlying lexer's Restore.
func (p *Parser) Restore() {
	p.l.Restore()
}
  100. // Next returns the next Grammar. It returns ErrorGrammar when an error was encountered. Using Err() one can retrieve the error message.
  101. func (p *Parser) Next() (GrammarType, TokenType, []byte) {
  102. p.err = nil
  103. if p.prevEnd {
  104. p.tt, p.data = RightBraceToken, endBytes
  105. p.prevEnd = false
  106. } else {
  107. p.tt, p.data = p.popToken(true)
  108. }
  109. gt := p.state[len(p.state)-1](p)
  110. return gt, p.tt, p.data
  111. }
// Values returns a slice of Tokens for the last Grammar. Only AtRuleGrammar, BeginAtRuleGrammar, BeginRulesetGrammar and Declaration will return the at-rule components, ruleset selector and declaration values respectively.
// The slice's backing array is reused between calls to Next (see initBuf), so
// copy it if it must outlive the next call.
func (p *Parser) Values() []Token {
	return p.buf
}
  116. func (p *Parser) popToken(allowComment bool) (TokenType, []byte) {
  117. p.prevWS = false
  118. tt, data := p.l.Next()
  119. for tt == WhitespaceToken || tt == CommentToken {
  120. if tt == WhitespaceToken {
  121. p.prevWS = true
  122. } else if allowComment && len(p.state) == 1 {
  123. break
  124. }
  125. tt, data = p.l.Next()
  126. }
  127. return tt, data
  128. }
// initBuf empties the value buffer for a new grammar, reusing its backing array.
func (p *Parser) initBuf() {
	p.buf = p.buf[:0]
}
// pushBuf appends a token to the value buffer of the current grammar.
func (p *Parser) pushBuf(tt TokenType, data []byte) {
	p.buf = append(p.buf, Token{tt, data})
}
  135. ////////////////////////////////////////////////////////////////
  136. func (p *Parser) parseStylesheet() GrammarType {
  137. if p.tt == CDOToken || p.tt == CDCToken {
  138. return TokenGrammar
  139. } else if p.tt == AtKeywordToken {
  140. return p.parseAtRule()
  141. } else if p.tt == CommentToken {
  142. return CommentGrammar
  143. } else if p.tt == ErrorToken {
  144. return ErrorGrammar
  145. }
  146. return p.parseQualifiedRule()
  147. }
  148. func (p *Parser) parseDeclarationList() GrammarType {
  149. if p.tt == CommentToken {
  150. p.tt, p.data = p.popToken(false)
  151. }
  152. for p.tt == SemicolonToken {
  153. p.tt, p.data = p.popToken(false)
  154. }
  155. if p.tt == ErrorToken {
  156. return ErrorGrammar
  157. } else if p.tt == AtKeywordToken {
  158. return p.parseAtRule()
  159. } else if p.tt == IdentToken {
  160. return p.parseDeclaration()
  161. } else if p.tt == CustomPropertyNameToken {
  162. return p.parseCustomProperty()
  163. }
  164. // parse error
  165. p.initBuf()
  166. p.err = parse.NewErrorLexer("unexpected token in declaration", p.l.r)
  167. for {
  168. tt, data := p.popToken(false)
  169. if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
  170. p.prevEnd = (tt == RightBraceToken)
  171. return ErrorGrammar
  172. }
  173. p.pushBuf(tt, data)
  174. }
  175. }
  176. ////////////////////////////////////////////////////////////////
  177. func (p *Parser) parseAtRule() GrammarType {
  178. p.initBuf()
  179. parse.ToLower(p.data)
  180. atRuleName := p.data
  181. if len(atRuleName) > 0 && atRuleName[1] == '-' {
  182. if i := bytes.IndexByte(atRuleName[2:], '-'); i != -1 {
  183. atRuleName = atRuleName[i+2:] // skip vendor specific prefix
  184. }
  185. }
  186. atRule := ToHash(atRuleName[1:])
  187. first := true
  188. skipWS := false
  189. for {
  190. tt, data := p.popToken(false)
  191. if tt == LeftBraceToken && p.level == 0 {
  192. if atRule == Font_Face || atRule == Page {
  193. p.state = append(p.state, (*Parser).parseAtRuleDeclarationList)
  194. } else if atRule == Document || atRule == Keyframes || atRule == Media || atRule == Supports {
  195. p.state = append(p.state, (*Parser).parseAtRuleRuleList)
  196. } else {
  197. p.state = append(p.state, (*Parser).parseAtRuleUnknown)
  198. }
  199. return BeginAtRuleGrammar
  200. } else if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
  201. p.prevEnd = (tt == RightBraceToken)
  202. return AtRuleGrammar
  203. } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
  204. p.level++
  205. } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
  206. p.level--
  207. }
  208. if first {
  209. if tt == LeftParenthesisToken || tt == LeftBracketToken {
  210. p.prevWS = false
  211. }
  212. first = false
  213. }
  214. if len(data) == 1 && (data[0] == ',' || data[0] == ':') {
  215. skipWS = true
  216. } else if p.prevWS && !skipWS && tt != RightParenthesisToken {
  217. p.pushBuf(WhitespaceToken, wsBytes)
  218. } else {
  219. skipWS = false
  220. }
  221. if tt == LeftParenthesisToken {
  222. skipWS = true
  223. }
  224. p.pushBuf(tt, data)
  225. }
  226. }
  227. func (p *Parser) parseAtRuleRuleList() GrammarType {
  228. if p.tt == RightBraceToken || p.tt == ErrorToken {
  229. p.state = p.state[:len(p.state)-1]
  230. return EndAtRuleGrammar
  231. } else if p.tt == AtKeywordToken {
  232. return p.parseAtRule()
  233. } else {
  234. return p.parseQualifiedRule()
  235. }
  236. }
  237. func (p *Parser) parseAtRuleDeclarationList() GrammarType {
  238. for p.tt == SemicolonToken {
  239. p.tt, p.data = p.popToken(false)
  240. }
  241. if p.tt == RightBraceToken || p.tt == ErrorToken {
  242. p.state = p.state[:len(p.state)-1]
  243. return EndAtRuleGrammar
  244. }
  245. return p.parseDeclarationList()
  246. }
  247. func (p *Parser) parseAtRuleUnknown() GrammarType {
  248. if p.tt == RightBraceToken && p.level == 0 || p.tt == ErrorToken {
  249. p.state = p.state[:len(p.state)-1]
  250. return EndAtRuleGrammar
  251. }
  252. if p.tt == LeftParenthesisToken || p.tt == LeftBraceToken || p.tt == LeftBracketToken || p.tt == FunctionToken {
  253. p.level++
  254. } else if p.tt == RightParenthesisToken || p.tt == RightBraceToken || p.tt == RightBracketToken {
  255. p.level--
  256. }
  257. return TokenGrammar
  258. }
// parseQualifiedRule parses the prelude (selector) of a qualified rule up to
// its opening '{'. Selector tokens are collected into p.buf with whitespace
// collapsed to single spaces, except around combinators and inside attribute
// selectors where it is dropped. It returns QualifiedRuleGrammar at each
// selector-list comma, BeginRulesetGrammar at the block start, or
// ErrorGrammar if the input ends before a left brace.
func (p *Parser) parseQualifiedRule() GrammarType {
	p.initBuf()
	first := true
	inAttrSel := false
	skipWS := true
	var tt TokenType
	var data []byte
	for {
		if first {
			// The first token was already consumed by Next; replay it here
			// and neutralize p.tt/p.data so that after a comma the next
			// selector re-enters this function without disturbing state.
			tt, data = p.tt, p.data
			p.tt = WhitespaceToken
			p.data = emptyBytes
			first = false
		} else {
			tt, data = p.popToken(false)
		}
		if tt == LeftBraceToken && p.level == 0 {
			p.state = append(p.state, (*Parser).parseQualifiedRuleDeclarationList)
			return BeginRulesetGrammar
		} else if tt == ErrorToken {
			p.err = parse.NewErrorLexer("unexpected ending in qualified rule, expected left brace token", p.l.r)
			return ErrorGrammar
		} else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
			p.level++
		} else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
			p.level--
		}
		// Drop whitespace around ',' and the combinators '>', '+', '~';
		// otherwise collapse a whitespace run into one space token, unless
		// inside an attribute selector.
		if len(data) == 1 && (data[0] == ',' || data[0] == '>' || data[0] == '+' || data[0] == '~') {
			if data[0] == ',' {
				return QualifiedRuleGrammar
			}
			skipWS = true
		} else if p.prevWS && !skipWS && !inAttrSel {
			p.pushBuf(WhitespaceToken, wsBytes)
		} else {
			skipWS = false
		}
		if tt == LeftBracketToken {
			inAttrSel = true
		} else if tt == RightBracketToken {
			inAttrSel = false
		}
		p.pushBuf(tt, data)
	}
}
  304. func (p *Parser) parseQualifiedRuleDeclarationList() GrammarType {
  305. for p.tt == SemicolonToken {
  306. p.tt, p.data = p.popToken(false)
  307. }
  308. if p.tt == RightBraceToken || p.tt == ErrorToken {
  309. p.state = p.state[:len(p.state)-1]
  310. return EndRulesetGrammar
  311. }
  312. return p.parseDeclarationList()
  313. }
// parseDeclaration parses a "property: value" declaration up to ';', an
// unmatched '}', or EOF. The property name (already in p.data) is lowercased
// in place; the value tokens are collected into p.buf with whitespace
// collapsed. Returns ErrorGrammar if the property is not followed by a colon.
func (p *Parser) parseDeclaration() GrammarType {
	p.initBuf()
	parse.ToLower(p.data)
	if tt, _ := p.popToken(false); tt != ColonToken {
		p.err = parse.NewErrorLexer("unexpected token in declaration", p.l.r)
		return ErrorGrammar
	}
	skipWS := true // drop whitespace directly after the colon
	for {
		tt, data := p.popToken(false)
		if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
			p.prevEnd = (tt == RightBraceToken) // replay '}' on the next call to Next
			return DeclarationGrammar
		} else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
			p.level++
		} else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
			p.level--
		}
		// Drop whitespace around the separators ',', '/', ':', '!' and '=';
		// otherwise collapse a whitespace run into one space token.
		if len(data) == 1 && (data[0] == ',' || data[0] == '/' || data[0] == ':' || data[0] == '!' || data[0] == '=') {
			skipWS = true
		} else if p.prevWS && !skipWS {
			p.pushBuf(WhitespaceToken, wsBytes)
		} else {
			skipWS = false
		}
		p.pushBuf(tt, data)
	}
}
  342. func (p *Parser) parseCustomProperty() GrammarType {
  343. p.initBuf()
  344. if tt, _ := p.popToken(false); tt != ColonToken {
  345. p.err = parse.NewErrorLexer("unexpected token in declaration", p.l.r)
  346. return ErrorGrammar
  347. }
  348. val := []byte{}
  349. for {
  350. tt, data := p.l.Next()
  351. if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
  352. p.prevEnd = (tt == RightBraceToken)
  353. p.pushBuf(CustomPropertyValueToken, val)
  354. return CustomPropertyGrammar
  355. } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
  356. p.level++
  357. } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
  358. p.level--
  359. }
  360. val = append(val, data...)
  361. }
  362. }