parse.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. package css // import "github.com/tdewolff/parse/css"
  2. import (
  3. "bytes"
  4. "io"
  5. "strconv"
  6. "github.com/tdewolff/parse"
  7. )
  8. var wsBytes = []byte(" ")
  9. var endBytes = []byte("}")
  10. var emptyBytes = []byte("")
// GrammarType determines the type of grammar.
type GrammarType uint32

// GrammarType values.
//
// NOTE: the declaration order fixes the iota values; do not reorder.
const (
	ErrorGrammar          GrammarType = iota // extra token when errors occur
	CommentGrammar                           // a comment token passed through at the top level
	AtRuleGrammar                            // at-rule terminated by ';' or '}' (no block)
	BeginAtRuleGrammar                       // at-rule followed by a '{' block
	EndAtRuleGrammar                         // closing '}' of an at-rule block
	QualifiedRuleGrammar                     // one selector of a comma-separated selector list
	BeginRulesetGrammar                      // selector followed by a '{' block
	EndRulesetGrammar                        // closing '}' of a ruleset
	DeclarationGrammar                       // property ':' value declaration
	TokenGrammar                             // bare token (e.g. CDO/CDC) passed through
	CustomPropertyGrammar                    // '--name' ':' value declaration
)
  27. // String returns the string representation of a GrammarType.
  28. func (tt GrammarType) String() string {
  29. switch tt {
  30. case ErrorGrammar:
  31. return "Error"
  32. case CommentGrammar:
  33. return "Comment"
  34. case AtRuleGrammar:
  35. return "AtRule"
  36. case BeginAtRuleGrammar:
  37. return "BeginAtRule"
  38. case EndAtRuleGrammar:
  39. return "EndAtRule"
  40. case QualifiedRuleGrammar:
  41. return "QualifiedRule"
  42. case BeginRulesetGrammar:
  43. return "BeginRuleset"
  44. case EndRulesetGrammar:
  45. return "EndRuleset"
  46. case DeclarationGrammar:
  47. return "Declaration"
  48. case TokenGrammar:
  49. return "Token"
  50. case CustomPropertyGrammar:
  51. return "CustomProperty"
  52. }
  53. return "Invalid(" + strconv.Itoa(int(tt)) + ")"
  54. }
////////////////////////////////////////////////////////////////

// State is the state function the parser currently is in.
// The parser keeps a stack of these and calls the topmost one per Next.
type State func(*Parser) GrammarType
// Token is a single TokenType and its associated data.
type Token struct {
	TokenType        // lexical token type
	Data      []byte // raw bytes of the token
}
  63. func (t Token) String() string {
  64. return t.TokenType.String() + "('" + string(t.Data) + "')"
  65. }
// Parser is the state for the parser.
type Parser struct {
	l     *Lexer  // underlying tokenizer
	state []State // stack of state functions; the topmost handles the next grammar
	err   error   // error for the last returned grammar, if any

	buf   []Token // buffered token values of the last grammar (see Values)
	level int     // nesting depth of '(', '[', '{' and function tokens

	tt      TokenType // current token type
	data    []byte    // current token data
	prevWS  bool      // whitespace was skipped directly before the current token
	prevEnd bool      // a pending '}' must be emitted as the next token (see Next)
}
  78. // NewParser returns a new CSS parser from an io.Reader. isInline specifies whether this is an inline style attribute.
  79. func NewParser(r io.Reader, isInline bool) *Parser {
  80. l := NewLexer(r)
  81. p := &Parser{
  82. l: l,
  83. state: make([]State, 0, 4),
  84. }
  85. if isInline {
  86. p.state = append(p.state, (*Parser).parseDeclarationList)
  87. } else {
  88. p.state = append(p.state, (*Parser).parseStylesheet)
  89. }
  90. return p
  91. }
  92. // Err returns the error encountered during parsing, this is often io.EOF but also other errors can be returned.
  93. func (p *Parser) Err() error {
  94. if p.err != nil {
  95. return p.err
  96. }
  97. return p.l.Err()
  98. }
// Restore restores the NULL byte at the end of the buffer.
// It simply delegates to the underlying lexer.
func (p *Parser) Restore() {
	p.l.Restore()
}
  103. // Next returns the next Grammar. It returns ErrorGrammar when an error was encountered. Using Err() one can retrieve the error message.
  104. func (p *Parser) Next() (GrammarType, TokenType, []byte) {
  105. p.err = nil
  106. if p.prevEnd {
  107. p.tt, p.data = RightBraceToken, endBytes
  108. p.prevEnd = false
  109. } else {
  110. p.tt, p.data = p.popToken(true)
  111. }
  112. gt := p.state[len(p.state)-1](p)
  113. return gt, p.tt, p.data
  114. }
// Values returns a slice of Tokens for the last Grammar. Only AtRuleGrammar, BeginAtRuleGrammar, BeginRulesetGrammar and Declaration will return the at-rule components, ruleset selector and declaration values respectively.
func (p *Parser) Values() []Token {
	return p.buf
}
  119. func (p *Parser) popToken(allowComment bool) (TokenType, []byte) {
  120. p.prevWS = false
  121. tt, data := p.l.Next()
  122. for tt == WhitespaceToken || tt == CommentToken {
  123. if tt == WhitespaceToken {
  124. p.prevWS = true
  125. } else if allowComment && len(p.state) == 1 {
  126. break
  127. }
  128. tt, data = p.l.Next()
  129. }
  130. return tt, data
  131. }
// initBuf resets the token buffer, keeping its capacity for reuse.
func (p *Parser) initBuf() {
	p.buf = p.buf[:0]
}
// pushBuf appends a token to the buffer returned by Values.
func (p *Parser) pushBuf(tt TokenType, data []byte) {
	p.buf = append(p.buf, Token{tt, data})
}
  138. ////////////////////////////////////////////////////////////////
  139. func (p *Parser) parseStylesheet() GrammarType {
  140. if p.tt == CDOToken || p.tt == CDCToken {
  141. return TokenGrammar
  142. } else if p.tt == AtKeywordToken {
  143. return p.parseAtRule()
  144. } else if p.tt == CommentToken {
  145. return CommentGrammar
  146. } else if p.tt == ErrorToken {
  147. return ErrorGrammar
  148. }
  149. return p.parseQualifiedRule()
  150. }
  151. func (p *Parser) parseDeclarationList() GrammarType {
  152. if p.tt == CommentToken {
  153. p.tt, p.data = p.popToken(false)
  154. }
  155. for p.tt == SemicolonToken {
  156. p.tt, p.data = p.popToken(false)
  157. }
  158. if p.tt == ErrorToken {
  159. return ErrorGrammar
  160. } else if p.tt == AtKeywordToken {
  161. return p.parseAtRule()
  162. } else if p.tt == IdentToken {
  163. return p.parseDeclaration()
  164. } else if p.tt == CustomPropertyNameToken {
  165. return p.parseCustomProperty()
  166. }
  167. // parse error
  168. p.initBuf()
  169. p.err = parse.NewErrorLexer("unexpected token in declaration", p.l.r)
  170. for {
  171. tt, data := p.popToken(false)
  172. if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
  173. p.prevEnd = (tt == RightBraceToken)
  174. return ErrorGrammar
  175. }
  176. p.pushBuf(tt, data)
  177. }
  178. }
  179. ////////////////////////////////////////////////////////////////
  180. func (p *Parser) parseAtRule() GrammarType {
  181. p.initBuf()
  182. parse.ToLower(p.data)
  183. atRuleName := p.data
  184. if len(atRuleName) > 0 && atRuleName[1] == '-' {
  185. if i := bytes.IndexByte(atRuleName[2:], '-'); i != -1 {
  186. atRuleName = atRuleName[i+2:] // skip vendor specific prefix
  187. }
  188. }
  189. atRule := ToHash(atRuleName[1:])
  190. first := true
  191. skipWS := false
  192. for {
  193. tt, data := p.popToken(false)
  194. if tt == LeftBraceToken && p.level == 0 {
  195. if atRule == Font_Face || atRule == Page {
  196. p.state = append(p.state, (*Parser).parseAtRuleDeclarationList)
  197. } else if atRule == Document || atRule == Keyframes || atRule == Media || atRule == Supports {
  198. p.state = append(p.state, (*Parser).parseAtRuleRuleList)
  199. } else {
  200. p.state = append(p.state, (*Parser).parseAtRuleUnknown)
  201. }
  202. return BeginAtRuleGrammar
  203. } else if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
  204. p.prevEnd = (tt == RightBraceToken)
  205. return AtRuleGrammar
  206. } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
  207. p.level++
  208. } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
  209. p.level--
  210. }
  211. if first {
  212. if tt == LeftParenthesisToken || tt == LeftBracketToken {
  213. p.prevWS = false
  214. }
  215. first = false
  216. }
  217. if len(data) == 1 && (data[0] == ',' || data[0] == ':') {
  218. skipWS = true
  219. } else if p.prevWS && !skipWS && tt != RightParenthesisToken {
  220. p.pushBuf(WhitespaceToken, wsBytes)
  221. } else {
  222. skipWS = false
  223. }
  224. if tt == LeftParenthesisToken {
  225. skipWS = true
  226. }
  227. p.pushBuf(tt, data)
  228. }
  229. }
  230. func (p *Parser) parseAtRuleRuleList() GrammarType {
  231. if p.tt == RightBraceToken || p.tt == ErrorToken {
  232. p.state = p.state[:len(p.state)-1]
  233. return EndAtRuleGrammar
  234. } else if p.tt == AtKeywordToken {
  235. return p.parseAtRule()
  236. } else {
  237. return p.parseQualifiedRule()
  238. }
  239. }
  240. func (p *Parser) parseAtRuleDeclarationList() GrammarType {
  241. for p.tt == SemicolonToken {
  242. p.tt, p.data = p.popToken(false)
  243. }
  244. if p.tt == RightBraceToken || p.tt == ErrorToken {
  245. p.state = p.state[:len(p.state)-1]
  246. return EndAtRuleGrammar
  247. }
  248. return p.parseDeclarationList()
  249. }
  250. func (p *Parser) parseAtRuleUnknown() GrammarType {
  251. if p.tt == RightBraceToken && p.level == 0 || p.tt == ErrorToken {
  252. p.state = p.state[:len(p.state)-1]
  253. return EndAtRuleGrammar
  254. }
  255. if p.tt == LeftParenthesisToken || p.tt == LeftBraceToken || p.tt == LeftBracketToken || p.tt == FunctionToken {
  256. p.level++
  257. } else if p.tt == RightParenthesisToken || p.tt == RightBraceToken || p.tt == RightBracketToken {
  258. p.level--
  259. }
  260. return TokenGrammar
  261. }
// parseQualifiedRule parses a qualified rule (a ruleset selector), buffering
// its tokens until the opening '{' of the ruleset or a ',' that separates
// selectors in a selector list.
func (p *Parser) parseQualifiedRule() GrammarType {
	p.initBuf()
	first := true
	inAttrSel := false // inside a [...] attribute selector, whitespace is dropped
	skipWS := true
	var tt TokenType
	var data []byte
	for {
		if first {
			// Consume the token already held in p.tt/p.data into the buffer
			// and park a harmless empty whitespace token there.
			// NOTE(review): presumably so Next returns an empty token when
			// this function returns QualifiedRuleGrammar — confirm.
			tt, data = p.tt, p.data
			p.tt = WhitespaceToken
			p.data = emptyBytes
			first = false
		} else {
			tt, data = p.popToken(false)
		}
		if tt == LeftBraceToken && p.level == 0 {
			// Ruleset body follows; push the declaration-list state.
			p.state = append(p.state, (*Parser).parseQualifiedRuleDeclarationList)
			return BeginRulesetGrammar
		} else if tt == ErrorToken {
			p.err = parse.NewErrorLexer("unexpected ending in qualified rule, expected left brace token", p.l.r)
			return ErrorGrammar
		} else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
			p.level++
		} else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
			p.level--
		}
		// Combinators and commas absorb surrounding whitespace; a ',' ends
		// this selector and yields one QualifiedRuleGrammar of the list.
		if len(data) == 1 && (data[0] == ',' || data[0] == '>' || data[0] == '+' || data[0] == '~') {
			if data[0] == ',' {
				return QualifiedRuleGrammar
			}
			skipWS = true
		} else if p.prevWS && !skipWS && !inAttrSel {
			p.pushBuf(WhitespaceToken, wsBytes)
		} else {
			skipWS = false
		}
		if tt == LeftBracketToken {
			inAttrSel = true
		} else if tt == RightBracketToken {
			inAttrSel = false
		}
		p.pushBuf(tt, data)
	}
}
  307. func (p *Parser) parseQualifiedRuleDeclarationList() GrammarType {
  308. for p.tt == SemicolonToken {
  309. p.tt, p.data = p.popToken(false)
  310. }
  311. if p.tt == RightBraceToken || p.tt == ErrorToken {
  312. p.state = p.state[:len(p.state)-1]
  313. return EndRulesetGrammar
  314. }
  315. return p.parseDeclarationList()
  316. }
  317. func (p *Parser) parseDeclaration() GrammarType {
  318. p.initBuf()
  319. parse.ToLower(p.data)
  320. if tt, _ := p.popToken(false); tt != ColonToken {
  321. p.err = parse.NewErrorLexer("unexpected token in declaration", p.l.r)
  322. return ErrorGrammar
  323. }
  324. skipWS := true
  325. for {
  326. tt, data := p.popToken(false)
  327. if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
  328. p.prevEnd = (tt == RightBraceToken)
  329. return DeclarationGrammar
  330. } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
  331. p.level++
  332. } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
  333. p.level--
  334. }
  335. if len(data) == 1 && (data[0] == ',' || data[0] == '/' || data[0] == ':' || data[0] == '!' || data[0] == '=') {
  336. skipWS = true
  337. } else if p.prevWS && !skipWS {
  338. p.pushBuf(WhitespaceToken, wsBytes)
  339. } else {
  340. skipWS = false
  341. }
  342. p.pushBuf(tt, data)
  343. }
  344. }
  345. func (p *Parser) parseCustomProperty() GrammarType {
  346. p.initBuf()
  347. if tt, _ := p.popToken(false); tt != ColonToken {
  348. p.err = parse.NewErrorLexer("unexpected token in declaration", p.l.r)
  349. return ErrorGrammar
  350. }
  351. val := []byte{}
  352. for {
  353. tt, data := p.l.Next()
  354. if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
  355. p.prevEnd = (tt == RightBraceToken)
  356. p.pushBuf(CustomPropertyValueToken, val)
  357. return CustomPropertyGrammar
  358. } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
  359. p.level++
  360. } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
  361. p.level--
  362. }
  363. val = append(val, data...)
  364. }
  365. }