srcset.go 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package sanitizer
  4. import (
  5. "math"
  6. "strconv"
  7. "strings"
  8. )
  9. type imageCandidate struct {
  10. ImageURL string
  11. Descriptor string
  12. }
  13. type imageCandidates []*imageCandidate
  14. func (c imageCandidates) String() string {
  15. htmlCandidates := make([]string, 0, len(c))
  16. for _, imageCandidate := range c {
  17. var htmlCandidate string
  18. if imageCandidate.Descriptor != "" {
  19. htmlCandidate = imageCandidate.ImageURL + " " + imageCandidate.Descriptor
  20. } else {
  21. htmlCandidate = imageCandidate.ImageURL
  22. }
  23. htmlCandidates = append(htmlCandidates, htmlCandidate)
  24. }
  25. return strings.Join(htmlCandidates, ", ")
  26. }
  27. // ParseSrcSetAttribute returns the list of image candidates from the set.
  28. // Parsing behavior follows the WebKit HTMLSrcsetParser implementation.
  29. // https://html.spec.whatwg.org/#parse-a-srcset-attribute
  30. func ParseSrcSetAttribute(attributeValue string) (candidates imageCandidates) {
  31. if attributeValue == "" {
  32. return nil
  33. }
  34. var position uint = 0
  35. for position < uint(len(attributeValue)) {
  36. position = skipWhileHTMLSpaceOrComma(attributeValue, position)
  37. if position >= uint(len(attributeValue)) {
  38. break
  39. }
  40. urlStart := position
  41. position = skipUntilASCIIWhitespace(attributeValue, position)
  42. imageURL := attributeValue[urlStart:position]
  43. if imageURL == "" {
  44. continue
  45. }
  46. var result descriptorParsingResult
  47. if imageURL[len(imageURL)-1] == ',' {
  48. imageURL = strings.TrimRight(imageURL, ",")
  49. if imageURL == "" {
  50. continue
  51. }
  52. } else {
  53. position = skipWhileASCIIWhitespace(attributeValue, position)
  54. descriptorTokens, newPosition := tokenizeDescriptors(attributeValue, position)
  55. position = newPosition
  56. if !parseDescriptors(descriptorTokens, &result) {
  57. continue
  58. }
  59. }
  60. candidates = append(candidates, &imageCandidate{
  61. ImageURL: imageURL,
  62. Descriptor: serializeDescriptor(result),
  63. })
  64. }
  65. return candidates
  66. }
  67. type descriptorParsingResult struct {
  68. density float64
  69. resourceWidth uint
  70. resourceHeight uint
  71. hasDensity bool
  72. hasWidth bool
  73. hasHeight bool
  74. }
  75. func (r *descriptorParsingResult) setDensity(value float64) {
  76. r.density = value
  77. r.hasDensity = true
  78. }
  79. func (r *descriptorParsingResult) setResourceWidth(value uint) {
  80. r.resourceWidth = value
  81. r.hasWidth = true
  82. }
  83. func (r *descriptorParsingResult) setResourceHeight(value uint) {
  84. r.resourceHeight = value
  85. r.hasHeight = true
  86. }
  87. func serializeDescriptor(result descriptorParsingResult) string {
  88. if result.hasDensity {
  89. return formatFloat(result.density) + "x"
  90. }
  91. if result.hasWidth {
  92. return strconv.FormatUint(uint64(result.resourceWidth), 10) + "w"
  93. }
  94. return ""
  95. }
  96. func parseDescriptors(descriptors []string, result *descriptorParsingResult) bool {
  97. for _, descriptor := range descriptors {
  98. if descriptor == "" {
  99. continue
  100. }
  101. lastIndex := len(descriptor) - 1
  102. descriptorChar := descriptor[lastIndex]
  103. value := descriptor[:lastIndex]
  104. switch descriptorChar {
  105. case 'x':
  106. if result.hasDensity || result.hasHeight || result.hasWidth {
  107. return false
  108. }
  109. density, ok := parseValidHTMLFloatingPointNumber(value)
  110. if !ok || density < 0 {
  111. return false
  112. }
  113. result.setDensity(density)
  114. case 'w':
  115. if result.hasDensity || result.hasWidth {
  116. return false
  117. }
  118. width, ok := parseValidHTMLNonNegativeInteger(value)
  119. if !ok || width <= 0 {
  120. return false
  121. }
  122. result.setResourceWidth(width)
  123. case 'h':
  124. if result.hasDensity || result.hasHeight {
  125. return false
  126. }
  127. height, ok := parseValidHTMLNonNegativeInteger(value)
  128. if !ok || height <= 0 {
  129. return false
  130. }
  131. result.setResourceHeight(height)
  132. default:
  133. return false
  134. }
  135. }
  136. return !result.hasHeight || result.hasWidth
  137. }
  138. type descriptorTokenizerState int
  139. const (
  140. descriptorStateInitial descriptorTokenizerState = iota
  141. descriptorStateInParenthesis
  142. descriptorStateAfterToken
  143. )
  144. func tokenizeDescriptors(input string, start uint) (tokens []string, newPosition uint) {
  145. state := descriptorStateInitial
  146. currentStart := start
  147. currentSet := true
  148. position := start
  149. appendDescriptorAndReset := func(position uint) {
  150. if currentSet && position > currentStart {
  151. tokens = append(tokens, input[currentStart:position])
  152. }
  153. currentSet = false
  154. }
  155. appendCharacter := func(position uint) {
  156. if !currentSet {
  157. currentStart = position
  158. currentSet = true
  159. }
  160. }
  161. for {
  162. if position >= uint(len(input)) {
  163. if state != descriptorStateAfterToken {
  164. appendDescriptorAndReset(position)
  165. }
  166. return tokens, position
  167. }
  168. character := input[position]
  169. switch state {
  170. case descriptorStateInitial:
  171. switch {
  172. case isComma(character):
  173. appendDescriptorAndReset(position)
  174. position++
  175. return tokens, position
  176. case isASCIIWhitespace(character):
  177. appendDescriptorAndReset(position)
  178. currentStart = position + 1
  179. currentSet = true
  180. state = descriptorStateAfterToken
  181. case character == '(':
  182. appendCharacter(position)
  183. state = descriptorStateInParenthesis
  184. default:
  185. appendCharacter(position)
  186. }
  187. case descriptorStateInParenthesis:
  188. if character == ')' {
  189. appendCharacter(position)
  190. state = descriptorStateInitial
  191. } else {
  192. appendCharacter(position)
  193. }
  194. case descriptorStateAfterToken:
  195. if !isASCIIWhitespace(character) {
  196. state = descriptorStateInitial
  197. currentStart = position
  198. currentSet = true
  199. position--
  200. }
  201. }
  202. position++
  203. }
  204. }
  205. func parseValidHTMLNonNegativeInteger(value string) (uint, bool) {
  206. if value == "" {
  207. return 0, false
  208. }
  209. for i := 0; i < len(value); i++ {
  210. if value[i] < '0' || value[i] > '9' {
  211. return 0, false
  212. }
  213. }
  214. parsed, err := strconv.ParseUint(value, 10, 0)
  215. if err != nil {
  216. return 0, false
  217. }
  218. return uint(parsed), true
  219. }
  220. func parseValidHTMLFloatingPointNumber(value string) (float64, bool) {
  221. if value == "" {
  222. return 0, false
  223. }
  224. if value[0] == '+' || value[len(value)-1] == '.' {
  225. return 0, false
  226. }
  227. parsed, err := strconv.ParseFloat(value, 64)
  228. if err != nil || math.IsNaN(parsed) || math.IsInf(parsed, 0) {
  229. return 0, false
  230. }
  231. return parsed, true
  232. }
  233. func formatFloat(value float64) string {
  234. return strconv.FormatFloat(value, 'g', -1, 64)
  235. }
  236. func skipWhileHTMLSpaceOrComma(value string, position uint) uint {
  237. for position < uint(len(value)) && (isASCIIWhitespace(value[position]) || isComma(value[position])) {
  238. position++
  239. }
  240. return position
  241. }
  242. func skipWhileASCIIWhitespace(value string, position uint) uint {
  243. for position < uint(len(value)) && isASCIIWhitespace(value[position]) {
  244. position++
  245. }
  246. return position
  247. }
  248. func skipUntilASCIIWhitespace(value string, position uint) uint {
  249. for position < uint(len(value)) && !isASCIIWhitespace(value[position]) {
  250. position++
  251. }
  252. return position
  253. }
  254. func isASCIIWhitespace(character byte) bool {
  255. switch character {
  256. case '\t', '\n', '\f', '\r', ' ':
  257. return true
  258. default:
  259. return false
  260. }
  261. }
  262. func isComma(character byte) bool {
  263. return character == ','
  264. }