// LBP / miniflux_v2
// Mirror of https://github.com/miniflux/v2.git
							// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package sanitizer

import (
	"math"
	"strconv"
	"strings"
)

// imageCandidate is a single entry of a srcset attribute: an image URL and
// its optional descriptor (for example "2x" or "640w").
type imageCandidate struct {
	ImageURL   string
	Descriptor string
}

// imageCandidates is an ordered list of srcset entries.
type imageCandidates []*imageCandidate

// String renders the candidates back into srcset syntax: each entry is
// written as "URL" or "URL DESCRIPTOR", and entries are joined with ", ".
func (c imageCandidates) String() string {
	parts := make([]string, len(c))

	for i, candidate := range c {
		parts[i] = candidate.ImageURL
		if candidate.Descriptor != "" {
			parts[i] += " " + candidate.Descriptor
		}
	}

	return strings.Join(parts, ", ")
}

// ParseSrcSetAttribute splits a srcset attribute value into its image
// candidates. Candidates whose descriptors fail to parse are dropped.
// Parsing behavior follows the WebKit HTMLSrcsetParser implementation.
// https://html.spec.whatwg.org/#parse-a-srcset-attribute
func ParseSrcSetAttribute(attributeValue string) (candidates imageCandidates) {
	end := uint(len(attributeValue))
	if end == 0 {
		return nil
	}

	cursor := uint(0)
	for cursor < end {
		// Leading whitespace and stray commas never belong to a candidate.
		cursor = skipWhileHTMLSpaceOrComma(attributeValue, cursor)
		if cursor >= end {
			break
		}

		// The URL runs up to the next whitespace character.
		begin := cursor
		cursor = skipUntilASCIIWhitespace(attributeValue, cursor)
		rawURL := attributeValue[begin:cursor]
		if rawURL == "" {
			continue
		}

		var parsed descriptorParsingResult
		if strings.HasSuffix(rawURL, ",") {
			// A URL ending in a comma closes the candidate right away:
			// there are no descriptors to read.
			rawURL = strings.TrimRight(rawURL, ",")
			if rawURL == "" {
				continue
			}
		} else {
			var tokens []string
			tokens, cursor = tokenizeDescriptors(attributeValue, skipWhileASCIIWhitespace(attributeValue, cursor))
			if !parseDescriptors(tokens, &parsed) {
				continue
			}
		}

		candidates = append(candidates, &imageCandidate{
			ImageURL:   rawURL,
			Descriptor: serializeDescriptor(parsed),
		})
	}

	return candidates
}

// descriptorParsingResult accumulates the descriptors seen for one image
// candidate. Each value has a companion flag recording whether it was set.
type descriptorParsingResult struct {
	density        float64
	resourceWidth  uint
	resourceHeight uint
	hasDensity     bool
	hasWidth       bool
	hasHeight      bool
}

// setDensity stores the pixel density and marks it as present.
func (r *descriptorParsingResult) setDensity(value float64) {
	r.density, r.hasDensity = value, true
}

// setResourceWidth stores the width and marks it as present.
func (r *descriptorParsingResult) setResourceWidth(value uint) {
	r.resourceWidth, r.hasWidth = value, true
}

// setResourceHeight stores the height and marks it as present.
func (r *descriptorParsingResult) setResourceHeight(value uint) {
	r.resourceHeight, r.hasHeight = value, true
}

// serializeDescriptor turns a parsed result back into descriptor text.
// A density wins over a width; a height on its own is never emitted, and a
// result with neither density nor width yields the empty string.
func serializeDescriptor(result descriptorParsingResult) string {
	switch {
	case result.hasDensity:
		return formatFloat(result.density) + "x"
	case result.hasWidth:
		return strconv.FormatUint(uint64(result.resourceWidth), 10) + "w"
	default:
		return ""
	}
}

// parseDescriptors interprets each descriptor token and records it in result.
// The last byte of a token selects its kind ('x' density, 'w' width,
// 'h' height) and the rest is its numeric value. It reports false as soon as
// a token is malformed, duplicated, or conflicts with one already seen, and
// also when a height was given without a width. On failure result may have
// been partially filled in.
func parseDescriptors(descriptors []string, result *descriptorParsingResult) bool {
	for _, token := range descriptors {
		if len(token) == 0 {
			continue
		}

		split := len(token) - 1
		number, unit := token[:split], token[split]

		switch unit {
		case 'x':
			// A density cannot be combined with any other descriptor.
			if result.hasDensity || result.hasWidth || result.hasHeight {
				return false
			}
			density, valid := parseValidHTMLFloatingPointNumber(number)
			if !valid || density < 0 {
				return false
			}
			result.setDensity(density)

		case 'w':
			if result.hasWidth || result.hasDensity {
				return false
			}
			width, valid := parseValidHTMLNonNegativeInteger(number)
			if !valid || width == 0 {
				return false
			}
			result.setResourceWidth(width)

		case 'h':
			if result.hasHeight || result.hasDensity {
				return false
			}
			height, valid := parseValidHTMLNonNegativeInteger(number)
			if !valid || height == 0 {
				return false
			}
			result.setResourceHeight(height)

		default:
			return false
		}
	}

	// A height is only acceptable alongside a width.
	if result.hasHeight && !result.hasWidth {
		return false
	}
	return true
}

// descriptorTokenizerState is the current state of the descriptor tokenizer
// state machine driven by tokenizeDescriptors.
type descriptorTokenizerState int

const (
	// descriptorStateInitial is the starting state: characters are collected
	// into the current token; whitespace ends the token and a comma ends the
	// whole candidate.
	descriptorStateInitial descriptorTokenizerState = iota
	// descriptorStateInParenthesis is entered on '(' and left on ')'; while
	// in it, commas and whitespace are kept as part of the current token.
	descriptorStateInParenthesis
	// descriptorStateAfterToken is entered after whitespace closed a token;
	// further whitespace is skipped until the next token starts.
	descriptorStateAfterToken
)

// tokenizeDescriptors splits the descriptor part of one image candidate into
// whitespace-separated tokens, starting at byte offset start of input.
//
// Scanning stops at the first comma found outside parentheses or at the end
// of input. The returned newPosition is the offset just past that comma, or
// len(input) when the end was reached. Tokens are substrings of input; text
// between '(' and ')' is kept inside the current token, including any commas
// and whitespace.
func tokenizeDescriptors(input string, start uint) (tokens []string, newPosition uint) {
	state := descriptorStateInitial
	// currentStart/currentSet track the token being collected: currentSet
	// reports whether a token is in progress and currentStart where it began.
	currentStart := start
	currentSet := true
	position := start

	// appendDescriptorAndReset closes the token ending just before position,
	// appending it only when it is non-empty, and marks no token in progress.
	appendDescriptorAndReset := func(position uint) {
		if currentSet && position > currentStart {
			tokens = append(tokens, input[currentStart:position])
		}
		currentSet = false
	}

	// appendCharacter opens a new token at position if none is in progress;
	// otherwise the character simply extends the current token.
	appendCharacter := func(position uint) {
		if !currentSet {
			currentStart = position
			currentSet = true
		}
	}

	for {
		if position >= uint(len(input)) {
			// End of input: flush the pending token unless whitespace has
			// already closed it.
			if state != descriptorStateAfterToken {
				appendDescriptorAndReset(position)
			}
			return tokens, position
		}

		character := input[position]
		switch state {
		case descriptorStateInitial:
			switch {
			case isComma(character):
				// A top-level comma terminates this candidate; step past it.
				appendDescriptorAndReset(position)
				position++
				return tokens, position
			case isASCIIWhitespace(character):
				// Whitespace closes the token; tentatively start the next one
				// right after this character.
				appendDescriptorAndReset(position)
				currentStart = position + 1
				currentSet = true
				state = descriptorStateAfterToken
			case character == '(':
				appendCharacter(position)
				state = descriptorStateInParenthesis
			default:
				appendCharacter(position)
			}
		case descriptorStateInParenthesis:
			if character == ')' {
				appendCharacter(position)
				state = descriptorStateInitial
			} else {
				appendCharacter(position)
			}
		case descriptorStateAfterToken:
			if !isASCIIWhitespace(character) {
				// First character of the next token: switch back to the
				// initial state and re-process this same character there
				// (the decrement cancels the increment below).
				state = descriptorStateInitial
				currentStart = position
				currentSet = true
				position--
			}
		}

		position++
	}
}

// parseValidHTMLNonNegativeInteger parses value as a run of one or more
// ASCII digits. It reports false for the empty string, for any non-digit
// byte (including signs and whitespace), and for values that do not fit in
// a uint.
func parseValidHTMLNonNegativeInteger(value string) (uint, bool) {
	if len(value) == 0 {
		return 0, false
	}

	for _, c := range []byte(value) {
		if c < '0' || c > '9' {
			return 0, false
		}
	}

	// Bit size 0 makes ParseUint range-check against the platform's uint.
	number, err := strconv.ParseUint(value, 10, 0)
	if err != nil {
		return 0, false
	}

	return uint(number), true
}

// parseValidHTMLFloatingPointNumber parses value as a finite decimal
// floating-point number in the HTML sense: an optional leading '-', decimal
// digits with an optional fractional part, and an optional exponent.
//
// It returns the parsed number and true on success, or (0, false) when value
// is empty, uses syntax HTML does not allow, or is out of float64 range.
func parseValidHTMLFloatingPointNumber(value string) (float64, bool) {
	if value == "" {
		return 0, false
	}
	// strconv.ParseFloat tolerates a leading '+' and a trailing '.', neither
	// of which is allowed in an HTML floating-point number.
	if value[0] == '+' || value[len(value)-1] == '.' {
		return 0, false
	}

	// strconv.ParseFloat follows Go literal syntax and would also accept
	// hexadecimal floats ("0x1p4"), digit-separating underscores ("1_000")
	// and the words "inf"/"nan". Restrict the alphabet to what a decimal
	// number can contain so those forms are rejected up front.
	for i := 0; i < len(value); i++ {
		switch c := value[i]; c {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '+', '.', 'e', 'E':
		default:
			return 0, false
		}
	}

	parsed, err := strconv.ParseFloat(value, 64)
	if err != nil || math.IsNaN(parsed) || math.IsInf(parsed, 0) {
		return 0, false
	}

	return parsed, true
}

// formatFloat renders value using the shortest decimal text that parses back
// to the same float64, switching to exponent notation for very large or very
// small magnitudes.
func formatFloat(value float64) string {
	const (
		format    = 'g'
		precision = -1 // shortest representation that round-trips
		bitSize   = 64
	)
	return strconv.FormatFloat(value, format, precision, bitSize)
}

// skipWhileHTMLSpaceOrComma returns the offset of the first byte at or after
// position that is neither ASCII whitespace nor a comma, or the offset where
// scanning ran out of input.
func skipWhileHTMLSpaceOrComma(value string, position uint) uint {
	end := uint(len(value))
	for ; position < end; position++ {
		if c := value[position]; !isASCIIWhitespace(c) && !isComma(c) {
			break
		}
	}
	return position
}

// skipWhileASCIIWhitespace returns the offset of the first byte at or after
// position that is not ASCII whitespace, or the offset where scanning ran
// out of input.
func skipWhileASCIIWhitespace(value string, position uint) uint {
	end := uint(len(value))
	for ; position < end; position++ {
		if !isASCIIWhitespace(value[position]) {
			break
		}
	}
	return position
}

// skipUntilASCIIWhitespace returns the offset of the first ASCII whitespace
// byte at or after position, or the offset where scanning ran out of input.
func skipUntilASCIIWhitespace(value string, position uint) uint {
	end := uint(len(value))
	for ; position < end; position++ {
		if isASCIIWhitespace(value[position]) {
			break
		}
	}
	return position
}

// isASCIIWhitespace reports whether character is a space, tab, line feed,
// form feed or carriage return.
func isASCIIWhitespace(character byte) bool {
	return character == ' ' ||
		character == '\t' ||
		character == '\n' ||
		character == '\f' ||
		character == '\r'
}

// isComma reports whether character is the ASCII comma.
func isComma(character byte) bool {
	const comma byte = ','
	return character == comma
}