| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- // Copyright 2018 Frédéric Guillot. All rights reserved.
- // Use of this source code is governed by the Apache 2.0
- // license that can be found in the LICENSE file.
- package parser // import "miniflux.app/reader/parser"
- import (
- "strings"
- "miniflux.app/errors"
- "miniflux.app/logger"
- "miniflux.app/model"
- "miniflux.app/reader/atom"
- "miniflux.app/reader/json"
- "miniflux.app/reader/rdf"
- "miniflux.app/reader/rss"
- )
- // ParseFeed analyzes the input data and returns a normalized feed object.
- func ParseFeed(data string) (*model.Feed, *errors.LocalizedError) {
- data = stripInvalidXMLCharacters(data)
- switch DetectFeedFormat(data) {
- case FormatAtom:
- return atom.Parse(strings.NewReader(data))
- case FormatRSS:
- return rss.Parse(strings.NewReader(data))
- case FormatJSON:
- return json.Parse(strings.NewReader(data))
- case FormatRDF:
- return rdf.Parse(strings.NewReader(data))
- default:
- return nil, errors.NewLocalizedError("Unsupported feed format")
- }
- }
- func stripInvalidXMLCharacters(input string) string {
- return strings.Map(func(r rune) rune {
- if isInCharacterRange(r) {
- return r
- }
- logger.Debug("Strip invalid XML characters: %U", r)
- return -1
- }, input)
- }
// isInCharacterRange reports whether r is a legal XML character, per the Char
// production of http://www.xml.com/axml/testaxml.htm, Section 2.2 Characters:
//
//	Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
//
// The gap between 0xD7FF and 0xE000 is the UTF-16 surrogate block, and
// 0xFFFE/0xFFFF are excluded as well; none of those code points are valid
// XML characters.
func isInCharacterRange(r rune) (inrange bool) {
	return r == 0x09 ||
		r == 0x0A ||
		r == 0x0D ||
		// Upper bound is 0xD7FF (last code point before the surrogates),
		// not 0xDF77, which would let surrogates U+D800..U+DF77 through.
		r >= 0x20 && r <= 0xD7FF ||
		r >= 0xE000 && r <= 0xFFFD ||
		r >= 0x10000 && r <= 0x10FFFF
}
|