parser.go 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. // Copyright 2018 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package parser // import "miniflux.app/reader/parser"
  5. import (
  6. "strings"
  7. "miniflux.app/errors"
  8. "miniflux.app/logger"
  9. "miniflux.app/model"
  10. "miniflux.app/reader/atom"
  11. "miniflux.app/reader/json"
  12. "miniflux.app/reader/rdf"
  13. "miniflux.app/reader/rss"
  14. )
  15. // ParseFeed analyzes the input data and returns a normalized feed object.
  16. func ParseFeed(data string) (*model.Feed, *errors.LocalizedError) {
  17. data = stripInvalidXMLCharacters(data)
  18. switch DetectFeedFormat(data) {
  19. case FormatAtom:
  20. return atom.Parse(strings.NewReader(data))
  21. case FormatRSS:
  22. return rss.Parse(strings.NewReader(data))
  23. case FormatJSON:
  24. return json.Parse(strings.NewReader(data))
  25. case FormatRDF:
  26. return rdf.Parse(strings.NewReader(data))
  27. default:
  28. return nil, errors.NewLocalizedError("Unsupported feed format")
  29. }
  30. }
  31. func stripInvalidXMLCharacters(input string) string {
  32. return strings.Map(func(r rune) rune {
  33. if isInCharacterRange(r) {
  34. return r
  35. }
  36. logger.Debug("Strip invalid XML characters: %U", r)
  37. return -1
  38. }, input)
  39. }
  40. // Decide whether the given rune is in the XML Character Range, per
  41. // the Char production of http://www.xml.com/axml/testaxml.htm,
  42. // Section 2.2 Characters.
  43. func isInCharacterRange(r rune) (inrange bool) {
  44. return r == 0x09 ||
  45. r == 0x0A ||
  46. r == 0x0D ||
  47. r >= 0x20 && r <= 0xDF77 ||
  48. r >= 0xE000 && r <= 0xFFFD ||
  49. r >= 0x10000 && r <= 0x10FFFF
  50. }