format.go 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package parser // import "miniflux.app/v2/internal/reader/parser"
  4. import (
  5. "encoding/xml"
  6. "io"
  7. "unicode"
  8. rxml "miniflux.app/v2/internal/reader/xml"
  9. )
// List of feed formats.
//
// These are the values DetectFeedFormat returns as its first result.
const (
	// FormatRDF is returned when the first recognized XML start element is
	// named "RDF".
	FormatRDF = "rdf"
	// FormatRSS is returned when the first recognized XML start element is
	// named "rss".
	FormatRSS = "rss"
	// FormatAtom is returned when the first recognized XML start element is
	// named "feed".
	FormatAtom = "atom"
	// FormatJSON is returned when the first non-whitespace byte of the input
	// is '{'.
	FormatJSON = "json"
	// FormatUnknown is returned when none of the formats above is detected.
	FormatUnknown = "unknown"
)
  18. // DetectFeedFormat tries to guess the feed format from input data.
  19. func DetectFeedFormat(r io.ReadSeeker) (string, string) {
  20. if isJSON, err := detectJSONFormat(r); err == nil && isJSON {
  21. return FormatJSON, ""
  22. }
  23. r.Seek(0, io.SeekStart)
  24. decoder := rxml.NewXMLDecoder(r)
  25. for {
  26. token, _ := decoder.Token()
  27. if token == nil {
  28. break
  29. }
  30. if element, ok := token.(xml.StartElement); ok {
  31. switch element.Name.Local {
  32. case "rss":
  33. return FormatRSS, ""
  34. case "feed":
  35. for _, attr := range element.Attr {
  36. if attr.Name.Local == "version" && attr.Value == "0.3" {
  37. return FormatAtom, "0.3"
  38. }
  39. }
  40. return FormatAtom, "1.0"
  41. case "RDF":
  42. return FormatRDF, ""
  43. }
  44. }
  45. }
  46. return FormatUnknown, ""
  47. }
  48. // detectJSONFormat checks if the reader contains JSON by reading until it finds
  49. // the first non-whitespace character or reaches EOF/error.
  50. func detectJSONFormat(r io.ReadSeeker) (bool, error) {
  51. const bufferSize = 32
  52. buffer := make([]byte, bufferSize)
  53. for {
  54. n, err := r.Read(buffer)
  55. if n == 0 {
  56. if err == io.EOF {
  57. return false, nil // No non-whitespace content found
  58. }
  59. return false, err
  60. }
  61. // Check each byte in the buffer
  62. for i := range n {
  63. ch := buffer[i]
  64. // Skip whitespace characters (space, tab, newline, carriage return, etc.)
  65. if unicode.IsSpace(rune(ch)) {
  66. continue
  67. }
  68. // First non-whitespace character determines if it's JSON
  69. return ch == '{', nil
  70. }
  71. // If we've read less than bufferSize, we've reached EOF
  72. if n < bufferSize {
  73. return false, nil
  74. }
  75. }
  76. }