| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596 |
- // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
- // SPDX-License-Identifier: Apache-2.0
- package parser // import "miniflux.app/v2/internal/reader/parser"
- import (
- "encoding/xml"
- "io"
- "unicode"
- rxml "miniflux.app/v2/internal/reader/xml"
- )
// Feed format identifiers returned by DetectFeedFormat.
const (
	// FormatAtom is reported when the first recognized XML element is <feed>.
	FormatAtom = "atom"
	// FormatJSON is reported when the first non-whitespace byte is '{'.
	FormatJSON = "json"
	// FormatRDF is reported when the first recognized XML element is <RDF>.
	FormatRDF = "rdf"
	// FormatRSS is reported when the first recognized XML element is <rss>.
	FormatRSS = "rss"
	// FormatUnknown is reported when no known format could be identified.
	FormatUnknown = "unknown"
)
- // DetectFeedFormat tries to guess the feed format from input data.
- func DetectFeedFormat(r io.ReadSeeker) (string, string) {
- r.Seek(0, io.SeekStart)
- defer r.Seek(0, io.SeekStart)
- if isJSON, err := detectJSONFormat(r); err == nil && isJSON {
- return FormatJSON, ""
- }
- r.Seek(0, io.SeekStart)
- decoder := rxml.NewXMLDecoder(r)
- for {
- token, _ := decoder.Token()
- if token == nil {
- break
- }
- if element, ok := token.(xml.StartElement); ok {
- switch element.Name.Local {
- case "rss":
- return FormatRSS, ""
- case "feed":
- for _, attr := range element.Attr {
- if attr.Name.Local == "version" && attr.Value == "0.3" {
- return FormatAtom, "0.3"
- }
- }
- return FormatAtom, "1.0"
- case "RDF":
- return FormatRDF, ""
- }
- }
- }
- return FormatUnknown, ""
- }
// detectJSONFormat reports whether the first non-whitespace byte read from r
// is '{'.
//
// It reads r in small chunks from its current position and does not rewind
// it; callers that need the data again must seek back themselves.
//
// It returns (false, nil) when the input ends before any non-whitespace byte
// is seen, and (false, err) when a read fails with an error other than io.EOF
// before a decision could be made.
func detectJSONFormat(r io.ReadSeeker) (bool, error) {
	const bufferSize = 32
	buffer := make([]byte, bufferSize)

	for {
		n, err := r.Read(buffer)

		// Look at the returned bytes before the error: io.Reader allows
		// n > 0 to be delivered together with a non-nil error.
		for _, ch := range buffer[:n] {
			// Each byte is tested on its own (space, tab, newline, carriage
			// return, and the other Latin-1 spaces known to unicode.IsSpace).
			if unicode.IsSpace(rune(ch)) {
				continue
			}
			// The first non-whitespace byte decides the answer.
			return ch == '{', nil
		}

		switch {
		case err == io.EOF:
			return false, nil // Only whitespace (or nothing) before EOF.
		case err != nil:
			return false, err
		case n == 0:
			// Zero bytes with no error: stop instead of spinning on a
			// reader that makes no progress.
			return false, nil
		}
		// A short read (0 < n < bufferSize) does not mean EOF; a reader may
		// return fewer bytes than requested, so keep reading.
	}
}
|