Explorar el Código

fix(parser): handle feeds with leading whitespace that exceeds buffer size

Frédéric Guillot hace 8 meses
padre
commit
54abd0a736
Se han modificado 2 ficheros con 88 adiciones y 6 borrados
  1. 35 6
      internal/reader/parser/format.go
  2. 53 0
      internal/reader/parser/format_test.go

+ 35 - 6
internal/reader/parser/format.go

@@ -4,9 +4,9 @@
 package parser // import "miniflux.app/v2/internal/reader/parser"
 
 import (
-	"bytes"
 	"encoding/xml"
 	"io"
+	"unicode"
 
 	rxml "miniflux.app/v2/internal/reader/xml"
 )
@@ -22,11 +22,7 @@ const (
 
 // DetectFeedFormat tries to guess the feed format from input data.
 func DetectFeedFormat(r io.ReadSeeker) (string, string) {
-	var dataArray = [32]byte{}
-	data := dataArray[:]
-	r.Read(data)
-
-	if bytes.HasPrefix(bytes.TrimSpace(data), []byte("{")) {
+	if isJSON, err := detectJSONFormat(r); err == nil && isJSON {
 		return FormatJSON, ""
 	}
 
@@ -58,3 +54,36 @@ func DetectFeedFormat(r io.ReadSeeker) (string, string) {
 
 	return FormatUnknown, ""
 }
+
// detectJSONFormat reports whether the first non-whitespace byte read from r
// is '{'.
//
// The input is consumed in small chunks starting at r's current position, and
// scanning stops at the first byte for which unicode.IsSpace (applied to the
// byte value) is false. The reader is not rewound: callers that need to
// re-read the data must seek back themselves.
//
// It returns (false, nil) when the input ends before any non-whitespace byte
// is seen, (false, err) when a read fails with an error other than io.EOF
// before such a byte is seen, and (false, io.ErrNoProgress) when the reader
// keeps returning no data and no error.
func detectJSONFormat(r io.ReadSeeker) (bool, error) {
	const (
		bufferSize = 32
		// maxEmptyReads bounds how many consecutive (0, nil) reads are
		// tolerated, so a misbehaving reader cannot spin this loop forever.
		maxEmptyReads = 100
	)

	buffer := make([]byte, bufferSize)
	emptyReads := 0

	for {
		n, err := r.Read(buffer)

		// Look at the data before the error: a Read is allowed to return
		// bytes together with a non-nil error (including io.EOF).
		for _, ch := range buffer[:n] {
			// Skip whitespace characters (space, tab, newline, carriage return, etc.)
			if unicode.IsSpace(rune(ch)) {
				continue
			}
			// First non-whitespace character determines if it's JSON
			return ch == '{', nil
		}

		if err == io.EOF {
			return false, nil // No non-whitespace content found
		}
		if err != nil {
			return false, err
		}

		// A short read is NOT end of input; only io.EOF is. Keep reading,
		// but give up if the reader repeatedly makes no progress.
		if n > 0 {
			emptyReads = 0
			continue
		}
		emptyReads++
		if emptyReads >= maxEmptyReads {
			return false, io.ErrNoProgress
		}
	}
}

+ 53 - 0
internal/reader/parser/format_test.go

@@ -77,3 +77,56 @@ func TestDetectUnknown(t *testing.T) {
 		t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatUnknown)
 	}
 }
+
+func TestDetectJSONWithLargeLeadingWhitespace(t *testing.T) {
+	leadingWhitespace := strings.Repeat(" ", 10000)
+	data := leadingWhitespace + `{
+		"version" : "https://jsonfeed.org/version/1",
+		"title" : "Example with lots of leading whitespace"
+	}`
+	format, _ := DetectFeedFormat(strings.NewReader(data))
+
+	if format != FormatJSON {
+		t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
+	}
+}
+
+func TestDetectJSONWithMixedWhitespace(t *testing.T) {
+	leadingWhitespace := strings.Repeat("\n\t  ", 10000)
+	data := leadingWhitespace + `{
+		"version" : "https://jsonfeed.org/version/1",
+		"title" : "Example with mixed whitespace"
+	}`
+	format, _ := DetectFeedFormat(strings.NewReader(data))
+
+	if format != FormatJSON {
+		t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
+	}
+}
+
+func TestDetectOnlyWhitespace(t *testing.T) {
+	data := strings.Repeat(" \t\n\r", 10000)
+	format, _ := DetectFeedFormat(strings.NewReader(data))
+
+	if format != FormatUnknown {
+		t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatUnknown)
+	}
+}
+
+func TestDetectJSONSmallerThanBuffer(t *testing.T) {
+	data := `{"version":"1"}` // This is only 15 bytes, well below the 32-byte buffer
+	format, _ := DetectFeedFormat(strings.NewReader(data))
+
+	if format != FormatJSON {
+		t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
+	}
+}
+
+func TestDetectJSONWithWhitespaceSmallerThanBuffer(t *testing.T) {
+	data := `  {"title":"test"}  `
+	format, _ := DetectFeedFormat(strings.NewReader(data))
+
+	if format != FormatJSON {
+		t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
+	}
+}