ソースを参照

perf(parser): don't process the whole page to detect its format

There is no need to process the whole page to guess its format: if we can't
find a format indicator in the first 50 XML tokens, odds are that we won't find
it at all.

This should save some time when trying to find a feed, as this function is
called a handful of times on various pages.
jvoisin 1 ヶ月 前
コミット
8d19529948
1 ファイル変更、5 行追加、1 行削除
  1. 5 1
      internal/reader/parser/format.go

+ 5 - 1
internal/reader/parser/format.go

@@ -20,6 +20,8 @@ const (
 	FormatUnknown = "unknown"
 )
 
+const maxTokensToConsider = uint(50)
+
 // DetectFeedFormat tries to guess the feed format from input data.
 func DetectFeedFormat(r io.ReadSeeker) (string, string) {
 	r.Seek(0, io.SeekStart)
@@ -32,11 +34,13 @@ func DetectFeedFormat(r io.ReadSeeker) (string, string) {
 	r.Seek(0, io.SeekStart)
 	decoder := rxml.NewXMLDecoder(r)
 
+	processedTokens := uint(0)
 	for {
 		token, _ := decoder.Token()
-		if token == nil {
+		if token == nil || processedTokens == maxTokensToConsider {
 			break
 		}
+		processedTokens += 1
 
 		if element, ok := token.(xml.StartElement); ok {
 			switch element.Name.Local {