Browse Source

perf(subscription): don't read the body twice

Since FindSubscriptions is reading the Body in a []byte, there is no need to
wrap it into a Reader for NewCharsetReader to ReadAll into another buffer: we
can simply pass the []byte instead. For some large webpages, this might shave
off two allocation-deallocation of a handful of megabytes when looking for
subscriptions.
Julien Voisin 2 months ago
parent
commit
6f3c92db74

+ 4 - 0
internal/reader/encoding/encoding.go

@@ -48,6 +48,10 @@ func NewCharsetReader(r io.Reader, contentType string) (io.Reader, error) {
 		return nil, fmt.Errorf(`encoding: unable to read input: %w`, err)
 	}
 
+	return NewCharsetReaderFromBytes(buffer, contentType)
+}
+
+func NewCharsetReaderFromBytes(buffer []byte, contentType string) (io.Reader, error) {
 	internalReader := bytes.NewReader(buffer)
 
 	// The document is already UTF-8, do not do anything.

+ 6 - 7
internal/reader/subscription/finder.go

@@ -5,7 +5,6 @@ package subscription // import "miniflux.app/v2/internal/reader/subscription"
 
 import (
 	"bytes"
-	"io"
 	"log/slog"
 	"net/url"
 	"strings"
@@ -71,7 +70,7 @@ func (f *subscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string,
 
 	// Step 2) Find the canonical URL of the website.
 	slog.Debug("Try to find the canonical URL of the website", slog.String("website_url", websiteURL))
-	websiteURL = f.findCanonicalURL(websiteURL, responseHandler.ContentType(), bytes.NewReader(responseBody))
+	websiteURL = f.findCanonicalURL(websiteURL, responseHandler.ContentType(), responseBody)
 
 	// Step 3) Check if the website URL is a YouTube channel.
 	slog.Debug("Try to detect feeds for a YouTube page", slog.String("website_url", websiteURL))
@@ -87,7 +86,7 @@ func (f *subscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string,
 		slog.String("website_url", websiteURL),
 		slog.String("content_type", responseHandler.ContentType()),
 	)
-	if subscriptions, localizedError := f.findSubscriptionsFromWebPage(websiteURL, responseHandler.ContentType(), bytes.NewReader(responseBody)); localizedError != nil {
+	if subscriptions, localizedError := f.findSubscriptionsFromWebPage(websiteURL, responseHandler.ContentType(), responseBody); localizedError != nil {
 		return nil, localizedError
 	} else if len(subscriptions) > 0 {
 		slog.Debug("Subscriptions found from web page", slog.String("website_url", websiteURL), slog.Any("subscriptions", subscriptions))
@@ -117,14 +116,14 @@ func (f *subscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string,
 	return nil, nil
 }
 
-func (f *subscriptionFinder) findSubscriptionsFromWebPage(websiteURL, contentType string, body io.Reader) (Subscriptions, *locale.LocalizedErrorWrapper) {
+func (f *subscriptionFinder) findSubscriptionsFromWebPage(websiteURL, contentType string, body []byte) (Subscriptions, *locale.LocalizedErrorWrapper) {
 	queries := map[string]string{
 		"link[type='application/rss+xml']":                                  parser.FormatRSS,
 		"link[type='application/atom+xml']":                                 parser.FormatAtom,
 		"link[type='application/json'], link[type='application/feed+json']": parser.FormatJSON,
 	}
 
-	htmlDocumentReader, err := encoding.NewCharsetReader(body, contentType)
+	htmlDocumentReader, err := encoding.NewCharsetReaderFromBytes(body, contentType)
 	if err != nil {
 		return nil, locale.NewLocalizedErrorWrapper(err, "error.unable_to_parse_html_document", err)
 	}
@@ -323,8 +322,8 @@ func (f *subscriptionFinder) findSubscriptionsFromYouTube(websiteURL string) (Su
 
 // findCanonicalURL extracts the canonical URL from the HTML <link rel="canonical"> tag.
 // Returns the canonical URL if found, otherwise returns the effective URL.
-func (f *subscriptionFinder) findCanonicalURL(effectiveURL, contentType string, body io.Reader) string {
-	htmlDocumentReader, err := encoding.NewCharsetReader(body, contentType)
+func (f *subscriptionFinder) findCanonicalURL(effectiveURL, contentType string, body []byte) string {
+	htmlDocumentReader, err := encoding.NewCharsetReaderFromBytes(body, contentType)
 	if err != nil {
 		return effectiveURL
 	}

+ 12 - 13
internal/reader/subscription/finder_test.go

@@ -4,7 +4,6 @@
 package subscription
 
 import (
-	"strings"
 	"testing"
 )
 
@@ -120,7 +119,7 @@ func TestParseWebPageWithRssFeed(t *testing.T) {
 		</body>
 	</html>`
 
-	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
+	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
 	if err != nil {
 		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
 	}
@@ -153,7 +152,7 @@ func TestParseWebPageWithAtomFeed(t *testing.T) {
 		</body>
 	</html>`
 
-	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
+	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
 	if err != nil {
 		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
 	}
@@ -186,7 +185,7 @@ func TestParseWebPageWithJSONFeed(t *testing.T) {
 		</body>
 	</html>`
 
-	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
+	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
 	if err != nil {
 		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
 	}
@@ -219,7 +218,7 @@ func TestParseWebPageWithOldJSONFeedMimeType(t *testing.T) {
 		</body>
 	</html>`
 
-	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
+	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
 	if err != nil {
 		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
 	}
@@ -252,7 +251,7 @@ func TestParseWebPageWithRelativeFeedURL(t *testing.T) {
 		</body>
 	</html>`
 
-	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
+	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
 	if err != nil {
 		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
 	}
@@ -285,7 +284,7 @@ func TestParseWebPageWithEmptyTitle(t *testing.T) {
 		</body>
 	</html>`
 
-	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
+	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
 	if err != nil {
 		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
 	}
@@ -319,7 +318,7 @@ func TestParseWebPageWithMultipleFeeds(t *testing.T) {
 		</body>
 	</html>`
 
-	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
+	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
 	if err != nil {
 		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
 	}
@@ -341,7 +340,7 @@ func TestParseWebPageWithDuplicatedFeeds(t *testing.T) {
 		</body>
 	</html>`
 
-	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
+	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
 	if err != nil {
 		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
 	}
@@ -374,7 +373,7 @@ func TestParseWebPageWithEmptyFeedURL(t *testing.T) {
 		</body>
 	</html>`
 
-	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
+	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
 	if err != nil {
 		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
 	}
@@ -395,7 +394,7 @@ func TestParseWebPageWithNoHref(t *testing.T) {
 		</body>
 	</html>`
 
-	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
+	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
 	if err != nil {
 		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
 	}
@@ -416,7 +415,7 @@ func TestFindCanonicalURL(t *testing.T) {
 		</body>
 	</html>`
 
-	canonicalURL := NewSubscriptionFinder(nil).findCanonicalURL("https://example.org/page", "text/html", strings.NewReader(htmlPage))
+	canonicalURL := NewSubscriptionFinder(nil).findCanonicalURL("https://example.org/page", "text/html", []byte(htmlPage))
 	if canonicalURL != "https://example.org/canonical-page" {
 		t.Errorf(`Unexpected canonical URL, got %q, expected %q`, canonicalURL, "https://example.org/canonical-page")
 	}
@@ -432,7 +431,7 @@ func TestFindCanonicalURLNotFound(t *testing.T) {
 		</body>
 	</html>`
 
-	canonicalURL := NewSubscriptionFinder(nil).findCanonicalURL("https://example.org/page", "text/html", strings.NewReader(htmlPage))
+	canonicalURL := NewSubscriptionFinder(nil).findCanonicalURL("https://example.org/page", "text/html", []byte(htmlPage))
 	if canonicalURL != "https://example.org/page" {
 		t.Errorf(`Expected effective URL when canonical not found, got %q`, canonicalURL)
 	}