Browse Source

reader/processor: minify the HTML of feed entries

Minify the HTML of feed entries before storing it. This should reduce the
size of the database a bit and, more importantly, reduce the amount of data
sent to clients.

Since minify is [stupidly fast](https://github.com/tdewolff/minify/?tab=readme-ov-file#performance), the performance impact should be lost in the noise.
jvoisin 2 years ago
parent
commit
b205b5aad0
1 changed file with 18 additions and 2 deletions
  1. 18 2
      internal/reader/processor/processor.go

+ 18 - 2
internal/reader/processor/processor.go

@@ -23,6 +23,8 @@ import (
 	"miniflux.app/v2/internal/storage"
 
 	"github.com/PuerkitoBio/goquery"
+	"github.com/tdewolff/minify/v2"
+	"github.com/tdewolff/minify/v2/html"
 )
 
 var (
@@ -36,6 +38,9 @@ var (
 func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.User, forceRefresh bool) {
 	var filteredEntries model.Entries
 
+	minifier := minify.New()
+	minifier.AddFunc("text/html", html.Minify)
+
 	// Process older entries first
 	for i := len(feed.Entries) - 1; i >= 0; i-- {
 		entry := feed.Entries[i]
@@ -102,7 +107,11 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
 				)
 			} else if content != "" {
 				// We replace the entry content only if the scraper doesn't return any error.
-				entry.Content = content
+				if minifiedHTML, err := minifier.String("text/html", content); err == nil {
+					entry.Content = minifiedHTML
+				} else {
+					entry.Content = content
+				}
 			}
 		}
 
@@ -180,6 +189,9 @@ func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool {
 
 // ProcessEntryWebPage downloads the entry web page and apply rewrite rules.
 func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) error {
+	minifier := minify.New()
+	minifier.AddFunc("text/html", html.Minify)
+
 	startTime := time.Now()
 	websiteURL := getUrlFromEntry(feed, entry)
 
@@ -211,7 +223,11 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
 	}
 
 	if content != "" {
-		entry.Content = content
+		if minifiedHTML, err := minifier.String("text/html", content); err == nil {
+			entry.Content = minifiedHTML
+		} else {
+			entry.Content = content
+		}
 		if user.ShowReadingTime {
 			entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
 		}