Bladeren bron

Calculate reading time during feed processing

The goal is to speed up the user interface.

Detecting the language based on the content is pretty slow.
Frédéric Guillot 5 jaren geleden
bovenliggende
commit
de7a613098

+ 15 - 14
client/core.go

@@ -129,20 +129,21 @@ type Feeds []*Feed
 
 // Entry represents a subscription item in the system.
 type Entry struct {
-	ID         int64      `json:"id"`
-	UserID     int64      `json:"user_id"`
-	FeedID     int64      `json:"feed_id"`
-	Status     string     `json:"status"`
-	Hash       string     `json:"hash"`
-	Title      string     `json:"title"`
-	URL        string     `json:"url"`
-	Date       time.Time  `json:"published_at"`
-	Content    string     `json:"content"`
-	Author     string     `json:"author"`
-	ShareCode  string     `json:"share_code"`
-	Starred    bool       `json:"starred"`
-	Enclosures Enclosures `json:"enclosures,omitempty"`
-	Feed       *Feed      `json:"feed,omitempty"`
+	ID          int64      `json:"id"`
+	UserID      int64      `json:"user_id"`
+	FeedID      int64      `json:"feed_id"`
+	Status      string     `json:"status"`
+	Hash        string     `json:"hash"`
+	Title       string     `json:"title"`
+	URL         string     `json:"url"`
+	Date        time.Time  `json:"published_at"`
+	Content     string     `json:"content"`
+	Author      string     `json:"author"`
+	ShareCode   string     `json:"share_code"`
+	Starred     bool       `json:"starred"`
+	ReadingTime int        `json:"reading_time"`
+	Enclosures  Enclosures `json:"enclosures,omitempty"`
+	Feed        *Feed      `json:"feed,omitempty"`
 }
 
 // Entries represents a list of entries.

+ 1 - 1
database/migration.go

@@ -12,7 +12,7 @@ import (
 	"miniflux.app/logger"
 )
 
-const schemaVersion = 40
+const schemaVersion = 41
 
 // Migrate executes database migrations.
 func Migrate(db *sql.DB) {

+ 2 - 0
database/sql.go

@@ -203,6 +203,7 @@ alter table users add column entry_direction entry_sorting_direction default 'as
     add column keeplist_rules text not null default ''
 ;
 `,
+	"schema_version_41": `alter table entries add column reading_time int not null default 0;`,
 	"schema_version_5": `create table integrations (
     user_id int not null,
     pinboard_enabled bool default 'f',
@@ -264,6 +265,7 @@ var SqlMapChecksums = map[string]string{
 	"schema_version_39": "b0f90b97502921d4681a07c64d180a91a0b4ccac7d3c1dbe30519ad6f1bf1737",
 	"schema_version_4":  "216ea3a7d3e1704e40c797b5dc47456517c27dbb6ca98bf88812f4f63d74b5d9",
 	"schema_version_40": "6a8fec92399f853ed6817aff4cfa43255dce4c19afad796e41519d09de62105e",
+	"schema_version_41": "128e118ce61267ea1f6ae03b63a6d4734eae87e520b00e309ad083f1f6afdfe5",
 	"schema_version_5":  "46397e2f5f2c82116786127e9f6a403e975b14d2ca7b652a48cd1ba843e6a27c",
 	"schema_version_6":  "9d05b4fb223f0e60efc716add5048b0ca9c37511cf2041721e20505d6d798ce4",
 	"schema_version_7":  "33f298c9aa30d6de3ca28e1270df51c2884d7596f1283a75716e2aeb634cd05c",

+ 1 - 0
database/sql/schema_version_41.sql

@@ -0,0 +1 @@
+alter table entries add column reading_time int not null default 0;

+ 1 - 0
model/entry.go

@@ -33,6 +33,7 @@ type Entry struct {
 	Author      string        `json:"author"`
 	ShareCode   string        `json:"share_code"`
 	Starred     bool          `json:"starred"`
+	ReadingTime int           `json:"reading_time"`
 	Enclosures  EnclosureList `json:"enclosures,omitempty"`
 	Feed        *Feed         `json:"feed,omitempty"`
 }

+ 22 - 0
reader/processor/processor.go

@@ -5,8 +5,11 @@
 package processor
 
 import (
+	"math"
 	"regexp"
+	"strings"
 	"time"
+	"unicode/utf8"
 
 	"miniflux.app/config"
 	"miniflux.app/logger"
@@ -16,6 +19,8 @@ import (
 	"miniflux.app/reader/sanitizer"
 	"miniflux.app/reader/scraper"
 	"miniflux.app/storage"
+
+	"github.com/rylans/getlang"
 )
 
 // ProcessFeedEntries downloads original web page for entries and apply filters.
@@ -58,6 +63,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed) {
 		// The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered.
 		entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
 
+		entry.ReadingTime = calculateReadingTime(entry.Content)
 		filteredEntries = append(filteredEntries, entry)
 	}
 
@@ -108,7 +114,23 @@ func ProcessEntryWebPage(entry *model.Entry) error {
 
 	if content != "" {
 		entry.Content = content
+		entry.ReadingTime = calculateReadingTime(content)
 	}
 
 	return nil
 }
+
+// calculateReadingTime estimates the reading time of content, rounded up, after stripping tags.
+// Korean/Chinese/Japanese text counts 500 characters per unit; other text counts 265 words.
+func calculateReadingTime(content string) int {
+	sanitizedContent := sanitizer.StripTags(content)
+
+	// "ja" is the ISO 639-1 code for Japanese; "jp" is kept only for backward compatibility.
+	switch getlang.FromString(sanitizedContent).LanguageCode() {
+	case "ko", "zh", "ja", "jp":
+		return int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / 500))
+	}
+
+	nbOfWords := len(strings.Fields(sanitizedContent))
+	return int(math.Ceil(float64(nbOfWords) / 265))
+}

+ 35 - 6
storage/entry.go

@@ -75,11 +75,11 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error {
 		UPDATE
 			entries
 		SET
-			content=$1
+			content=$1, reading_time=$2
 		WHERE
-			id=$2 AND user_id=$3
+			id=$3 AND user_id=$4
 	`
-	_, err = tx.Exec(query, entry.Content, entry.ID, entry.UserID)
+	_, err = tx.Exec(query, entry.Content, entry.ReadingTime, entry.ID, entry.UserID)
 	if err != nil {
 		tx.Rollback()
 		return fmt.Errorf(`store: unable to update content of entry #%d: %v`, entry.ID, err)
@@ -106,9 +106,35 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error {
 func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
 	query := `
 		INSERT INTO entries
-			(title, hash, url, comments_url, published_at, content, author, user_id, feed_id, changed_at, document_vectors)
+			(
+				title,
+				hash,
+				url,
+				comments_url,
+				published_at,
+				content,
+				author,
+				user_id,
+				feed_id,
+				reading_time,
+				changed_at,
+				document_vectors
+			)
 		VALUES
-			($1, $2, $3, $4, $5, $6, $7, $8, $9, now(), setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($6, '') for 1000000)), 'B'))
+			(
+				$1,
+				$2,
+				$3,
+				$4,
+				$5,
+				$6,
+				$7,
+				$8,
+				$9,
+				$10,
+				now(),
+				setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($6, '') for 1000000)), 'B')
+			)
 		RETURNING
 			id, status
 	`
@@ -123,6 +149,7 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
 		entry.Author,
 		entry.UserID,
 		entry.FeedID,
+		entry.ReadingTime,
 	).Scan(&entry.ID, &entry.Status)
 
 	if err != nil {
@@ -154,9 +181,10 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
 			comments_url=$3,
 			content=$4,
 			author=$5,
+			reading_time=$6,
 			document_vectors = setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($4, '') for 1000000)), 'B')
 		WHERE
-			user_id=$6 AND feed_id=$7 AND hash=$8
+			user_id=$7 AND feed_id=$8 AND hash=$9
 		RETURNING
 			id
 	`
@@ -167,6 +195,7 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
 		entry.CommentsURL,
 		entry.Content,
 		entry.Author,
+		entry.ReadingTime,
 		entry.UserID,
 		entry.FeedID,
 		entry.Hash,

+ 2 - 0
storage/entry_query_builder.go

@@ -226,6 +226,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
 			e.content,
 			e.status,
 			e.starred,
+			e.reading_time,
 			f.title as feed_title,
 			f.feed_url,
 			f.site_url,
@@ -284,6 +285,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
 			&entry.Content,
 			&entry.Status,
 			&entry.Starred,
+			&entry.ReadingTime,
 			&entry.Feed.Title,
 			&entry.Feed.FeedURL,
 			&entry.Feed.SiteURL,

+ 3 - 3
template/common.go

@@ -242,10 +242,10 @@ SOFTWARE.
         <li>
             <time datetime="{{ isodate .entry.Date }}" title="{{ isodate .entry.Date }}">{{ elapsed .user.Timezone .entry.Date }}</time>
         </li>
-        {{ if .user.ShowReadingTime }}
+        {{ if and .user.ShowReadingTime (gt .entry.ReadingTime 0) }}
         <li>
             <span>
-            {{ plural "entry.estimated_reading_time" (timeToRead .entry.Content) (timeToRead .entry.Content) }}
+            {{ plural "entry.estimated_reading_time" .entry.ReadingTime .entry.ReadingTime }}
             </span>
         </li>
         {{ end }}
@@ -523,7 +523,7 @@ var templateCommonMapChecksums = map[string]string{
 	"feed_list":        "931e43d328a116318c510de5658c688cd940b934c86b6ec82a472e1f81e020ae",
 	"feed_menu":        "318d8662dda5ca9dfc75b909c8461e79c86fb5082df1428f67aaf856f19f4b50",
 	"icons":            "9a41753778072f286216085d8712495e2ccca20c7a24f5c982775436a3d38579",
-	"item_meta":        "eb72c6e2a924759af20b8ef41f2ce7495aedc053181c2e5ca1b063f9410c58b0",
+	"item_meta":        "56ab09d7dd46eeb2e2ee11ddcec0c157a5832c896dbd2887d9e2b013680b2af6",
 	"layout":           "65767e7dbebe1f7ed42895ecd5a737b0693e4a2ec35e84e3e391f462beb11977",
 	"pagination":       "7b61288e86283c4cf0dc83bcbf8bf1c00c7cb29e60201c8c0b633b2450d2911f",
 	"settings_menu":    "e2b777630c0efdbc529800303c01d6744ed3af80ec505ac5a5b3f99c9b989156",

+ 0 - 3
template/engine.go

@@ -65,9 +65,6 @@ func (e *Engine) Render(name, language string, data interface{}) []byte {
 		"plural": func(key string, n int, args ...interface{}) string {
 			return printer.Plural(key, n, args...)
 		},
-		"timeToRead": func(content string) int {
-			return timeToRead(content)
-		},
 	})
 
 	var b bytes.Buffer

+ 0 - 21
template/functions.go

@@ -11,19 +11,16 @@ import (
 	"net/mail"
 	"strings"
 	"time"
-	"unicode/utf8"
 
 	"miniflux.app/config"
 	"miniflux.app/http/route"
 	"miniflux.app/locale"
 	"miniflux.app/model"
 	"miniflux.app/proxy"
-	"miniflux.app/reader/sanitizer"
 	"miniflux.app/timezone"
 	"miniflux.app/url"
 
 	"github.com/gorilla/mux"
-	"github.com/rylans/getlang"
 )
 
 type funcMap struct {
@@ -94,9 +91,6 @@ func (f *funcMap) Map() template.FuncMap {
 		"plural": func(key string, n int, args ...interface{}) string {
 			return ""
 		},
-		"timeToRead": func(content string) int {
-			return 0
-		},
 	}
 }
 
@@ -195,18 +189,3 @@ func formatFileSize(b int64) string {
 	return fmt.Sprintf("%.1f %ciB",
 		float64(b)/float64(div), "KMGTPE"[exp])
 }
-
-func timeToRead(content string) int {
-	sanitizedContent := sanitizer.StripTags(content)
-	languageInfo := getlang.FromString(sanitizedContent)
-
-	var timeToReadInt int
-	if languageInfo.LanguageCode() == "ko" || languageInfo.LanguageCode() == "zh" || languageInfo.LanguageCode() == "jp" {
-		timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / 500))
-	} else {
-		nbOfWords := len(strings.Fields(sanitizedContent))
-		timeToReadInt = int(math.Ceil(float64(nbOfWords) / 265))
-	}
-
-	return timeToReadInt
-}

+ 2 - 2
template/html/common/item_meta.html

@@ -7,10 +7,10 @@
         <li>
             <time datetime="{{ isodate .entry.Date }}" title="{{ isodate .entry.Date }}">{{ elapsed .user.Timezone .entry.Date }}</time>
         </li>
-        {{ if .user.ShowReadingTime }}
+        {{ if and .user.ShowReadingTime (gt .entry.ReadingTime 0) }}
         <li>
             <span>
-            {{ plural "entry.estimated_reading_time" (timeToRead .entry.Content) (timeToRead .entry.Content) }}
+            {{ plural "entry.estimated_reading_time" .entry.ReadingTime .entry.ReadingTime }}
             </span>
         </li>
         {{ end }}