Преглед изворног кода

Calculate reading time during feed processing

The goal is to speed up the user interface.

Detecting the language based on the content is pretty slow.
Frédéric Guillot пре 5 година
родитељ
комит
de7a613098

+ 15 - 14
client/core.go

@@ -129,20 +129,21 @@ type Feeds []*Feed
 
 // Entry represents a subscription item in the system.
 type Entry struct {
-	ID         int64      `json:"id"`
-	UserID     int64      `json:"user_id"`
-	FeedID     int64      `json:"feed_id"`
-	Status     string     `json:"status"`
-	Hash       string     `json:"hash"`
-	Title      string     `json:"title"`
-	URL        string     `json:"url"`
-	Date       time.Time  `json:"published_at"`
-	Content    string     `json:"content"`
-	Author     string     `json:"author"`
-	ShareCode  string     `json:"share_code"`
-	Starred    bool       `json:"starred"`
-	Enclosures Enclosures `json:"enclosures,omitempty"`
-	Feed       *Feed      `json:"feed,omitempty"`
+	ID          int64      `json:"id"`
+	UserID      int64      `json:"user_id"`
+	FeedID      int64      `json:"feed_id"`
+	Status      string     `json:"status"`
+	Hash        string     `json:"hash"`
+	Title       string     `json:"title"`
+	URL         string     `json:"url"`
+	Date        time.Time  `json:"published_at"`
+	Content     string     `json:"content"`
+	Author      string     `json:"author"`
+	ShareCode   string     `json:"share_code"`
+	Starred     bool       `json:"starred"`
+	ReadingTime int        `json:"reading_time"`
+	Enclosures  Enclosures `json:"enclosures,omitempty"`
+	Feed        *Feed      `json:"feed,omitempty"`
 }
 
 // Entries represents a list of entries.

+ 1 - 1
database/migration.go

@@ -12,7 +12,7 @@ import (
 	"miniflux.app/logger"
 )
 
-const schemaVersion = 40
+const schemaVersion = 41
 
 // Migrate executes database migrations.
 func Migrate(db *sql.DB) {

+ 2 - 0
database/sql.go

@@ -203,6 +203,7 @@ alter table users add column entry_direction entry_sorting_direction default 'as
     add column keeplist_rules text not null default ''
 ;
 `,
+	"schema_version_41": `alter table entries add column reading_time int not null default 0;`,
 	"schema_version_5": `create table integrations (
     user_id int not null,
     pinboard_enabled bool default 'f',
@@ -264,6 +265,7 @@ var SqlMapChecksums = map[string]string{
 	"schema_version_39": "b0f90b97502921d4681a07c64d180a91a0b4ccac7d3c1dbe30519ad6f1bf1737",
 	"schema_version_4":  "216ea3a7d3e1704e40c797b5dc47456517c27dbb6ca98bf88812f4f63d74b5d9",
 	"schema_version_40": "6a8fec92399f853ed6817aff4cfa43255dce4c19afad796e41519d09de62105e",
+	"schema_version_41": "128e118ce61267ea1f6ae03b63a6d4734eae87e520b00e309ad083f1f6afdfe5",
 	"schema_version_5":  "46397e2f5f2c82116786127e9f6a403e975b14d2ca7b652a48cd1ba843e6a27c",
 	"schema_version_6":  "9d05b4fb223f0e60efc716add5048b0ca9c37511cf2041721e20505d6d798ce4",
 	"schema_version_7":  "33f298c9aa30d6de3ca28e1270df51c2884d7596f1283a75716e2aeb634cd05c",

+ 1 - 0
database/sql/schema_version_41.sql

@@ -0,0 +1 @@
+alter table entries add column reading_time int not null default 0;

+ 1 - 0
model/entry.go

@@ -33,6 +33,7 @@ type Entry struct {
 	Author      string        `json:"author"`
 	ShareCode   string        `json:"share_code"`
 	Starred     bool          `json:"starred"`
+	ReadingTime int           `json:"reading_time"`
 	Enclosures  EnclosureList `json:"enclosures,omitempty"`
 	Feed        *Feed         `json:"feed,omitempty"`
 }

+ 22 - 0
reader/processor/processor.go

@@ -5,8 +5,11 @@
 package processor
 
 import (
+	"math"
 	"regexp"
+	"strings"
 	"time"
+	"unicode/utf8"
 
 	"miniflux.app/config"
 	"miniflux.app/logger"
@@ -16,6 +19,8 @@ import (
 	"miniflux.app/reader/sanitizer"
 	"miniflux.app/reader/scraper"
 	"miniflux.app/storage"
+
+	"github.com/rylans/getlang"
 )
 
 // ProcessFeedEntries downloads original web page for entries and apply filters.
@@ -58,6 +63,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed) {
 		// The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered.
 		entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
 
+		entry.ReadingTime = calculateReadingTime(entry.Content)
 		filteredEntries = append(filteredEntries, entry)
 	}
 
@@ -108,7 +114,23 @@ func ProcessEntryWebPage(entry *model.Entry) error {
 
 	if content != "" {
 		entry.Content = content
+		entry.ReadingTime = calculateReadingTime(content)
 	}
 
 	return nil
 }
+
+func calculateReadingTime(content string) int {
+	sanitizedContent := sanitizer.StripTags(content)
+	languageInfo := getlang.FromString(sanitizedContent)
+
+	var timeToReadInt int
+	if languageInfo.LanguageCode() == "ko" || languageInfo.LanguageCode() == "zh" || languageInfo.LanguageCode() == "jp" {
+		timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / 500))
+	} else {
+		nbOfWords := len(strings.Fields(sanitizedContent))
+		timeToReadInt = int(math.Ceil(float64(nbOfWords) / 265))
+	}
+
+	return timeToReadInt
+}

+ 35 - 6
storage/entry.go

@@ -75,11 +75,11 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error {
 		UPDATE
 			entries
 		SET
-			content=$1
+			content=$1, reading_time=$2
 		WHERE
-			id=$2 AND user_id=$3
+			id=$3 AND user_id=$4
 	`
-	_, err = tx.Exec(query, entry.Content, entry.ID, entry.UserID)
+	_, err = tx.Exec(query, entry.Content, entry.ReadingTime, entry.ID, entry.UserID)
 	if err != nil {
 		tx.Rollback()
 		return fmt.Errorf(`store: unable to update content of entry #%d: %v`, entry.ID, err)
@@ -106,9 +106,35 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error {
 func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
 	query := `
 		INSERT INTO entries
-			(title, hash, url, comments_url, published_at, content, author, user_id, feed_id, changed_at, document_vectors)
+			(
+				title,
+				hash,
+				url,
+				comments_url,
+				published_at,
+				content,
+				author,
+				user_id,
+				feed_id,
+				reading_time,
+				changed_at,
+				document_vectors
+			)
 		VALUES
-			($1, $2, $3, $4, $5, $6, $7, $8, $9, now(), setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($6, '') for 1000000)), 'B'))
+			(
+				$1,
+				$2,
+				$3,
+				$4,
+				$5,
+				$6,
+				$7,
+				$8,
+				$9,
+				$10,
+				now(),
+				setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($6, '') for 1000000)), 'B')
+			)
 		RETURNING
 			id, status
 	`
@@ -123,6 +149,7 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
 		entry.Author,
 		entry.UserID,
 		entry.FeedID,
+		entry.ReadingTime,
 	).Scan(&entry.ID, &entry.Status)
 
 	if err != nil {
@@ -154,9 +181,10 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
 			comments_url=$3,
 			content=$4,
 			author=$5,
+			reading_time=$6,
 			document_vectors = setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($4, '') for 1000000)), 'B')
 		WHERE
-			user_id=$6 AND feed_id=$7 AND hash=$8
+			user_id=$7 AND feed_id=$8 AND hash=$9
 		RETURNING
 			id
 	`
@@ -167,6 +195,7 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
 		entry.CommentsURL,
 		entry.Content,
 		entry.Author,
+		entry.ReadingTime,
 		entry.UserID,
 		entry.FeedID,
 		entry.Hash,

+ 2 - 0
storage/entry_query_builder.go

@@ -226,6 +226,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
 			e.content,
 			e.status,
 			e.starred,
+			e.reading_time,
 			f.title as feed_title,
 			f.feed_url,
 			f.site_url,
@@ -284,6 +285,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
 			&entry.Content,
 			&entry.Status,
 			&entry.Starred,
+			&entry.ReadingTime,
 			&entry.Feed.Title,
 			&entry.Feed.FeedURL,
 			&entry.Feed.SiteURL,

+ 3 - 3
template/common.go

@@ -242,10 +242,10 @@ SOFTWARE.
         <li>
             <time datetime="{{ isodate .entry.Date }}" title="{{ isodate .entry.Date }}">{{ elapsed .user.Timezone .entry.Date }}</time>
         </li>
-        {{ if .user.ShowReadingTime }}
+        {{ if and .user.ShowReadingTime (gt .entry.ReadingTime 0) }}
         <li>
             <span>
-            {{ plural "entry.estimated_reading_time" (timeToRead .entry.Content) (timeToRead .entry.Content) }}
+            {{ plural "entry.estimated_reading_time" .entry.ReadingTime .entry.ReadingTime }}
             </span>
         </li>
         {{ end }}
@@ -523,7 +523,7 @@ var templateCommonMapChecksums = map[string]string{
 	"feed_list":        "931e43d328a116318c510de5658c688cd940b934c86b6ec82a472e1f81e020ae",
 	"feed_menu":        "318d8662dda5ca9dfc75b909c8461e79c86fb5082df1428f67aaf856f19f4b50",
 	"icons":            "9a41753778072f286216085d8712495e2ccca20c7a24f5c982775436a3d38579",
-	"item_meta":        "eb72c6e2a924759af20b8ef41f2ce7495aedc053181c2e5ca1b063f9410c58b0",
+	"item_meta":        "56ab09d7dd46eeb2e2ee11ddcec0c157a5832c896dbd2887d9e2b013680b2af6",
 	"layout":           "65767e7dbebe1f7ed42895ecd5a737b0693e4a2ec35e84e3e391f462beb11977",
 	"pagination":       "7b61288e86283c4cf0dc83bcbf8bf1c00c7cb29e60201c8c0b633b2450d2911f",
 	"settings_menu":    "e2b777630c0efdbc529800303c01d6744ed3af80ec505ac5a5b3f99c9b989156",

+ 0 - 3
template/engine.go

@@ -65,9 +65,6 @@ func (e *Engine) Render(name, language string, data interface{}) []byte {
 		"plural": func(key string, n int, args ...interface{}) string {
 			return printer.Plural(key, n, args...)
 		},
-		"timeToRead": func(content string) int {
-			return timeToRead(content)
-		},
 	})
 
 	var b bytes.Buffer

+ 0 - 21
template/functions.go

@@ -11,19 +11,16 @@ import (
 	"net/mail"
 	"strings"
 	"time"
-	"unicode/utf8"
 
 	"miniflux.app/config"
 	"miniflux.app/http/route"
 	"miniflux.app/locale"
 	"miniflux.app/model"
 	"miniflux.app/proxy"
-	"miniflux.app/reader/sanitizer"
 	"miniflux.app/timezone"
 	"miniflux.app/url"
 
 	"github.com/gorilla/mux"
-	"github.com/rylans/getlang"
 )
 
 type funcMap struct {
@@ -94,9 +91,6 @@ func (f *funcMap) Map() template.FuncMap {
 		"plural": func(key string, n int, args ...interface{}) string {
 			return ""
 		},
-		"timeToRead": func(content string) int {
-			return 0
-		},
 	}
 }
 
@@ -195,18 +189,3 @@ func formatFileSize(b int64) string {
 	return fmt.Sprintf("%.1f %ciB",
 		float64(b)/float64(div), "KMGTPE"[exp])
 }
-
-func timeToRead(content string) int {
-	sanitizedContent := sanitizer.StripTags(content)
-	languageInfo := getlang.FromString(sanitizedContent)
-
-	var timeToReadInt int
-	if languageInfo.LanguageCode() == "ko" || languageInfo.LanguageCode() == "zh" || languageInfo.LanguageCode() == "jp" {
-		timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / 500))
-	} else {
-		nbOfWords := len(strings.Fields(sanitizedContent))
-		timeToReadInt = int(math.Ceil(float64(nbOfWords) / 265))
-	}
-
-	return timeToReadInt
-}

+ 2 - 2
template/html/common/item_meta.html

@@ -7,10 +7,10 @@
         <li>
             <time datetime="{{ isodate .entry.Date }}" title="{{ isodate .entry.Date }}">{{ elapsed .user.Timezone .entry.Date }}</time>
         </li>
-        {{ if .user.ShowReadingTime }}
+        {{ if and .user.ShowReadingTime (gt .entry.ReadingTime 0) }}
         <li>
             <span>
-            {{ plural "entry.estimated_reading_time" (timeToRead .entry.Content) (timeToRead .entry.Content) }}
+            {{ plural "entry.estimated_reading_time" .entry.ReadingTime .entry.ReadingTime }}
             </span>
         </li>
         {{ end }}