Browse Source

Use Odysee video duration as read time

This feature works by scraping the Odysee website.

To enable it, set the FETCH_ODYSEE_WATCH_TIME environment variable to
1.
Kierán Meinhardt 3 years ago
parent
commit
3060946cc1
5 changed files with 82 additions and 0 deletions
  1. 18 0
      config/config_test.go
  2. 10 0
      config/options.go
  3. 2 0
      config/parser.go
  4. 6 0
      miniflux.1
  5. 46 0
      reader/processor/processor.go

+ 18 - 0
config/config_test.go

@@ -1598,6 +1598,24 @@ func TestAuthProxyUserCreationAdmin(t *testing.T) {
 	}
 }
 
+// TestFetchOdyseeWatchTime checks that FETCH_ODYSEE_WATCH_TIME=1 enables the option.
+func TestFetchOdyseeWatchTime(t *testing.T) {
+	// Start from an empty environment so only this variable is set.
+	os.Clearenv()
+	os.Setenv("FETCH_ODYSEE_WATCH_TIME", "1")
+
+	opts, parseErr := NewParser().ParseEnvironmentVariables()
+	if parseErr != nil {
+		t.Fatalf(`Parsing failure: %v`, parseErr)
+	}
+
+	// The value "1" is expected to be parsed as an enabled flag.
+	const want = true
+	if got := opts.FetchOdyseeWatchTime(); got != want {
+		t.Fatalf(`Unexpected FETCH_ODYSEE_WATCH_TIME value, got %v instead of %v`, got, want)
+	}
+}
+
 func TestFetchYouTubeWatchTime(t *testing.T) {
 	os.Clearenv()
 	os.Setenv("FETCH_YOUTUBE_WATCH_TIME", "1")

+ 10 - 0
config/options.go

@@ -49,6 +49,7 @@ const (
 	defaultProxyOption                        = "http-only"
 	defaultProxyMediaTypes                    = "image"
 	defaultProxyUrl                           = ""
+	defaultFetchOdyseeWatchTime               = false
 	defaultFetchYouTubeWatchTime              = false
 	defaultYouTubeEmbedUrlOverride            = "https://www.youtube-nocookie.com/embed/"
 	defaultCreateAdmin                        = false
@@ -126,6 +127,7 @@ type Options struct {
 	proxyOption                        string
 	proxyMediaTypes                    []string
 	proxyUrl                           string
+	fetchOdyseeWatchTime               bool
 	fetchYouTubeWatchTime              bool
 	youTubeEmbedUrlOverride            string
 	oauth2UserCreationAllowed          bool
@@ -196,6 +198,7 @@ func NewOptions() *Options {
 		proxyOption:                        defaultProxyOption,
 		proxyMediaTypes:                    []string{defaultProxyMediaTypes},
 		proxyUrl:                           defaultProxyUrl,
+		fetchOdyseeWatchTime:               defaultFetchOdyseeWatchTime,
 		fetchYouTubeWatchTime:              defaultFetchYouTubeWatchTime,
 		youTubeEmbedUrlOverride:            defaultYouTubeEmbedUrlOverride,
 		oauth2UserCreationAllowed:          defaultOAuth2UserCreation,
@@ -436,6 +439,12 @@ func (o *Options) YouTubeEmbedUrlOverride() string {
 	return o.youTubeEmbedUrlOverride
 }
 
+// FetchOdyseeWatchTime reports whether the Odysee video duration should be
+// fetched and used as the reading time (FETCH_ODYSEE_WATCH_TIME, off by default).
+func (o *Options) FetchOdyseeWatchTime() bool {
+	return o.fetchOdyseeWatchTime
+}
+
 // ProxyOption returns "none" to never proxy, "http-only" to proxy non-HTTPS, "all" to always proxy.
 func (o *Options) ProxyOption() string {
 	return o.proxyOption
@@ -581,6 +590,7 @@ func (o *Options) SortedOptions(redactSecret bool) []*Option {
 		"DISABLE_HTTP_SERVICE":                   !o.httpService,
 		"DISABLE_SCHEDULER_SERVICE":              !o.schedulerService,
 		"FETCH_YOUTUBE_WATCH_TIME":               o.fetchYouTubeWatchTime,
+		"FETCH_ODYSEE_WATCH_TIME":                o.fetchOdyseeWatchTime,
 		"HTTPS":                                  o.HTTPS,
 		"HTTP_CLIENT_MAX_BODY_SIZE":              o.httpClientMaxBodySize,
 		"HTTP_CLIENT_PROXY":                      o.httpClientProxy,

+ 2 - 0
config/parser.go

@@ -213,6 +213,8 @@ func (p *Parser) parseLines(lines []string) (err error) {
 			p.opts.metricsPassword = parseString(value, defaultMetricsPassword)
 		case "METRICS_PASSWORD_FILE":
 			p.opts.metricsPassword = readSecretFile(value, defaultMetricsPassword)
+		case "FETCH_ODYSEE_WATCH_TIME":
+			p.opts.fetchOdyseeWatchTime = parseBool(value, defaultFetchOdyseeWatchTime)
 		case "FETCH_YOUTUBE_WATCH_TIME":
 			p.opts.fetchYouTubeWatchTime = parseBool(value, defaultFetchYouTubeWatchTime)
 		case "YOUTUBE_EMBED_URL_OVERRIDE":

+ 6 - 0
miniflux.1

@@ -118,6 +118,12 @@ Set the value to 1 to enable debug logs\&.
 .br
 Disabled by default\&.
 .TP
+.B FETCH_ODYSEE_WATCH_TIME
+Set the value to 1 to scrape the video duration from the Odysee website
+and use it as the reading time\&.
+.br
+Disabled by default\&.
+.TP
 .B FETCH_YOUTUBE_WATCH_TIME
 Set the value to 1 to scrape video duration from YouTube website and
 use it as a reading time\&.

+ 46 - 0
reader/processor/processor.go

@@ -32,6 +32,7 @@ import (
 
 var (
 	youtubeRegex           = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
+	odyseeRegex            = regexp.MustCompile(`^https://odysee\.com`) // prefix match: any URL starting with https://odysee.com
 	iso8601Regex           = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
 	customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
 )
@@ -207,6 +208,17 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod
 		}
 	}
 
+	if shouldFetchOdyseeWatchTime(entry) {
+		if entryIsNew {
+			watchTime, err := fetchOdyseeWatchTime(entry.URL)
+			if err != nil {
+				logger.Error("[Processor] Unable to fetch Odysee watch time: %q => %v", entry.URL, err)
+			}
+			entry.ReadingTime = watchTime
+		} else {
+			entry.ReadingTime = store.GetReadTime(entry, feed)
+		}
+	}
 	// Handle YT error case and non-YT entries.
 	if entry.ReadingTime == 0 {
 		entry.ReadingTime = calculateReadingTime(entry.Content, user)
@@ -222,6 +234,14 @@ func shouldFetchYouTubeWatchTime(entry *model.Entry) bool {
 	return urlMatchesYouTubePattern
 }
 
+// shouldFetchOdyseeWatchTime reports whether Odysee scraping is enabled and
+// the entry URL matches odyseeRegex.
+func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
+	enabled := config.Opts.FetchOdyseeWatchTime()
+	// The regex is only evaluated when the option is enabled.
+	return enabled && odyseeRegex.MatchString(entry.URL)
+}
+
 func fetchYouTubeWatchTime(url string) (int, error) {
 	clt := client.NewClientWithConfig(url, config.Opts)
 	response, browserErr := browser.Exec(clt)
@@ -247,6 +267,32 @@ func fetchYouTubeWatchTime(url string) (int, error) {
 	return int(dur.Minutes()), nil
 }
 
+// fetchOdyseeWatchTime requests url via browser.Exec and returns the video
+// duration in whole minutes, read from the og:video:duration meta tag.
+func fetchOdyseeWatchTime(url string) (int, error) {
+	clt := client.NewClientWithConfig(url, config.Opts)
+	response, browserErr := browser.Exec(clt)
+	if browserErr != nil {
+		return 0, browserErr
+	}
+
+	doc, docErr := goquery.NewDocumentFromReader(response.Body)
+	if docErr != nil {
+		return 0, docErr
+	}
+	// The meta tag content is expected to hold the watch time in seconds.
+	durs, exists := doc.Find(`meta[property="og:video:duration"]`).First().Attr("content")
+	if !exists {
+		return 0, errors.New("duration not found")
+	}
+	dur, err := strconv.ParseInt(durs, 10, 64)
+	if err != nil {
+		// Wrap with %w so callers can still inspect the strconv error.
+		return 0, fmt.Errorf("unable to parse duration %q: %w", durs, err)
+	}
+	return int(dur / 60), nil
+}
+
 // parseISO8601 parses an ISO 8601 duration string.
 func parseISO8601(from string) (time.Duration, error) {
 	var match []string