4
0
Эх сурвалжийг харах

options: add FETCH_NEBULA_WATCH_TIME

fin444 1 жил өмнө
parent
commit
a631bd527d

+ 18 - 0
internal/config/config_test.go

@@ -2021,6 +2021,24 @@ func TestAuthProxyUserCreationAdmin(t *testing.T) {
 	}
 }
 
+func TestFetchNebulaWatchTime(t *testing.T) {
+	os.Clearenv()
+	os.Setenv("FETCH_NEBULA_WATCH_TIME", "1")
+
+	parser := NewParser()
+	opts, err := parser.ParseEnvironmentVariables()
+	if err != nil {
+		t.Fatalf(`Parsing failure: %v`, err)
+	}
+
+	expected := true
+	result := opts.FetchNebulaWatchTime()
+
+	if result != expected {
+		t.Fatalf(`Unexpected FETCH_NEBULA_WATCH_TIME value, got %v instead of %v`, result, expected)
+	}
+}
+
 func TestFetchOdyseeWatchTime(t *testing.T) {
 	os.Clearenv()
 	os.Setenv("FETCH_ODYSEE_WATCH_TIME", "1")

+ 10 - 0
internal/config/options.go

@@ -56,6 +56,7 @@ const (
 	defaultMediaResourceTypes                 = "image"
 	defaultMediaProxyURL                      = ""
 	defaultFilterEntryMaxAgeDays              = 0
+	defaultFetchNebulaWatchTime               = false
 	defaultFetchOdyseeWatchTime               = false
 	defaultFetchYouTubeWatchTime              = false
 	defaultYouTubeEmbedUrlOverride            = "https://www.youtube-nocookie.com/embed/"
@@ -140,6 +141,7 @@ type Options struct {
 	mediaProxyMode                     string
 	mediaProxyResourceTypes            []string
 	mediaProxyCustomURL                string
+	fetchNebulaWatchTime               bool
 	fetchOdyseeWatchTime               bool
 	fetchYouTubeWatchTime              bool
 	filterEntryMaxAgeDays              int
@@ -216,6 +218,7 @@ func NewOptions() *Options {
 		mediaProxyResourceTypes:            []string{defaultMediaResourceTypes},
 		mediaProxyCustomURL:                defaultMediaProxyURL,
 		filterEntryMaxAgeDays:              defaultFilterEntryMaxAgeDays,
+		fetchNebulaWatchTime:               defaultFetchNebulaWatchTime,
 		fetchOdyseeWatchTime:               defaultFetchOdyseeWatchTime,
 		fetchYouTubeWatchTime:              defaultFetchYouTubeWatchTime,
 		youTubeEmbedUrlOverride:            defaultYouTubeEmbedUrlOverride,
@@ -486,6 +489,12 @@ func (o *Options) YouTubeEmbedUrlOverride() string {
 	return o.youTubeEmbedUrlOverride
 }
 
+// FetchNebulaWatchTime reports whether the Nebula video duration should be
+// fetched and used as a reading time (the FETCH_NEBULA_WATCH_TIME option).
+func (o *Options) FetchNebulaWatchTime() bool {
+	return o.fetchNebulaWatchTime
+}
+
 // FetchOdyseeWatchTime returns true if the Odysee video duration
 // should be fetched and used as a reading time.
 func (o *Options) FetchOdyseeWatchTime() bool {
@@ -647,6 +656,7 @@ func (o *Options) SortedOptions(redactSecret bool) []*Option {
 		"DISABLE_SCHEDULER_SERVICE":              !o.schedulerService,
 		"FILTER_ENTRY_MAX_AGE_DAYS":              o.filterEntryMaxAgeDays,
 		"FETCH_YOUTUBE_WATCH_TIME":               o.fetchYouTubeWatchTime,
+		"FETCH_NEBULA_WATCH_TIME":                o.fetchNebulaWatchTime,
 		"FETCH_ODYSEE_WATCH_TIME":                o.fetchOdyseeWatchTime,
 		"HTTPS":                                  o.HTTPS,
 		"HTTP_CLIENT_MAX_BODY_SIZE":              o.httpClientMaxBodySize,

+ 2 - 0
internal/config/parser.go

@@ -259,6 +259,8 @@ func (p *Parser) parseLines(lines []string) (err error) {
 			p.opts.metricsPassword = parseString(value, defaultMetricsPassword)
 		case "METRICS_PASSWORD_FILE":
 			p.opts.metricsPassword = readSecretFile(value, defaultMetricsPassword)
+		case "FETCH_NEBULA_WATCH_TIME":
+			p.opts.fetchNebulaWatchTime = parseBool(value, defaultFetchNebulaWatchTime)
 		case "FETCH_ODYSEE_WATCH_TIME":
 			p.opts.fetchOdyseeWatchTime = parseBool(value, defaultFetchOdyseeWatchTime)
 		case "FETCH_YOUTUBE_WATCH_TIME":

+ 60 - 0
internal/reader/processor/processor.go

@@ -29,6 +29,7 @@ import (
 
 var (
 	youtubeRegex           = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
+	nebulaRegex            = regexp.MustCompile(`^https://nebula\.tv`)
 	odyseeRegex            = regexp.MustCompile(`^https://odysee\.com`)
 	iso8601Regex           = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
 	customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
@@ -277,6 +278,25 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod
 		}
 	}
 
+	if shouldFetchNebulaWatchTime(entry) {
+		if entryIsNew {
+			watchTime, err := fetchNebulaWatchTime(entry.URL)
+			if err != nil {
+				slog.Warn("Unable to fetch Nebula watch time",
+					slog.Int64("user_id", user.ID),
+					slog.Int64("entry_id", entry.ID),
+					slog.String("entry_url", entry.URL),
+					slog.Int64("feed_id", feed.ID),
+					slog.String("feed_url", feed.FeedURL),
+					slog.Any("error", err),
+				)
+			}
+			entry.ReadingTime = watchTime
+		} else {
+			entry.ReadingTime = store.GetReadTime(feed.ID, entry.Hash)
+		}
+	}
+
 	if shouldFetchOdyseeWatchTime(entry) {
 		if entryIsNew {
 			watchTime, err := fetchOdyseeWatchTime(entry.URL)
@@ -311,6 +331,14 @@ func shouldFetchYouTubeWatchTime(entry *model.Entry) bool {
 	return urlMatchesYouTubePattern
 }
 
+// shouldFetchNebulaWatchTime reports whether the Nebula watch time option is
+// enabled and the entry URL matches nebulaRegex.
+func shouldFetchNebulaWatchTime(entry *model.Entry) bool {
+	// Only look at the URL when the option is switched on.
+	enabled := config.Opts.FetchNebulaWatchTime()
+	return enabled && nebulaRegex.MatchString(entry.URL)
+}
+
 func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
 	if !config.Opts.FetchOdyseeWatchTime() {
 		return false
@@ -350,6 +378,38 @@ func fetchYouTubeWatchTime(websiteURL string) (int, error) {
 	return int(dur.Minutes()), nil
 }
 
+// fetchNebulaWatchTime downloads the page at websiteURL and returns the video
+// length in whole minutes, read from the page's video:duration meta tag.
+//
+// It returns 0 and a non-nil error when the request fails, the document cannot
+// be parsed, the meta tag is missing, or its content is not a base-10 integer.
+func fetchNebulaWatchTime(websiteURL string) (int, error) {
+	requestBuilder := fetcher.NewRequestBuilder()
+	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
+	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
+
+	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
+	defer responseHandler.Close()
+
+	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
+		slog.Warn("Unable to fetch Nebula watch time", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
+		return 0, localizedError.Error()
+	}
+
+	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
+	if docErr != nil {
+		return 0, docErr
+	}
+
+	// The content attribute holds the video watch time in seconds.
+	durs, exists := doc.Find(`meta[property="video:duration"]`).First().Attr("content")
+	if !exists {
+		return 0, errors.New("duration has not found")
+	}
+
+	dur, err := strconv.ParseInt(durs, 10, 64)
+	if err != nil {
+		// Wrap with %w so callers can still inspect the underlying strconv error.
+		return 0, fmt.Errorf("unable to parse duration %s: %w", durs, err)
+	}
+
+	// Integer division converts seconds to minutes and drops any partial minute.
+	return int(dur / 60), nil
+}
+
 func fetchOdyseeWatchTime(websiteURL string) (int, error) {
 	requestBuilder := fetcher.NewRequestBuilder()
 	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())

+ 6 - 0
miniflux.1

@@ -244,6 +244,12 @@ Set the value to 1 to disable the internal scheduler service\&.
 .br
 Default is false (The internal scheduler service is enabled)\&.
 .TP
+.B FETCH_NEBULA_WATCH_TIME
+Set the value to 1 to scrape the video duration from the Nebula website and
+use it as a reading time\&.
+.br
+Disabled by default\&.
+.TP
 .B FETCH_ODYSEE_WATCH_TIME
 Set the value to 1 to scrape video duration from Odysee website and
 use it as a reading time\&.