Răsfoiți Sursa

feat: add `FETCH_BILIBILI_WATCH_TIME` config option

Qeynos 1 an în urmă
părinte
comite
bcbf9f4025

+ 18 - 0
internal/config/config_test.go

@@ -2044,6 +2044,24 @@ func TestAuthProxyUserCreationAdmin(t *testing.T) {
 	}
 }
 
+func TestFetchBilibiliWatchTime(t *testing.T) {
+	os.Clearenv()
+	os.Setenv("FETCH_BILIBILI_WATCH_TIME", "1")
+
+	parser := NewParser()
+	opts, err := parser.ParseEnvironmentVariables()
+	if err != nil {
+		t.Fatalf(`Parsing failure: %v`, err)
+	}
+
+	expected := true
+	result := opts.FetchBilibiliWatchTime()
+
+	if result != expected {
+		t.Fatalf(`Unexpected FETCH_BILIBILI_WATCH_TIME value, got %v instead of %v`, result, expected)
+	}
+}
+
 func TestFetchNebulaWatchTime(t *testing.T) {
 	os.Clearenv()
 	os.Setenv("FETCH_NEBULA_WATCH_TIME", "1")

+ 10 - 0
internal/config/options.go

@@ -56,6 +56,7 @@ const (
 	defaultMediaResourceTypes                 = "image"
 	defaultMediaProxyURL                      = ""
 	defaultFilterEntryMaxAgeDays              = 0
+	defaultFetchBilibiliWatchTime             = false
 	defaultFetchNebulaWatchTime               = false
 	defaultFetchOdyseeWatchTime               = false
 	defaultFetchYouTubeWatchTime              = false
@@ -141,6 +142,7 @@ type Options struct {
 	mediaProxyMode                     string
 	mediaProxyResourceTypes            []string
 	mediaProxyCustomURL                string
+	fetchBilibiliWatchTime             bool
 	fetchNebulaWatchTime               bool
 	fetchOdyseeWatchTime               bool
 	fetchYouTubeWatchTime              bool
@@ -218,6 +220,7 @@ func NewOptions() *Options {
 		mediaProxyResourceTypes:            []string{defaultMediaResourceTypes},
 		mediaProxyCustomURL:                defaultMediaProxyURL,
 		filterEntryMaxAgeDays:              defaultFilterEntryMaxAgeDays,
+		fetchBilibiliWatchTime:             defaultFetchBilibiliWatchTime,
 		fetchNebulaWatchTime:               defaultFetchNebulaWatchTime,
 		fetchOdyseeWatchTime:               defaultFetchOdyseeWatchTime,
 		fetchYouTubeWatchTime:              defaultFetchYouTubeWatchTime,
@@ -501,6 +504,12 @@ func (o *Options) FetchOdyseeWatchTime() bool {
 	return o.fetchOdyseeWatchTime
 }
 
+// FetchBilibiliWatchTime reports whether the Bilibili video duration should be fetched and
+// used as a reading time; it is set by FETCH_BILIBILI_WATCH_TIME and is false by default.
+func (o *Options) FetchBilibiliWatchTime() bool {
+	return o.fetchBilibiliWatchTime
+}
+
 // MediaProxyMode returns "none" to never proxy, "http-only" to proxy non-HTTPS, "all" to always proxy.
 func (o *Options) MediaProxyMode() string {
 	return o.mediaProxyMode
@@ -658,6 +667,7 @@ func (o *Options) SortedOptions(redactSecret bool) []*Option {
 		"FETCH_YOUTUBE_WATCH_TIME":               o.fetchYouTubeWatchTime,
 		"FETCH_NEBULA_WATCH_TIME":                o.fetchNebulaWatchTime,
 		"FETCH_ODYSEE_WATCH_TIME":                o.fetchOdyseeWatchTime,
+		"FETCH_BILIBILI_WATCH_TIME":              o.fetchBilibiliWatchTime,
 		"HTTPS":                                  o.HTTPS,
 		"HTTP_CLIENT_MAX_BODY_SIZE":              o.httpClientMaxBodySize,
 		"HTTP_CLIENT_PROXY":                      o.httpClientProxy,

+ 2 - 0
internal/config/parser.go

@@ -259,6 +259,8 @@ func (p *Parser) parseLines(lines []string) (err error) {
 			p.opts.metricsPassword = parseString(value, defaultMetricsPassword)
 		case "METRICS_PASSWORD_FILE":
 			p.opts.metricsPassword = readSecretFile(value, defaultMetricsPassword)
+		case "FETCH_BILIBILI_WATCH_TIME":
+			p.opts.fetchBilibiliWatchTime = parseBool(value, defaultFetchBilibiliWatchTime)
 		case "FETCH_NEBULA_WATCH_TIME":
 			p.opts.fetchNebulaWatchTime = parseBool(value, defaultFetchNebulaWatchTime)
 		case "FETCH_ODYSEE_WATCH_TIME":

+ 67 - 0
internal/reader/processor/processor.go

@@ -33,6 +33,8 @@ var (
 	youtubeRegex           = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
 	nebulaRegex            = regexp.MustCompile(`^https://nebula\.tv`)
 	odyseeRegex            = regexp.MustCompile(`^https://odysee\.com`)
+	bilibiliRegex          = regexp.MustCompile(`bilibili\.com/video/(.*)$`)
+	timelengthRegex        = regexp.MustCompile(`"timelength":\s*(\d+)`)
 	iso8601Regex           = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
 	customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
 )
@@ -418,6 +420,25 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod
 		}
 	}
 
+	if shouldFetchBilibiliWatchTime(entry) {
+		if entryIsNew {
+			watchTime, err := fetchBilibiliWatchTime(entry.URL)
+			if err != nil {
+				slog.Warn("Unable to fetch Bilibili watch time",
+					slog.Int64("user_id", user.ID),
+					slog.Int64("entry_id", entry.ID),
+					slog.String("entry_url", entry.URL),
+					slog.Int64("feed_id", feed.ID),
+					slog.String("feed_url", feed.FeedURL),
+					slog.Any("error", err),
+				)
+			}
+			entry.ReadingTime = watchTime
+		} else {
+			entry.ReadingTime = store.GetReadTime(feed.ID, entry.Hash)
+		}
+	}
+
 	// Handle YT error case and non-YT entries.
 	if entry.ReadingTime == 0 {
 		entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
@@ -449,6 +470,15 @@ func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
 	return matches != nil
 }
 
+// shouldFetchBilibiliWatchTime reports whether the Bilibili watch time option
+// is enabled and the entry URL matches the Bilibili video URL pattern.
+func shouldFetchBilibiliWatchTime(entry *model.Entry) bool {
+	// The option is checked first so the regex is only evaluated when the
+	// feature is enabled. A successful match yields the full match plus the
+	// single capture group, hence a length of exactly 2.
+	return config.Opts.FetchBilibiliWatchTime() &&
+		len(bilibiliRegex.FindStringSubmatch(entry.URL)) == 2
+}
+
 func fetchYouTubeWatchTime(websiteURL string) (int, error) {
 	requestBuilder := fetcher.NewRequestBuilder()
 	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
@@ -544,6 +574,43 @@ func fetchOdyseeWatchTime(websiteURL string) (int, error) {
 	return int(dur / 60), nil
 }
 
+// fetchBilibiliWatchTime downloads the page at websiteURL and returns the
+// video duration in whole minutes.
+//
+// The duration is read from the first `"timelength": <digits>` occurrence in
+// the document text and is treated as a number of milliseconds. The
+// sub-second remainder is dropped and the result is rounded up to the next
+// whole minute.
+//
+// It returns 0 and a non-nil error when the page cannot be fetched, the
+// document cannot be parsed, no duration is found, or the duration is not a
+// valid 64-bit integer.
+func fetchBilibiliWatchTime(websiteURL string) (int, error) {
+	requestBuilder := fetcher.NewRequestBuilder()
+	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
+	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
+
+	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
+	defer responseHandler.Close()
+
+	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
+		slog.Warn("Unable to fetch Bilibili page", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
+		return 0, localizedError.Error()
+	}
+
+	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
+	if docErr != nil {
+		return 0, docErr
+	}
+
+	// Search the text content of the parsed document for the duration field.
+	timelengthMatches := timelengthRegex.FindStringSubmatch(doc.Text())
+	if len(timelengthMatches) < 2 {
+		return 0, errors.New("duration has not found")
+	}
+
+	durationMs, err := strconv.ParseInt(timelengthMatches[1], 10, 64)
+	if err != nil {
+		// Wrap with %w so callers can inspect the underlying strconv error
+		// with errors.Is / errors.As; the rendered message is unchanged.
+		return 0, fmt.Errorf("unable to parse duration %s: %w", timelengthMatches[1], err)
+	}
+
+	// Convert milliseconds to seconds (truncating), then round up to minutes
+	// so that any partial minute counts as a full one.
+	durationSec := durationMs / 1000
+	durationMin := durationSec / 60
+	if durationSec%60 != 0 {
+		durationMin++
+	}
+
+	return int(durationMin), nil
+}
+
 // parseISO8601 parses an ISO 8601 duration string.
 func parseISO8601(from string) (time.Duration, error) {
 	var match []string

+ 6 - 0
miniflux.1

@@ -244,6 +244,12 @@ Set the value to 1 to disable the internal scheduler service\&.
 .br
 Default is false (The internal scheduler service is enabled)\&.
 .TP
+.B FETCH_BILIBILI_WATCH_TIME
+Set the value to 1 to scrape video duration from Bilibili website and
+use it as a reading time\&.
+.br
+Disabled by default\&.
+.TP
 .B FETCH_NEBULA_WATCH_TIME
 Set the value to 1 to scrape video duration from Nebula website and
 use it as a reading time\&.