|
|
@@ -4,13 +4,9 @@
|
|
|
package processor
|
|
|
|
|
|
import (
|
|
|
- "encoding/json"
|
|
|
- "errors"
|
|
|
- "fmt"
|
|
|
"log/slog"
|
|
|
"regexp"
|
|
|
"slices"
|
|
|
- "strconv"
|
|
|
"strings"
|
|
|
"time"
|
|
|
|
|
|
@@ -25,20 +21,11 @@ import (
|
|
|
"miniflux.app/v2/internal/reader/urlcleaner"
|
|
|
"miniflux.app/v2/internal/storage"
|
|
|
|
|
|
- "github.com/PuerkitoBio/goquery"
|
|
|
"github.com/tdewolff/minify/v2"
|
|
|
"github.com/tdewolff/minify/v2/html"
|
|
|
)
|
|
|
|
|
|
-var (
|
|
|
- youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
|
|
|
- nebulaRegex = regexp.MustCompile(`^https://nebula\.tv`)
|
|
|
- odyseeRegex = regexp.MustCompile(`^https://odysee\.com`)
|
|
|
- bilibiliURLRegex = regexp.MustCompile(`bilibili\.com/video/(.*)$`)
|
|
|
- bilibiliVideoIdRegex = regexp.MustCompile(`/video/(?:av(\d+)|BV([a-zA-Z0-9]+))`)
|
|
|
- iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
|
|
|
- customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
|
|
|
-)
|
|
|
+var customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
|
|
|
|
|
|
// ProcessFeedEntries downloads original web page for entries and apply filters.
|
|
|
func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.User, forceRefresh bool) {
|
|
|
@@ -446,234 +433,6 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-func shouldFetchYouTubeWatchTime(entry *model.Entry) bool {
|
|
|
- if !config.Opts.FetchYouTubeWatchTime() {
|
|
|
- return false
|
|
|
- }
|
|
|
- matches := youtubeRegex.FindStringSubmatch(entry.URL)
|
|
|
- urlMatchesYouTubePattern := len(matches) == 2
|
|
|
- return urlMatchesYouTubePattern
|
|
|
-}
|
|
|
-
|
|
|
-func shouldFetchNebulaWatchTime(entry *model.Entry) bool {
|
|
|
- if !config.Opts.FetchNebulaWatchTime() {
|
|
|
- return false
|
|
|
- }
|
|
|
- matches := nebulaRegex.FindStringSubmatch(entry.URL)
|
|
|
- return matches != nil
|
|
|
-}
|
|
|
-
|
|
|
-func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
|
|
|
- if !config.Opts.FetchOdyseeWatchTime() {
|
|
|
- return false
|
|
|
- }
|
|
|
- matches := odyseeRegex.FindStringSubmatch(entry.URL)
|
|
|
- return matches != nil
|
|
|
-}
|
|
|
-
|
|
|
-func shouldFetchBilibiliWatchTime(entry *model.Entry) bool {
|
|
|
- if !config.Opts.FetchBilibiliWatchTime() {
|
|
|
- return false
|
|
|
- }
|
|
|
- matches := bilibiliURLRegex.FindStringSubmatch(entry.URL)
|
|
|
- urlMatchesBilibiliPattern := len(matches) == 2
|
|
|
- return urlMatchesBilibiliPattern
|
|
|
-}
|
|
|
-
|
|
|
-func fetchYouTubeWatchTime(websiteURL string) (int, error) {
|
|
|
- requestBuilder := fetcher.NewRequestBuilder()
|
|
|
- requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
|
|
- requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
|
|
|
-
|
|
|
- responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
|
|
|
- defer responseHandler.Close()
|
|
|
-
|
|
|
- if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
|
|
- slog.Warn("Unable to fetch YouTube page", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
|
|
|
- return 0, localizedError.Error()
|
|
|
- }
|
|
|
-
|
|
|
- doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
|
|
|
- if docErr != nil {
|
|
|
- return 0, docErr
|
|
|
- }
|
|
|
-
|
|
|
- durs, exists := doc.Find(`meta[itemprop="duration"]`).First().Attr("content")
|
|
|
- if !exists {
|
|
|
- return 0, errors.New("duration has not found")
|
|
|
- }
|
|
|
-
|
|
|
- dur, err := parseISO8601(durs)
|
|
|
- if err != nil {
|
|
|
- return 0, fmt.Errorf("unable to parse duration %s: %v", durs, err)
|
|
|
- }
|
|
|
-
|
|
|
- return int(dur.Minutes()), nil
|
|
|
-}
|
|
|
-
|
|
|
-func fetchNebulaWatchTime(websiteURL string) (int, error) {
|
|
|
- requestBuilder := fetcher.NewRequestBuilder()
|
|
|
- requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
|
|
- requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
|
|
|
-
|
|
|
- responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
|
|
|
- defer responseHandler.Close()
|
|
|
-
|
|
|
- if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
|
|
- slog.Warn("Unable to fetch Nebula watch time", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
|
|
|
- return 0, localizedError.Error()
|
|
|
- }
|
|
|
-
|
|
|
- doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
|
|
|
- if docErr != nil {
|
|
|
- return 0, docErr
|
|
|
- }
|
|
|
-
|
|
|
- durs, exists := doc.Find(`meta[property="video:duration"]`).First().Attr("content")
|
|
|
- // durs contains video watch time in seconds
|
|
|
- if !exists {
|
|
|
- return 0, errors.New("duration has not found")
|
|
|
- }
|
|
|
-
|
|
|
- dur, err := strconv.ParseInt(durs, 10, 64)
|
|
|
- if err != nil {
|
|
|
- return 0, fmt.Errorf("unable to parse duration %s: %v", durs, err)
|
|
|
- }
|
|
|
-
|
|
|
- return int(dur / 60), nil
|
|
|
-}
|
|
|
-
|
|
|
-func fetchOdyseeWatchTime(websiteURL string) (int, error) {
|
|
|
- requestBuilder := fetcher.NewRequestBuilder()
|
|
|
- requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
|
|
- requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
|
|
|
-
|
|
|
- responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
|
|
|
- defer responseHandler.Close()
|
|
|
-
|
|
|
- if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
|
|
- slog.Warn("Unable to fetch Odysee watch time", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
|
|
|
- return 0, localizedError.Error()
|
|
|
- }
|
|
|
-
|
|
|
- doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
|
|
|
- if docErr != nil {
|
|
|
- return 0, docErr
|
|
|
- }
|
|
|
-
|
|
|
- durs, exists := doc.Find(`meta[property="og:video:duration"]`).First().Attr("content")
|
|
|
- // durs contains video watch time in seconds
|
|
|
- if !exists {
|
|
|
- return 0, errors.New("duration has not found")
|
|
|
- }
|
|
|
-
|
|
|
- dur, err := strconv.ParseInt(durs, 10, 64)
|
|
|
- if err != nil {
|
|
|
- return 0, fmt.Errorf("unable to parse duration %s: %v", durs, err)
|
|
|
- }
|
|
|
-
|
|
|
- return int(dur / 60), nil
|
|
|
-}
|
|
|
-
|
|
|
-func extractBilibiliVideoID(websiteURL string) (string, string, error) {
|
|
|
- matches := bilibiliVideoIdRegex.FindStringSubmatch(websiteURL)
|
|
|
- if matches == nil {
|
|
|
- return "", "", fmt.Errorf("no video ID found in URL: %s", websiteURL)
|
|
|
- }
|
|
|
- if matches[1] != "" {
|
|
|
- return "aid", matches[1], nil
|
|
|
- }
|
|
|
- if matches[2] != "" {
|
|
|
- return "bvid", matches[2], nil
|
|
|
- }
|
|
|
- return "", "", fmt.Errorf("unexpected regex match result for URL: %s", websiteURL)
|
|
|
-}
|
|
|
-
|
|
|
-func fetchBilibiliWatchTime(websiteURL string) (int, error) {
|
|
|
- requestBuilder := fetcher.NewRequestBuilder()
|
|
|
- requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
|
|
- requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
|
|
|
-
|
|
|
- idType, videoID, extractErr := extractBilibiliVideoID(websiteURL)
|
|
|
- if extractErr != nil {
|
|
|
- return 0, extractErr
|
|
|
- }
|
|
|
- bilibiliApiURL := fmt.Sprintf("https://api.bilibili.com/x/web-interface/view?%s=%s", idType, videoID)
|
|
|
-
|
|
|
- responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(bilibiliApiURL))
|
|
|
- defer responseHandler.Close()
|
|
|
-
|
|
|
- if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
|
|
- slog.Warn("Unable to fetch Bilibili API",
|
|
|
- slog.String("website_url", bilibiliApiURL),
|
|
|
- slog.Any("error", localizedError.Error()))
|
|
|
- return 0, localizedError.Error()
|
|
|
- }
|
|
|
-
|
|
|
- var result map[string]interface{}
|
|
|
- doc := json.NewDecoder(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
|
|
|
- if docErr := doc.Decode(&result); docErr != nil {
|
|
|
- return 0, fmt.Errorf("failed to decode API response: %v", docErr)
|
|
|
- }
|
|
|
-
|
|
|
- if code, ok := result["code"].(float64); !ok || code != 0 {
|
|
|
- return 0, fmt.Errorf("API returned error code: %v", result["code"])
|
|
|
- }
|
|
|
-
|
|
|
- data, ok := result["data"].(map[string]interface{})
|
|
|
- if !ok {
|
|
|
- return 0, fmt.Errorf("data field not found or not an object")
|
|
|
- }
|
|
|
-
|
|
|
- duration, ok := data["duration"].(float64)
|
|
|
- if !ok {
|
|
|
- return 0, fmt.Errorf("duration not found or not a number")
|
|
|
- }
|
|
|
- intDuration := int(duration)
|
|
|
- durationMin := intDuration / 60
|
|
|
- if intDuration%60 != 0 {
|
|
|
- durationMin++
|
|
|
- }
|
|
|
- return durationMin, nil
|
|
|
-}
|
|
|
-
|
|
|
-// parseISO8601 parses an ISO 8601 duration string.
|
|
|
-func parseISO8601(from string) (time.Duration, error) {
|
|
|
- var match []string
|
|
|
- var d time.Duration
|
|
|
-
|
|
|
- if iso8601Regex.MatchString(from) {
|
|
|
- match = iso8601Regex.FindStringSubmatch(from)
|
|
|
- } else {
|
|
|
- return 0, errors.New("could not parse duration string")
|
|
|
- }
|
|
|
-
|
|
|
- for i, name := range iso8601Regex.SubexpNames() {
|
|
|
- part := match[i]
|
|
|
- if i == 0 || name == "" || part == "" {
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- val, err := strconv.ParseInt(part, 10, 64)
|
|
|
- if err != nil {
|
|
|
- return 0, err
|
|
|
- }
|
|
|
-
|
|
|
- switch name {
|
|
|
- case "hour":
|
|
|
- d += (time.Duration(val) * time.Hour)
|
|
|
- case "minute":
|
|
|
- d += (time.Duration(val) * time.Minute)
|
|
|
- case "second":
|
|
|
- d += (time.Duration(val) * time.Second)
|
|
|
- default:
|
|
|
- return 0, fmt.Errorf("unknown field %s", name)
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- return d, nil
|
|
|
-}
|
|
|
-
|
|
|
func isRecentEntry(entry *model.Entry) bool {
|
|
|
if config.Opts.FilterEntryMaxAgeDays() == 0 || entry.Date.After(time.Now().AddDate(0, 0, -config.Opts.FilterEntryMaxAgeDays())) {
|
|
|
return true
|