|
|
@@ -4,6 +4,7 @@
|
|
|
package processor
|
|
|
|
|
|
import (
|
|
|
+ "encoding/json"
|
|
|
"errors"
|
|
|
"fmt"
|
|
|
"log/slog"
|
|
|
@@ -33,8 +34,8 @@ var (
|
|
|
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
|
|
|
nebulaRegex = regexp.MustCompile(`^https://nebula\.tv`)
|
|
|
odyseeRegex = regexp.MustCompile(`^https://odysee\.com`)
|
|
|
- bilibiliRegex = regexp.MustCompile(`bilibili\.com/video/(.*)$`)
|
|
|
- timelengthRegex = regexp.MustCompile(`"timelength":\s*(\d+)`)
|
|
|
+ bilibiliURLRegex = regexp.MustCompile(`bilibili\.com/video/(.*)$`)
|
|
|
+ bilibiliVideoIdRegex = regexp.MustCompile(`/video/(?:av(\d+)|BV([a-zA-Z0-9]+))`)
|
|
|
iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
|
|
|
customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
|
|
|
)
|
|
|
@@ -474,7 +475,7 @@ func shouldFetchBilibiliWatchTime(entry *model.Entry) bool {
|
|
|
if !config.Opts.FetchBilibiliWatchTime() {
|
|
|
return false
|
|
|
}
|
|
|
- matches := bilibiliRegex.FindStringSubmatch(entry.URL)
|
|
|
+ matches := bilibiliURLRegex.FindStringSubmatch(entry.URL)
|
|
|
urlMatchesBilibiliPattern := len(matches) == 2
|
|
|
return urlMatchesBilibiliPattern
|
|
|
}
|
|
|
@@ -574,41 +575,66 @@ func fetchOdyseeWatchTime(websiteURL string) (int, error) {
|
|
|
return int(dur / 60), nil
|
|
|
}
|
|
|
|
|
|
+func extractBilibiliVideoID(websiteURL string) (string, string, error) {
|
|
|
+ matches := bilibiliVideoIdRegex.FindStringSubmatch(websiteURL)
|
|
|
+ if matches == nil {
|
|
|
+ return "", "", fmt.Errorf("no video ID found in URL: %s", websiteURL)
|
|
|
+ }
|
|
|
+ if matches[1] != "" {
|
|
|
+ return "aid", matches[1], nil
|
|
|
+ }
|
|
|
+ if matches[2] != "" {
|
|
|
+ return "bvid", matches[2], nil
|
|
|
+ }
|
|
|
+ return "", "", fmt.Errorf("unexpected regex match result for URL: %s", websiteURL)
|
|
|
+}
|
|
|
+
|
|
|
func fetchBilibiliWatchTime(websiteURL string) (int, error) {
|
|
|
requestBuilder := fetcher.NewRequestBuilder()
|
|
|
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
|
|
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
|
|
|
|
|
|
- responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
|
|
|
+ idType, videoID, extractErr := extractBilibiliVideoID(websiteURL)
|
|
|
+ if extractErr != nil {
|
|
|
+ return 0, extractErr
|
|
|
+ }
|
|
|
+ bilibiliApiURL := fmt.Sprintf("https://api.bilibili.com/x/web-interface/view?%s=%s", idType, videoID)
|
|
|
+
|
|
|
+ responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(bilibiliApiURL))
|
|
|
defer responseHandler.Close()
|
|
|
|
|
|
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
|
|
- slog.Warn("Unable to fetch Bilibili page", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
|
|
|
+ slog.Warn("Unable to fetch Bilibili API",
|
|
|
+ slog.String("website_url", bilibiliApiURL),
|
|
|
+ slog.Any("error", localizedError.Error()))
|
|
|
return 0, localizedError.Error()
|
|
|
}
|
|
|
|
|
|
- doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
|
|
|
- if docErr != nil {
|
|
|
- return 0, docErr
|
|
|
+ var result map[string]interface{}
|
|
|
+ doc := json.NewDecoder(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
|
|
|
+ if docErr := doc.Decode(&result); docErr != nil {
|
|
|
+ return 0, fmt.Errorf("failed to decode API response: %v", docErr)
|
|
|
}
|
|
|
|
|
|
- timelengthMatches := timelengthRegex.FindStringSubmatch(doc.Text())
|
|
|
- if len(timelengthMatches) < 2 {
|
|
|
- return 0, errors.New("duration has not found")
|
|
|
+ if code, ok := result["code"].(float64); !ok || code != 0 {
|
|
|
+ return 0, fmt.Errorf("API returned error code: %v", result["code"])
|
|
|
}
|
|
|
|
|
|
- durationMs, err := strconv.ParseInt(timelengthMatches[1], 10, 64)
|
|
|
- if err != nil {
|
|
|
- return 0, fmt.Errorf("unable to parse duration %s: %v", timelengthMatches[1], err)
|
|
|
+ data, ok := result["data"].(map[string]interface{})
|
|
|
+ if !ok {
|
|
|
+ return 0, fmt.Errorf("data field not found or not an object")
|
|
|
}
|
|
|
|
|
|
- durationSec := durationMs / 1000
|
|
|
- durationMin := durationSec / 60
|
|
|
- if durationSec%60 != 0 {
|
|
|
+ duration, ok := data["duration"].(float64)
|
|
|
+ if !ok {
|
|
|
+ return 0, fmt.Errorf("duration not found or not a number")
|
|
|
+ }
|
|
|
+ intDuration := int(duration)
|
|
|
+ durationMin := intDuration / 60
|
|
|
+ if intDuration%60 != 0 {
|
|
|
durationMin++
|
|
|
}
|
|
|
-
|
|
|
- return int(durationMin), nil
|
|
|
+ return durationMin, nil
|
|
|
}
|
|
|
|
|
|
// parseISO8601 parses an ISO 8601 duration string.
|