youtube.go 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package processor
  4. import (
  5. "errors"
  6. "fmt"
  7. "log/slog"
  8. "regexp"
  9. "strconv"
  10. "time"
  11. "github.com/PuerkitoBio/goquery"
  12. "miniflux.app/v2/internal/config"
  13. "miniflux.app/v2/internal/model"
  14. "miniflux.app/v2/internal/reader/fetcher"
  15. )
  16. var (
  17. youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
  18. iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
  19. )
  20. func shouldFetchYouTubeWatchTime(entry *model.Entry) bool {
  21. if !config.Opts.FetchYouTubeWatchTime() {
  22. return false
  23. }
  24. matches := youtubeRegex.FindStringSubmatch(entry.URL)
  25. urlMatchesYouTubePattern := len(matches) == 2
  26. return urlMatchesYouTubePattern
  27. }
  28. func fetchYouTubeWatchTime(websiteURL string) (int, error) {
  29. requestBuilder := fetcher.NewRequestBuilder()
  30. requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
  31. requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
  32. responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
  33. defer responseHandler.Close()
  34. if localizedError := responseHandler.LocalizedError(); localizedError != nil {
  35. slog.Warn("Unable to fetch YouTube page", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
  36. return 0, localizedError.Error()
  37. }
  38. doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
  39. if docErr != nil {
  40. return 0, docErr
  41. }
  42. durs, exists := doc.Find(`meta[itemprop="duration"]`).First().Attr("content")
  43. if !exists {
  44. return 0, errors.New("duration has not found")
  45. }
  46. dur, err := parseISO8601(durs)
  47. if err != nil {
  48. return 0, fmt.Errorf("unable to parse duration %s: %v", durs, err)
  49. }
  50. return int(dur.Minutes()), nil
  51. }
  52. func parseISO8601(from string) (time.Duration, error) {
  53. var match []string
  54. var d time.Duration
  55. if iso8601Regex.MatchString(from) {
  56. match = iso8601Regex.FindStringSubmatch(from)
  57. } else {
  58. return 0, errors.New("could not parse duration string")
  59. }
  60. for i, name := range iso8601Regex.SubexpNames() {
  61. part := match[i]
  62. if i == 0 || name == "" || part == "" {
  63. continue
  64. }
  65. val, err := strconv.ParseInt(part, 10, 64)
  66. if err != nil {
  67. return 0, err
  68. }
  69. switch name {
  70. case "hour":
  71. d += (time.Duration(val) * time.Hour)
  72. case "minute":
  73. d += (time.Duration(val) * time.Minute)
  74. case "second":
  75. d += (time.Duration(val) * time.Second)
  76. default:
  77. return 0, fmt.Errorf("unknown field %s", name)
  78. }
  79. }
  80. return d, nil
  81. }