finder.go 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package subscription // import "miniflux.app/reader/subscription"
  5. import (
  6. "fmt"
  7. "io"
  8. "regexp"
  9. "strings"
  10. "miniflux.app/errors"
  11. "miniflux.app/http/client"
  12. "miniflux.app/reader/browser"
  13. "miniflux.app/reader/parser"
  14. "miniflux.app/url"
  15. "github.com/PuerkitoBio/goquery"
  16. )
  17. var (
  18. errUnreadableDoc = "Unable to analyze this page: %v"
  19. youtubeChannelRegex = regexp.MustCompile(`youtube\.com/channel/(.*)`)
  20. youtubeVideoRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
  21. )
  22. // FindSubscriptions downloads and try to find one or more subscriptions from an URL.
  23. func FindSubscriptions(websiteURL, userAgent, username, password string, fetchViaProxy bool) (Subscriptions, *errors.LocalizedError) {
  24. websiteURL = findYoutubeChannelFeed(websiteURL)
  25. websiteURL = parseYoutubeVideoPage(websiteURL)
  26. request := client.New(websiteURL)
  27. request.WithCredentials(username, password)
  28. request.WithUserAgent(userAgent)
  29. if fetchViaProxy {
  30. request.WithProxy()
  31. }
  32. response, err := browser.Exec(request)
  33. if err != nil {
  34. return nil, err
  35. }
  36. body := response.BodyAsString()
  37. if format := parser.DetectFeedFormat(body); format != parser.FormatUnknown {
  38. var subscriptions Subscriptions
  39. subscriptions = append(subscriptions, &Subscription{
  40. Title: response.EffectiveURL,
  41. URL: response.EffectiveURL,
  42. Type: format,
  43. })
  44. return subscriptions, nil
  45. }
  46. subscriptions, err := parseWebPage(response.EffectiveURL, strings.NewReader(body))
  47. if err != nil || subscriptions != nil {
  48. return subscriptions, err
  49. }
  50. return tryWellKnownUrls(websiteURL, userAgent, username, password)
  51. }
  52. func parseWebPage(websiteURL string, data io.Reader) (Subscriptions, *errors.LocalizedError) {
  53. var subscriptions Subscriptions
  54. queries := map[string]string{
  55. "link[type='application/rss+xml']": "rss",
  56. "link[type='application/atom+xml']": "atom",
  57. "link[type='application/json']": "json",
  58. }
  59. doc, err := goquery.NewDocumentFromReader(data)
  60. if err != nil {
  61. return nil, errors.NewLocalizedError(errUnreadableDoc, err)
  62. }
  63. for query, kind := range queries {
  64. doc.Find(query).Each(func(i int, s *goquery.Selection) {
  65. subscription := new(Subscription)
  66. subscription.Type = kind
  67. if title, exists := s.Attr("title"); exists {
  68. subscription.Title = title
  69. } else {
  70. subscription.Title = "Feed"
  71. }
  72. if feedURL, exists := s.Attr("href"); exists {
  73. subscription.URL, _ = url.AbsoluteURL(websiteURL, feedURL)
  74. }
  75. if subscription.Title == "" {
  76. subscription.Title = subscription.URL
  77. }
  78. if subscription.URL != "" {
  79. subscriptions = append(subscriptions, subscription)
  80. }
  81. })
  82. }
  83. return subscriptions, nil
  84. }
  85. func findYoutubeChannelFeed(websiteURL string) string {
  86. matches := youtubeChannelRegex.FindStringSubmatch(websiteURL)
  87. if len(matches) == 2 {
  88. return fmt.Sprintf(`https://www.youtube.com/feeds/videos.xml?channel_id=%s`, matches[1])
  89. }
  90. return websiteURL
  91. }
  92. func parseYoutubeVideoPage(websiteURL string) string {
  93. if !youtubeVideoRegex.MatchString(websiteURL) {
  94. return websiteURL
  95. }
  96. request := client.New(websiteURL)
  97. response, browserErr := browser.Exec(request)
  98. if browserErr != nil {
  99. return websiteURL
  100. }
  101. doc, docErr := goquery.NewDocumentFromReader(response.Body)
  102. if docErr != nil {
  103. return websiteURL
  104. }
  105. if channelID, exists := doc.Find(`meta[itemprop="channelId"]`).First().Attr("content"); exists {
  106. return fmt.Sprintf(`https://www.youtube.com/feeds/videos.xml?channel_id=%s`, channelID)
  107. }
  108. return websiteURL
  109. }
  110. func tryWellKnownUrls(websiteURL, userAgent, username, password string) (Subscriptions, *errors.LocalizedError) {
  111. var subscriptions Subscriptions
  112. knownURLs := map[string]string{
  113. "/atom.xml": "atom",
  114. "/feed.xml": "atom",
  115. "/feed/": "atom",
  116. "/rss.xml": "rss",
  117. }
  118. lastCharacter := websiteURL[len(websiteURL)-1:]
  119. if lastCharacter == "/" {
  120. websiteURL = websiteURL[:len(websiteURL)-1]
  121. }
  122. for knownURL, kind := range knownURLs {
  123. fullURL, err := url.AbsoluteURL(websiteURL, knownURL)
  124. if err != nil {
  125. continue
  126. }
  127. request := client.New(fullURL)
  128. request.WithCredentials(username, password)
  129. request.WithUserAgent(userAgent)
  130. response, err := request.Get()
  131. if err != nil {
  132. continue
  133. }
  134. if response != nil && response.StatusCode == 200 {
  135. subscription := new(Subscription)
  136. subscription.Type = kind
  137. subscription.Title = fullURL
  138. subscription.URL = fullURL
  139. if subscription.URL != "" {
  140. subscriptions = append(subscriptions, subscription)
  141. }
  142. }
  143. }
  144. return subscriptions, nil
  145. }