finder.go 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package subscription // import "miniflux.app/reader/subscription"
  5. import (
  6. "fmt"
  7. "io"
  8. "regexp"
  9. "strings"
  10. "miniflux.app/errors"
  11. "miniflux.app/http/client"
  12. "miniflux.app/reader/browser"
  13. "miniflux.app/reader/parser"
  14. "miniflux.app/url"
  15. "github.com/PuerkitoBio/goquery"
  16. )
  17. var (
  18. errUnreadableDoc = "Unable to analyze this page: %v"
  19. youtubeChannelRegex = regexp.MustCompile(`youtube\.com/channel/(.*)`)
  20. )
  21. // FindSubscriptions downloads and try to find one or more subscriptions from an URL.
  22. func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscriptions, *errors.LocalizedError) {
  23. websiteURL = findYoutubeChannelFeed(websiteURL)
  24. request := client.New(websiteURL)
  25. request.WithCredentials(username, password)
  26. request.WithUserAgent(userAgent)
  27. response, err := browser.Exec(request)
  28. if err != nil {
  29. return nil, err
  30. }
  31. body := response.BodyAsString()
  32. if format := parser.DetectFeedFormat(body); format != parser.FormatUnknown {
  33. var subscriptions Subscriptions
  34. subscriptions = append(subscriptions, &Subscription{
  35. Title: response.EffectiveURL,
  36. URL: response.EffectiveURL,
  37. Type: format,
  38. })
  39. return subscriptions, nil
  40. }
  41. subscriptions, err := parseDocument(response.EffectiveURL, strings.NewReader(body))
  42. if err != nil || subscriptions != nil {
  43. return subscriptions, err
  44. }
  45. return tryWellKnownUrls(websiteURL, userAgent, username, password)
  46. }
  47. func parseDocument(websiteURL string, data io.Reader) (Subscriptions, *errors.LocalizedError) {
  48. var subscriptions Subscriptions
  49. queries := map[string]string{
  50. "link[type='application/rss+xml']": "rss",
  51. "link[type='application/atom+xml']": "atom",
  52. "link[type='application/json']": "json",
  53. }
  54. doc, err := goquery.NewDocumentFromReader(data)
  55. if err != nil {
  56. return nil, errors.NewLocalizedError(errUnreadableDoc, err)
  57. }
  58. for query, kind := range queries {
  59. doc.Find(query).Each(func(i int, s *goquery.Selection) {
  60. subscription := new(Subscription)
  61. subscription.Type = kind
  62. if title, exists := s.Attr("title"); exists {
  63. subscription.Title = title
  64. } else {
  65. subscription.Title = "Feed"
  66. }
  67. if feedURL, exists := s.Attr("href"); exists {
  68. subscription.URL, _ = url.AbsoluteURL(websiteURL, feedURL)
  69. }
  70. if subscription.Title == "" {
  71. subscription.Title = subscription.URL
  72. }
  73. if subscription.URL != "" {
  74. subscriptions = append(subscriptions, subscription)
  75. }
  76. })
  77. }
  78. return subscriptions, nil
  79. }
  80. func findYoutubeChannelFeed(websiteURL string) string {
  81. matches := youtubeChannelRegex.FindStringSubmatch(websiteURL)
  82. if len(matches) == 2 {
  83. return fmt.Sprintf(`https://www.youtube.com/feeds/videos.xml?channel_id=%s`, matches[1])
  84. }
  85. return websiteURL
  86. }
  87. func tryWellKnownUrls(websiteURL, userAgent, username, password string) (Subscriptions, *errors.LocalizedError) {
  88. var subscriptions Subscriptions
  89. knownURLs := map[string]string{
  90. "/atom.xml": "atom",
  91. "/feed.xml": "atom",
  92. "/feed/": "atom",
  93. "/rss.xml": "rss",
  94. }
  95. lastCharacter := websiteURL[len(websiteURL)-1:]
  96. if lastCharacter == "/" {
  97. websiteURL = websiteURL[:len(websiteURL)-1]
  98. }
  99. for knownURL, kind := range knownURLs {
  100. fullURL, err := url.AbsoluteURL(websiteURL, knownURL)
  101. if err != nil {
  102. continue
  103. }
  104. request := client.New(fullURL)
  105. request.WithCredentials(username, password)
  106. request.WithUserAgent(userAgent)
  107. response, err := request.Get()
  108. if err != nil {
  109. continue
  110. }
  111. if response != nil && response.StatusCode == 200 {
  112. subscription := new(Subscription)
  113. subscription.Type = kind
  114. subscription.Title = fullURL
  115. subscription.URL = fullURL
  116. if subscription.URL != "" {
  117. subscriptions = append(subscriptions, subscription)
  118. }
  119. }
  120. }
  121. return subscriptions, nil
  122. }