finder.go 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package subscription // import "miniflux.app/reader/subscription"
  5. import (
  6. "io"
  7. "strings"
  8. "miniflux.app/errors"
  9. "miniflux.app/http/client"
  10. "miniflux.app/reader/browser"
  11. "miniflux.app/reader/parser"
  12. "miniflux.app/url"
  13. "github.com/PuerkitoBio/goquery"
  14. )
  // errUnreadableDoc is the message format reported when a downloaded page
  // cannot be parsed as a document; %v receives the underlying parse error.
  var errUnreadableDoc = "Unable to analyze this page: %v"
  18. // FindSubscriptions downloads and try to find one or more subscriptions from an URL.
  19. func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscriptions, *errors.LocalizedError) {
  20. request := client.New(websiteURL)
  21. request.WithCredentials(username, password)
  22. request.WithUserAgent(userAgent)
  23. response, err := browser.Exec(request)
  24. if err != nil {
  25. return nil, err
  26. }
  27. body := response.BodyAsString()
  28. if format := parser.DetectFeedFormat(body); format != parser.FormatUnknown {
  29. var subscriptions Subscriptions
  30. subscriptions = append(subscriptions, &Subscription{
  31. Title: response.EffectiveURL,
  32. URL: response.EffectiveURL,
  33. Type: format,
  34. })
  35. return subscriptions, nil
  36. }
  37. subscriptions, err := parseDocument(response.EffectiveURL, strings.NewReader(body))
  38. if err != nil || subscriptions != nil {
  39. return subscriptions, err
  40. }
  41. return tryWellKnownUrls(websiteURL, userAgent, username, password)
  42. }
  43. func parseDocument(websiteURL string, data io.Reader) (Subscriptions, *errors.LocalizedError) {
  44. var subscriptions Subscriptions
  45. queries := map[string]string{
  46. "link[type='application/rss+xml']": "rss",
  47. "link[type='application/atom+xml']": "atom",
  48. "link[type='application/json']": "json",
  49. }
  50. doc, err := goquery.NewDocumentFromReader(data)
  51. if err != nil {
  52. return nil, errors.NewLocalizedError(errUnreadableDoc, err)
  53. }
  54. for query, kind := range queries {
  55. doc.Find(query).Each(func(i int, s *goquery.Selection) {
  56. subscription := new(Subscription)
  57. subscription.Type = kind
  58. if title, exists := s.Attr("title"); exists {
  59. subscription.Title = title
  60. } else {
  61. subscription.Title = "Feed"
  62. }
  63. if feedURL, exists := s.Attr("href"); exists {
  64. subscription.URL, _ = url.AbsoluteURL(websiteURL, feedURL)
  65. }
  66. if subscription.Title == "" {
  67. subscription.Title = subscription.URL
  68. }
  69. if subscription.URL != "" {
  70. subscriptions = append(subscriptions, subscription)
  71. }
  72. })
  73. }
  74. return subscriptions, nil
  75. }
  76. func tryWellKnownUrls(websiteURL, userAgent, username, password string) (Subscriptions, *errors.LocalizedError) {
  77. var subscriptions Subscriptions
  78. knownURLs := map[string]string{
  79. "/atom.xml": "atom",
  80. "/feed.xml": "atom",
  81. "/feed/": "atom",
  82. "/rss.xml": "rss",
  83. }
  84. lastCharacter := websiteURL[len(websiteURL)-1:]
  85. if lastCharacter == "/" {
  86. websiteURL = websiteURL[:len(websiteURL)-1]
  87. }
  88. for knownURL, kind := range knownURLs {
  89. fullURL, err := url.AbsoluteURL(websiteURL, knownURL)
  90. if err != nil {
  91. continue
  92. }
  93. request := client.New(fullURL)
  94. request.WithCredentials(username, password)
  95. request.WithUserAgent(userAgent)
  96. response, err := request.Get()
  97. if err != nil {
  98. continue
  99. }
  100. if response != nil && response.StatusCode == 200 {
  101. subscription := new(Subscription)
  102. subscription.Type = kind
  103. subscription.Title = fullURL
  104. subscription.URL = fullURL
  105. if subscription.URL != "" {
  106. subscriptions = append(subscriptions, subscription)
  107. }
  108. }
  109. }
  110. return subscriptions, nil
  111. }