| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191 |
- // Copyright 2017 Frédéric Guillot. All rights reserved.
- // Use of this source code is governed by the Apache 2.0
- // license that can be found in the LICENSE file.
- package subscription // import "miniflux.app/reader/subscription"
- import (
- "fmt"
- "io"
- "regexp"
- "strings"
- "miniflux.app/config"
- "miniflux.app/errors"
- "miniflux.app/http/client"
- "miniflux.app/reader/browser"
- "miniflux.app/reader/parser"
- "miniflux.app/url"
- "github.com/PuerkitoBio/goquery"
- )
- var (
- errUnreadableDoc = "Unable to analyze this page: %v"
- youtubeChannelRegex = regexp.MustCompile(`youtube\.com/channel/(.*)`)
- youtubeVideoRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
- )
- // FindSubscriptions downloads and try to find one or more subscriptions from an URL.
- func FindSubscriptions(websiteURL, userAgent, cookie, username, password string, fetchViaProxy, allowSelfSignedCertificates bool) (Subscriptions, *errors.LocalizedError) {
- websiteURL = findYoutubeChannelFeed(websiteURL)
- websiteURL = parseYoutubeVideoPage(websiteURL)
- clt := client.NewClientWithConfig(websiteURL, config.Opts)
- clt.WithCredentials(username, password)
- clt.WithUserAgent(userAgent)
- clt.WithCookie(cookie)
- clt.AllowSelfSignedCertificates = allowSelfSignedCertificates
- if fetchViaProxy {
- clt.WithProxy()
- }
- response, err := browser.Exec(clt)
- if err != nil {
- return nil, err
- }
- body := response.BodyAsString()
- if format := parser.DetectFeedFormat(body); format != parser.FormatUnknown {
- var subscriptions Subscriptions
- subscriptions = append(subscriptions, &Subscription{
- Title: response.EffectiveURL,
- URL: response.EffectiveURL,
- Type: format,
- })
- return subscriptions, nil
- }
- subscriptions, err := parseWebPage(response.EffectiveURL, strings.NewReader(body))
- if err != nil || subscriptions != nil {
- return subscriptions, err
- }
- return tryWellKnownUrls(websiteURL, userAgent, cookie, username, password)
- }
- func parseWebPage(websiteURL string, data io.Reader) (Subscriptions, *errors.LocalizedError) {
- var subscriptions Subscriptions
- queries := map[string]string{
- "link[type='application/rss+xml']": "rss",
- "link[type='application/atom+xml']": "atom",
- "link[type='application/json']": "json",
- "link[type='application/feed+json']": "json",
- }
- doc, err := goquery.NewDocumentFromReader(data)
- if err != nil {
- return nil, errors.NewLocalizedError(errUnreadableDoc, err)
- }
- for query, kind := range queries {
- doc.Find(query).Each(func(i int, s *goquery.Selection) {
- subscription := new(Subscription)
- subscription.Type = kind
- if title, exists := s.Attr("title"); exists {
- subscription.Title = title
- } else {
- subscription.Title = "Feed"
- }
- if feedURL, exists := s.Attr("href"); exists {
- subscription.URL, _ = url.AbsoluteURL(websiteURL, feedURL)
- }
- if subscription.Title == "" {
- subscription.Title = subscription.URL
- }
- if subscription.URL != "" {
- subscriptions = append(subscriptions, subscription)
- }
- })
- }
- return subscriptions, nil
- }
- func findYoutubeChannelFeed(websiteURL string) string {
- matches := youtubeChannelRegex.FindStringSubmatch(websiteURL)
- if len(matches) == 2 {
- return fmt.Sprintf(`https://www.youtube.com/feeds/videos.xml?channel_id=%s`, matches[1])
- }
- return websiteURL
- }
- func parseYoutubeVideoPage(websiteURL string) string {
- if !youtubeVideoRegex.MatchString(websiteURL) {
- return websiteURL
- }
- clt := client.NewClientWithConfig(websiteURL, config.Opts)
- response, browserErr := browser.Exec(clt)
- if browserErr != nil {
- return websiteURL
- }
- doc, docErr := goquery.NewDocumentFromReader(response.Body)
- if docErr != nil {
- return websiteURL
- }
- if channelID, exists := doc.Find(`meta[itemprop="channelId"]`).First().Attr("content"); exists {
- return fmt.Sprintf(`https://www.youtube.com/feeds/videos.xml?channel_id=%s`, channelID)
- }
- return websiteURL
- }
- func tryWellKnownUrls(websiteURL, userAgent, cookie, username, password string) (Subscriptions, *errors.LocalizedError) {
- var subscriptions Subscriptions
- knownURLs := map[string]string{
- "/atom.xml": "atom",
- "/feed.xml": "atom",
- "/feed/": "atom",
- "/rss.xml": "rss",
- "/rss/": "rss",
- }
- lastCharacter := websiteURL[len(websiteURL)-1:]
- if lastCharacter == "/" {
- websiteURL = websiteURL[:len(websiteURL)-1]
- }
- for knownURL, kind := range knownURLs {
- fullURL, err := url.AbsoluteURL(websiteURL, knownURL)
- if err != nil {
- continue
- }
- clt := client.NewClientWithConfig(fullURL, config.Opts)
- clt.WithCredentials(username, password)
- clt.WithUserAgent(userAgent)
- clt.WithCookie(cookie)
- // Some websites redirects unknown URLs to the home page.
- // As result, the list of known URLs is returned to the subscription list.
- // We don't want the user to choose between invalid feed URLs.
- clt.WithoutRedirects()
- response, err := clt.Get()
- if err != nil {
- continue
- }
- if response != nil && response.StatusCode == 200 {
- subscription := new(Subscription)
- subscription.Type = kind
- subscription.Title = fullURL
- subscription.URL = fullURL
- if subscription.URL != "" {
- subscriptions = append(subscriptions, subscription)
- }
- }
- }
- return subscriptions, nil
- }
|