processor.go 2.0 KB

  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package processor // import "miniflux.app/reader/processor"
  5. import (
  6. "miniflux.app/logger"
  7. "miniflux.app/model"
  8. "miniflux.app/reader/rewrite"
  9. "miniflux.app/reader/sanitizer"
  10. "miniflux.app/reader/scraper"
  11. "miniflux.app/storage"
  12. )
  13. // FeedProcessor handles the processing of feed contents.
  14. type FeedProcessor struct {
  15. userID int64
  16. store *storage.Storage
  17. feed *model.Feed
  18. scraperRules string
  19. rewriteRules string
  20. crawler bool
  21. userAgent string
  22. }
  23. // WithCrawler enables the crawler.
  24. func (f *FeedProcessor) WithCrawler(value bool) {
  25. f.crawler = value
  26. }
  27. // WithScraperRules adds scraper rules to the processing.
  28. func (f *FeedProcessor) WithScraperRules(rules string) {
  29. f.scraperRules = rules
  30. }
  31. // WithUserAgent sets the User-Agent header for fetching article content.
  32. func (f *FeedProcessor) WithUserAgent(userAgent string) {
  33. f.userAgent = userAgent
  34. }
  35. // WithRewriteRules adds rewrite rules to the processing.
  36. func (f *FeedProcessor) WithRewriteRules(rules string) {
  37. f.rewriteRules = rules
  38. }
  39. // Process applies rewrite and scraper rules.
  40. func (f *FeedProcessor) Process() {
  41. for _, entry := range f.feed.Entries {
  42. if f.crawler {
  43. if f.store.EntryURLExists(f.userID, entry.URL) {
  44. logger.Debug(`[FeedProcessor] Do not crawl existing entry URL: "%s"`, entry.URL)
  45. } else {
  46. content, err := scraper.Fetch(entry.URL, f.scraperRules, f.userAgent)
  47. if err != nil {
  48. logger.Error("[FeedProcessor] %v", err)
  49. } else {
  50. entry.Content = content
  51. }
  52. }
  53. }
  54. entry.Content = rewrite.Rewriter(entry.URL, entry.Content, f.rewriteRules)
  55. entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
  56. }
  57. }
  58. // NewFeedProcessor returns a new FeedProcessor.
  59. func NewFeedProcessor(userID int64, store *storage.Storage, feed *model.Feed) *FeedProcessor {
  60. return &FeedProcessor{userID: userID, store: store, feed: feed, crawler: false}
  61. }