processor.go 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
  4. package processor
  5. import (
  6. "github.com/miniflux/miniflux/logger"
  7. "github.com/miniflux/miniflux/model"
  8. "github.com/miniflux/miniflux/reader/rewrite"
  9. "github.com/miniflux/miniflux/reader/sanitizer"
  10. "github.com/miniflux/miniflux/reader/scraper"
  11. "github.com/miniflux/miniflux/storage"
  12. )
// FeedProcessor handles the processing of feed contents:
// optional crawling of the full article content, application of
// rewrite rules, and HTML sanitization of each entry.
type FeedProcessor struct {
	userID       int64            // owner of the feed; used when checking for already-stored entry URLs
	store        *storage.Storage // storage backend used to look up existing entries
	feed         *model.Feed      // feed whose entries are processed
	scraperRules string           // rules passed to the scraper when crawling entry content
	rewriteRules string           // rules passed to the content rewriter
	crawler      bool             // when true, fetch the full content of entries not already stored
}
  22. // WithCrawler enables the crawler.
  23. func (f *FeedProcessor) WithCrawler(value bool) {
  24. f.crawler = value
  25. }
  26. // WithScraperRules adds scraper rules to the processing.
  27. func (f *FeedProcessor) WithScraperRules(rules string) {
  28. f.scraperRules = rules
  29. }
  30. // WithRewriteRules adds rewrite rules to the processing.
  31. func (f *FeedProcessor) WithRewriteRules(rules string) {
  32. f.rewriteRules = rules
  33. }
  34. // Process applies rewrite and scraper rules.
  35. func (f *FeedProcessor) Process() {
  36. for _, entry := range f.feed.Entries {
  37. if f.crawler {
  38. if f.store.EntryURLExists(f.userID, entry.URL) {
  39. logger.Debug(`[FeedProcessor] Do not crawl existing entry URL: "%s"`, entry.URL)
  40. } else {
  41. content, err := scraper.Fetch(entry.URL, f.scraperRules)
  42. if err != nil {
  43. logger.Error("[FeedProcessor] %v", err)
  44. } else {
  45. entry.Content = content
  46. }
  47. }
  48. }
  49. entry.Content = rewrite.Rewriter(entry.URL, entry.Content, f.rewriteRules)
  50. entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
  51. }
  52. }
  53. // NewFeedProcessor returns a new FeedProcessor.
  54. func NewFeedProcessor(userID int64, store *storage.Storage, feed *model.Feed) *FeedProcessor {
  55. return &FeedProcessor{userID: userID, store: store, feed: feed, crawler: false}
  56. }