processor.go 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package processor
  5. import (
  6. "github.com/miniflux/miniflux/logger"
  7. "github.com/miniflux/miniflux/model"
  8. "github.com/miniflux/miniflux/reader/rewrite"
  9. "github.com/miniflux/miniflux/reader/sanitizer"
  10. "github.com/miniflux/miniflux/reader/scraper"
  11. )
  12. // FeedProcessor handles the processing of feed contents.
  13. type FeedProcessor struct {
  14. feed *model.Feed
  15. scraperRules string
  16. rewriteRules string
  17. crawler bool
  18. }
  19. // WithCrawler enables the crawler.
  20. func (f *FeedProcessor) WithCrawler(value bool) {
  21. f.crawler = value
  22. }
  23. // WithScraperRules adds scraper rules to the processing.
  24. func (f *FeedProcessor) WithScraperRules(rules string) {
  25. f.scraperRules = rules
  26. }
  27. // WithRewriteRules adds rewrite rules to the processing.
  28. func (f *FeedProcessor) WithRewriteRules(rules string) {
  29. f.rewriteRules = rules
  30. }
  31. // Process applies rewrite and scraper rules.
  32. func (f *FeedProcessor) Process() {
  33. for _, entry := range f.feed.Entries {
  34. if f.crawler {
  35. content, err := scraper.Fetch(entry.URL, f.scraperRules)
  36. if err != nil {
  37. logger.Error("[FeedProcessor] %v", err)
  38. } else {
  39. entry.Content = content
  40. }
  41. }
  42. entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
  43. entry.Content = rewrite.Rewriter(entry.URL, entry.Content, f.rewriteRules)
  44. }
  45. }
  46. // NewFeedProcessor returns a new FeedProcessor.
  47. func NewFeedProcessor(feed *model.Feed) *FeedProcessor {
  48. return &FeedProcessor{feed: feed, crawler: false}
  49. }