entry_scraper.go 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. // Copyright 2018 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package ui
  5. import (
  6. "errors"
  7. "net/http"
  8. "github.com/miniflux/miniflux/http/context"
  9. "github.com/miniflux/miniflux/http/request"
  10. "github.com/miniflux/miniflux/http/response/json"
  11. "github.com/miniflux/miniflux/model"
  12. "github.com/miniflux/miniflux/reader/sanitizer"
  13. "github.com/miniflux/miniflux/reader/scraper"
  14. )
  15. // FetchContent downloads the original HTML page and returns relevant contents.
  16. func (c *Controller) FetchContent(w http.ResponseWriter, r *http.Request) {
  17. entryID, err := request.IntParam(r, "entryID")
  18. if err != nil {
  19. json.BadRequest(w, err)
  20. return
  21. }
  22. ctx := context.New(r)
  23. builder := c.store.NewEntryQueryBuilder(ctx.UserID())
  24. builder.WithEntryID(entryID)
  25. builder.WithoutStatus(model.EntryStatusRemoved)
  26. entry, err := builder.GetEntry()
  27. if err != nil {
  28. json.ServerError(w, err)
  29. return
  30. }
  31. if entry == nil {
  32. json.NotFound(w, errors.New("Entry not found"))
  33. return
  34. }
  35. content, err := scraper.Fetch(entry.URL, entry.Feed.ScraperRules)
  36. if err != nil {
  37. json.ServerError(w, err)
  38. return
  39. }
  40. entry.Content = sanitizer.Sanitize(entry.URL, content)
  41. c.store.UpdateEntryContent(entry)
  42. json.OK(w, r, map[string]string{"content": entry.Content})
  43. }