finder.go 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package icon
  5. import (
  6. "fmt"
  7. "io"
  8. "io/ioutil"
  9. "github.com/miniflux/miniflux/helper"
  10. "github.com/miniflux/miniflux/http"
  11. "github.com/miniflux/miniflux/logger"
  12. "github.com/miniflux/miniflux/model"
  13. "github.com/miniflux/miniflux/url"
  14. "github.com/PuerkitoBio/goquery"
  15. )
  16. // FindIcon try to find the website's icon.
  17. func FindIcon(websiteURL string) (*model.Icon, error) {
  18. rootURL := url.RootURL(websiteURL)
  19. client := http.NewClient(rootURL)
  20. response, err := client.Get()
  21. if err != nil {
  22. return nil, fmt.Errorf("unable to download website index page: %v", err)
  23. }
  24. if response.HasServerFailure() {
  25. return nil, fmt.Errorf("unable to download website index page: status=%d", response.StatusCode)
  26. }
  27. iconURL, err := parseDocument(rootURL, response.Body)
  28. if err != nil {
  29. return nil, err
  30. }
  31. logger.Debug("[FindIcon] Fetching icon => %s", iconURL)
  32. icon, err := downloadIcon(iconURL)
  33. if err != nil {
  34. return nil, err
  35. }
  36. return icon, nil
  37. }
  38. func parseDocument(websiteURL string, data io.Reader) (string, error) {
  39. queries := []string{
  40. "link[rel='shortcut icon']",
  41. "link[rel='Shortcut Icon']",
  42. "link[rel='icon shortcut']",
  43. "link[rel='icon']",
  44. }
  45. doc, err := goquery.NewDocumentFromReader(data)
  46. if err != nil {
  47. return "", fmt.Errorf("unable to read document: %v", err)
  48. }
  49. var iconURL string
  50. for _, query := range queries {
  51. doc.Find(query).Each(func(i int, s *goquery.Selection) {
  52. if href, exists := s.Attr("href"); exists {
  53. iconURL = href
  54. }
  55. })
  56. if iconURL != "" {
  57. break
  58. }
  59. }
  60. if iconURL == "" {
  61. iconURL = url.RootURL(websiteURL) + "favicon.ico"
  62. } else {
  63. iconURL, _ = url.AbsoluteURL(websiteURL, iconURL)
  64. }
  65. return iconURL, nil
  66. }
  67. func downloadIcon(iconURL string) (*model.Icon, error) {
  68. client := http.NewClient(iconURL)
  69. response, err := client.Get()
  70. if err != nil {
  71. return nil, fmt.Errorf("unable to download iconURL: %v", err)
  72. }
  73. if response.HasServerFailure() {
  74. return nil, fmt.Errorf("unable to download icon: status=%d", response.StatusCode)
  75. }
  76. body, err := ioutil.ReadAll(response.Body)
  77. if err != nil {
  78. return nil, fmt.Errorf("unable to read downloaded icon: %v", err)
  79. }
  80. if len(body) == 0 {
  81. return nil, fmt.Errorf("downloaded icon is empty, iconURL=%s", iconURL)
  82. }
  83. icon := &model.Icon{
  84. Hash: helper.HashFromBytes(body),
  85. MimeType: response.ContentType,
  86. Content: body,
  87. }
  88. return icon, nil
  89. }