finder.go 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package icon // import "miniflux.app/reader/icon"
  5. import (
  6. "encoding/base64"
  7. "fmt"
  8. "io"
  9. "strings"
  10. stdlib_url "net/url"
  11. "miniflux.app/config"
  12. "miniflux.app/crypto"
  13. "miniflux.app/http/client"
  14. "miniflux.app/logger"
  15. "miniflux.app/model"
  16. "miniflux.app/url"
  17. "github.com/PuerkitoBio/goquery"
  18. )
  19. // FindIcon try to find the website's icon.
  20. func FindIcon(websiteURL, userAgent string, fetchViaProxy, allowSelfSignedCertificates bool) (*model.Icon, error) {
  21. rootURL := url.RootURL(websiteURL)
  22. logger.Debug("[FindIcon] Trying to find an icon: rootURL=%q websiteURL=%q userAgent=%q", rootURL, websiteURL, userAgent)
  23. clt := client.NewClientWithConfig(rootURL, config.Opts)
  24. clt.WithUserAgent(userAgent)
  25. clt.AllowSelfSignedCertificates = allowSelfSignedCertificates
  26. if fetchViaProxy {
  27. clt.WithProxy()
  28. }
  29. response, err := clt.Get()
  30. if err != nil {
  31. return nil, fmt.Errorf("icon: unable to download website index page: %v", err)
  32. }
  33. if response.HasServerFailure() {
  34. return nil, fmt.Errorf("icon: unable to download website index page: status=%d", response.StatusCode)
  35. }
  36. iconURL, err := parseDocument(rootURL, response.Body)
  37. if err != nil {
  38. return nil, err
  39. }
  40. if strings.HasPrefix(iconURL, "data:") {
  41. return parseImageDataURL(iconURL)
  42. }
  43. logger.Debug("[FindIcon] Fetching icon => %s", iconURL)
  44. icon, err := downloadIcon(iconURL, userAgent, fetchViaProxy, allowSelfSignedCertificates)
  45. if err != nil {
  46. return nil, err
  47. }
  48. return icon, nil
  49. }
  50. func parseDocument(websiteURL string, data io.Reader) (string, error) {
  51. queries := []string{
  52. "link[rel='shortcut icon']",
  53. "link[rel='Shortcut Icon']",
  54. "link[rel='icon shortcut']",
  55. "link[rel='icon']",
  56. }
  57. doc, err := goquery.NewDocumentFromReader(data)
  58. if err != nil {
  59. return "", fmt.Errorf("icon: unable to read document: %v", err)
  60. }
  61. var iconURL string
  62. for _, query := range queries {
  63. doc.Find(query).Each(func(i int, s *goquery.Selection) {
  64. if href, exists := s.Attr("href"); exists {
  65. iconURL = strings.TrimSpace(href)
  66. }
  67. })
  68. if iconURL != "" {
  69. break
  70. }
  71. }
  72. if iconURL == "" {
  73. iconURL = url.RootURL(websiteURL) + "favicon.ico"
  74. } else {
  75. iconURL, _ = url.AbsoluteURL(websiteURL, iconURL)
  76. }
  77. return iconURL, nil
  78. }
  79. func downloadIcon(iconURL, userAgent string, fetchViaProxy, allowSelfSignedCertificates bool) (*model.Icon, error) {
  80. clt := client.NewClientWithConfig(iconURL, config.Opts)
  81. clt.WithUserAgent(userAgent)
  82. clt.AllowSelfSignedCertificates = allowSelfSignedCertificates
  83. if fetchViaProxy {
  84. clt.WithProxy()
  85. }
  86. response, err := clt.Get()
  87. if err != nil {
  88. return nil, fmt.Errorf("icon: unable to download iconURL: %v", err)
  89. }
  90. if response.HasServerFailure() {
  91. return nil, fmt.Errorf("icon: unable to download icon: status=%d", response.StatusCode)
  92. }
  93. body, err := io.ReadAll(response.Body)
  94. if err != nil {
  95. return nil, fmt.Errorf("icon: unable to read downloaded icon: %v", err)
  96. }
  97. if len(body) == 0 {
  98. return nil, fmt.Errorf("icon: downloaded icon is empty, iconURL=%s", iconURL)
  99. }
  100. icon := &model.Icon{
  101. Hash: crypto.HashFromBytes(body),
  102. MimeType: response.ContentType,
  103. Content: body,
  104. }
  105. return icon, nil
  106. }
  107. // https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs#syntax
  108. // data:[<mediatype>][;base64],<data>
  109. func parseImageDataURL(value string) (*model.Icon, error) {
  110. var mediaType string
  111. var encoding string
  112. if !strings.HasPrefix(value, "data:") {
  113. return nil, fmt.Errorf(`icon: invalid data URL (missing data:) %q`, value)
  114. }
  115. value = value[5:]
  116. comma := strings.Index(value, ",")
  117. if comma < 0 {
  118. return nil, fmt.Errorf(`icon: invalid data URL (no comma) %q`, value)
  119. }
  120. data := value[comma+1:]
  121. semicolon := strings.Index(value[0:comma], ";")
  122. if semicolon > 0 {
  123. mediaType = value[0:semicolon]
  124. encoding = value[semicolon+1 : comma]
  125. } else {
  126. mediaType = value[0:comma]
  127. }
  128. if !strings.HasPrefix(mediaType, "image/") {
  129. return nil, fmt.Errorf(`icon: invalid media type %q`, mediaType)
  130. }
  131. var blob []byte
  132. switch encoding {
  133. case "base64":
  134. var err error
  135. blob, err = base64.StdEncoding.DecodeString(data)
  136. if err != nil {
  137. return nil, fmt.Errorf(`icon: invalid data %q (%v)`, value, err)
  138. }
  139. case "":
  140. decodedData, err := stdlib_url.QueryUnescape(data)
  141. if err != nil {
  142. return nil, fmt.Errorf(`icon: unable to decode data URL %q`, value)
  143. }
  144. blob = []byte(decodedData)
  145. default:
  146. return nil, fmt.Errorf(`icon: unsupported data URL encoding %q`, value)
  147. }
  148. if len(blob) == 0 {
  149. return nil, fmt.Errorf(`icon: empty data URL %q`, value)
  150. }
  151. icon := &model.Icon{
  152. Hash: crypto.HashFromBytes(blob),
  153. Content: blob,
  154. MimeType: mediaType,
  155. }
  156. return icon, nil
  157. }