favicons.php 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. <?php
  2. declare(strict_types=1);
  // Directory below DATA_PATH named 'favicons/' (with trailing slash).
  // NOTE(review): presumably where fetched favicons are stored; not referenced in the visible part of this file — confirm against callers.
  3. const FAVICONS_DIR = DATA_PATH . '/favicons/';
  // Fallback icon shipped under PUBLIC_PATH; download_favicon() copies it to the destination when no favicon could be saved.
  4. const DEFAULT_FAVICON = PUBLIC_PATH . '/themes/icons/default_favicon.ico';
  /**
   * Tell whether a binary blob looks like an image, judging by its detected MIME type.
   *
   * Based on https://github.com/ArthurHoaro/favicon/blob/3a4f93da9bb24915b21771eb7873a21bde26f5d1/src/Favicon/Favicon.php#L311-L319
   *
   * @param string $content Raw bytes to inspect.
   * @return bool false for an empty string; true when the detected MIME type contains 'image';
   *  also true when detection is impossible (fileinfo extension missing or failing to open),
   *  so that the content is accepted rather than rejected without evidence.
   */
  function isImgMime(string $content): bool {
      if ($content === '') {
          return false;
      }

      // Without fileinfo there is no way to verify: give the content the benefit of the doubt
      if (!extension_loaded('fileinfo')) {
          return true;
      }
      $detector = finfo_open(FILEINFO_MIME_TYPE);
      if ($detector === false) {
          return true;
      }

      $mimeType = finfo_buffer($detector, $content);
      finfo_close($detector);

      // finfo_buffer() yields false on failure, which never counts as an image
      return is_string($mimeType) && str_contains($mimeType, 'image');
  }
  /**
   * Fetch a URL with cURL (used for favicon discovery) and return the response body.
   *
   * No request is made while FreshRSS_http_Util::getRetryAfter() reports a pending retry time
   * for the URL, nor when checkUrl() rejects the URL. An HTTP 429 or 503 answer is recorded
   * through FreshRSS_http_Util::setRetryAfter() and logged as a warning.
   *
   * @param string $url URL to fetch. Passed by reference: it is replaced by the value returned by checkUrl(),
   *  and afterwards by the effective URL reported by cURL (e.g. after redirects) when that one passes checkUrl() too.
   * @param array<int,int|bool|string> $curlOptions Additional cURL options, applied last so that they take
   *  precedence over the defaults below and over the system configuration.
   * @return string The response body when the final HTTP status is 200, an empty string in every other case.
   */
  function downloadHttp(string &$url, array $curlOptions = []): string {
      $retryAfter = FreshRSS_http_Util::getRetryAfter($url);
      if ($retryAfter > 0) {
          Minz_Log::warning('For that domain, will first retry favicon after ' . date('c', $retryAfter) . '. ' . \SimplePie\Misc::url_remove_credentials($url));
          return '';
      }

      // Strip credentials so that they never end up in the system log (as done for the other log messages)
      syslog(LOG_INFO, 'FreshRSS Favicon GET ' . \SimplePie\Misc::url_remove_credentials($url));

      $checkedUrl = checkUrl($url);
      if ($checkedUrl == false) {
          return '';
      }
      $url = $checkedUrl;

      $ch = curl_init($url);
      if ($ch === false) {
          return '';
      }
      curl_setopt_array($ch, [
          CURLOPT_HEADER => true, // Headers are needed to read Retry-After
          CURLOPT_RETURNTRANSFER => true,
          CURLOPT_TIMEOUT => 15,
          CURLOPT_USERAGENT => FRESHRSS_USERAGENT,
          CURLOPT_MAXREDIRS => 10,
          CURLOPT_FOLLOWLOCATION => true,
          CURLOPT_ENCODING => '', //Enable all encodings
          //CURLOPT_VERBOSE => 1, // To debug sent HTTP headers
      ]);
      // Precedence, lowest to highest: defaults above, system configuration, caller-supplied options
      FreshRSS_Context::initSystem();
      if (FreshRSS_Context::hasSystemConf()) {
          curl_setopt_array($ch, FreshRSS_Context::systemConf()->curl_options);
      }
      curl_setopt_array($ch, $curlOptions);

      $response = curl_exec($ch);
      $c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
      $c_effective_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
      curl_close($ch);

      // The raw response contains headers and body (CURLOPT_HEADER): split them
      $parser = new \SimplePie\HTTP\Parser(is_string($response) ? $response : '');
      if ($parser->parse()) {
          $headers = $parser->headers;
          $body = $parser->body;
      } else {
          $headers = [];
          $body = false;
      }

      if (in_array($c_status, [429, 503], true)) {
          $retryAfter = FreshRSS_http_Util::setRetryAfter($url, $headers['retry-after'] ?? '');
          $errorMessage = ($c_status === 429 ? 'HTTP 429 Too Many Requests!' : 'HTTP 503 Service Unavailable!')
              . ' Searching favicon [' . \SimplePie\Misc::url_remove_credentials($url) . ']';
          if ($retryAfter > 0) {
              $errorMessage .= ' We may retry after ' . date('c', $retryAfter);
          }
          // The message used to be assembled and then dropped: report it
          Minz_Log::warning($errorMessage);
      }

      $effectiveUrl = checkUrl($c_effective_url);
      if ($effectiveUrl != false) {
          $url = $effectiveUrl; //Possible redirect
      }

      return $c_status === 200 && is_string($body) ? $body : '';
  }
  /**
   * Download the HTML page at $url and return the content of the first declared icon that is an image.
   *
   * Only `<link>` elements having an `href` and a `rel` equal (ASCII case-insensitively) to
   * "icon" or "shortcut icon" are considered, in document order. Each candidate is resolved
   * against the page's `<base href>` when there is one (otherwise against the page URL),
   * downloaded with the page as referer, and accepted if isImgMime() approves its content.
   * Candidates whose URL cannot be resolved are skipped.
   *
   * @param string $url Page URL. Passed by reference to downloadHttp(), which may update it (e.g. after a redirect).
   * @return string Raw content of the icon, or an empty string when the page cannot be fetched or parsed,
   *  or when no candidate yields an image.
   */
  function searchFavicon(string &$url): string {
      $html = downloadHttp($url);
      if ($html == '') {
          return '';
      }
      $dom = new DOMDocument();
      if (!@$dom->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
          return '';
      }

      $xpath = new DOMXPath($dom);
      // XPath 1.0 has no lower-case(): translate() is used for a case-insensitive match on @rel
      $links = $xpath->query('//link[@href][translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="shortcut icon"'
          . ' or translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="icon"]');
      if (!($links instanceof DOMNodeList)) {
          return '';
      }

      // Use the base element for relative paths, if there is one
      $baseUrl = $url;
      $baseElements = $xpath->query('//base[@href]');
      $baseElement = ($baseElements !== false && $baseElements->length > 0) ? $baseElements->item(0) : null;
      if ($baseElement instanceof DOMElement) {
          // The base href may itself be relative: resolve it against the page URL,
          // and keep the page URL as base if that is not possible
          $resolvedBase = \SimplePie\IRI::absolutize($url, trim($baseElement->getAttribute('href')));
          $resolvedBaseIri = ($resolvedBase instanceof \SimplePie\IRI) ? $resolvedBase->get_iri() : false;
          if ($resolvedBaseIri != false) {
              $baseUrl = $resolvedBaseIri;
          }
      }

      // Scheme of the page, used for protocol-relative URLs ($url does not change inside the loop)
      $urlParts = parse_url($url);
      $scheme = is_array($urlParts) ? ($urlParts['scheme'] ?? 'https') : 'https';

      foreach ($links as $link) {
          if (!($link instanceof DOMElement)) {
              continue;
          }
          $href = trim($link->getAttribute('href'));
          // Handle protocol-relative URLs by adding the current URL's scheme
          if (substr($href, 0, 2) === '//') {
              $href = $scheme . ':' . $href;
          }

          $absolute = \SimplePie\IRI::absolutize($baseUrl, $href);
          $iri = ($absolute instanceof \SimplePie\IRI) ? $absolute->get_iri() : false;
          if ($iri == false) {
              // One unusable link must not prevent trying the remaining candidates
              continue;
          }

          $favicon = downloadHttp($iri, [CURLOPT_REFERER => $url]);
          if (isImgMime($favicon)) {
              return $favicon;
          }
      }
      return '';
  }
  /**
   * Find a favicon for a website and write it to $dest, falling back to the default icon.
   *
   * Attempts, in order, until one yields content:
   *  1. the icons declared by the page at $url (searchFavicon());
   *  2. the icons declared by the root page of the same host, when that differs from $url;
   *  3. the conventional `favicon.ico` appended to the root URL, kept only if isImgMime() accepts it.
   *
   * @param string $url Address of the website or page (surrounding whitespace is trimmed).
   * @param string $dest Path of the file to write.
   * @return bool true when the favicon was written, or else when DEFAULT_FAVICON could be copied to $dest;
   *  false when both failed.
   */
  function download_favicon(string $url, string $dest): bool {
      $url = trim($url);
      $icon = searchFavicon($url);

      if ($icon === '') {
          // Keep only scheme and host, with a trailing slash; unchanged when $url is not an http(s) URL
          $siteRoot = preg_replace('%^(https?://[^/]+).*$%i', '$1/', $url) ?? $url;
          if ($siteRoot !== $url) {
              $url = $siteRoot;
              $icon = searchFavicon($url);
          }

          if ($icon === '') {
              $conventionalUrl = $siteRoot . 'favicon.ico';
              $candidate = downloadHttp($conventionalUrl, [CURLOPT_REFERER => $url]);
              $icon = isImgMime($candidate) ? $candidate : '';
          }
      }

      if ($icon !== '' && file_put_contents($dest, $icon) > 0) {
          return true;
      }
      // Nothing found, or nothing written: fall back to the default icon
      return @copy(DEFAULT_FAVICON, $dest);
  }
  /**
   * Give the MIME type to announce for an icon file.
   *
   * @param string $ico Path of the icon file.
   * @return string The type detected by mime_content_type(), with 'image/svg' rewritten as 'image/svg+xml';
   *  'image/x-icon' when that function is unavailable or detects nothing.
   */
  function contentType(string $ico): string {
      $detected = function_exists('mime_content_type') ? mime_content_type($ico) : false;
      $mimeType = $detected ?: 'image/x-icon';

      // 'image/svg' is rewritten to the 'image/svg+xml' form
      return $mimeType === 'image/svg' ? 'image/svg+xml' : $mimeType;
  }