favicons.php 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. <?php
  2. declare(strict_types=1);
  // Favicon directory path, built as DATA_PATH followed by '/favicons/'.
  // DATA_PATH is not defined in the visible part of this file (presumably set by the application bootstrap).
  3. const FAVICONS_DIR = DATA_PATH . '/favicons/';
  // Stock icon file that download_favicon() copies to the destination when no favicon could be saved.
  4. const DEFAULT_FAVICON = PUBLIC_PATH . '/themes/icons/default_favicon.ico';
  /**
   * Tell whether a raw payload looks like an image, judging by its sniffed MIME type.
   *
   * Based on https://github.com/ArthurHoaro/favicon/blob/3a4f93da9bb24915b21771eb7873a21bde26f5d1/src/Favicon/Favicon.php#L311-L319
   *
   * @param string $content Raw bytes to inspect.
   * @return bool False for an empty payload, or when the detected MIME type does not contain "image".
   *  True when it does, and also when no detection is possible at all
   *  (fileinfo extension missing, or its database cannot be opened), so that callers stay permissive.
   */
  function isImgMime(string $content): bool {
      if ($content === '') {
          return false;
      }
      if (!extension_loaded('fileinfo')) {
          return true;
      }
      $fInfo = finfo_open(FILEINFO_MIME_TYPE);
      if ($fInfo === false) {
          // Same situation as a missing extension: nothing can be sniffed, so do not reject the payload.
          return true;
      }
      // finfo_buffer() returns false on failure; keep it apart from the payload so it never reaches strpos().
      $mimeType = finfo_buffer($fInfo, $content);
      finfo_close($fInfo);
      return is_string($mimeType) && strpos($mimeType, 'image') !== false;
  }
  /**
   * Fetch a URL with cURL and return the response body.
   *
   * The URL is first passed through checkUrl() (defined outside this file); a falsy result aborts the download.
   * $url is updated by reference: first to the value returned by checkUrl(), then, when cURL reports
   * an effective URL that also passes checkUrl(), to that URL (possible redirect).
   *
   * @param string $url URL to download; may be rewritten as described above.
   * @param array<int,int|bool|string> $curlOptions Extra cURL options. They are applied last, so they override
   *  both the defaults set here and the options taken from the system configuration.
   * @return string The response body when the final HTTP status is 200, an empty string otherwise.
   */
  function downloadHttp(string &$url, array $curlOptions = []): string {
      syslog(LOG_INFO, 'FreshRSS Favicon GET ' . $url);
      $url2 = checkUrl($url);
      if ($url2 == false) {
          return '';
      }
      $url = $url2;

      $ch = curl_init($url);
      if ($ch === false) {
          // No handle could be created: there is nothing to configure or execute.
          return '';
      }
      curl_setopt_array($ch, [
          CURLOPT_RETURNTRANSFER => true,
          CURLOPT_TIMEOUT => 15,
          CURLOPT_USERAGENT => FRESHRSS_USERAGENT,
          CURLOPT_MAXREDIRS => 10,
          CURLOPT_FOLLOWLOCATION => true,
          CURLOPT_ENCODING => '',	//Enable all encodings
          //CURLOPT_VERBOSE => 1,	// To debug sent HTTP headers
      ]);

      FreshRSS_Context::initSystem();
      if (FreshRSS_Context::hasSystemConf()) {
          curl_setopt_array($ch, FreshRSS_Context::systemConf()->curl_options);
      }
      curl_setopt_array($ch, $curlOptions);

      $response = curl_exec($ch);
      if (!is_string($response)) {
          $response = '';
      }
      $info = curl_getinfo($ch);
      curl_close($ch);

      if (!empty($info['url'])) {
          $url2 = checkUrl($info['url']);
          if ($url2 != false) {
              $url = $url2;	//Possible redirect
          }
      }
      return $info['http_code'] == 200 ? $response : '';
  }
  /**
   * Download an HTML page and return the content of the first favicon it declares that looks like an image.
   *
   * Candidates are the <link href> elements whose rel attribute is, case-insensitively,
   * "shortcut icon" or "icon". Each candidate is resolved against the page's <base href> when there is one
   * (itself resolved against the page URL), otherwise against the page URL, then downloaded and checked
   * with isImgMime(). A candidate that cannot be resolved is skipped, so later candidates are still tried.
   *
   * @param string $url Page URL; passed by reference to downloadHttp(), which may rewrite it.
   * @return string Raw bytes of the first candidate accepted by isImgMime(), or an empty string when
   *  the page cannot be downloaded or parsed, or no candidate qualifies.
   */
  function searchFavicon(string &$url): string {
      $html = downloadHttp($url);
      if ($html == '') {
          return '';
      }
      $dom = new DOMDocument();
      if (!@$dom->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
          return '';
      }

      $xpath = new DOMXPath($dom);
      $links = $xpath->query('//link[@href][translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="shortcut icon"'
          . ' or translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="icon"]');
      if (!($links instanceof DOMNodeList)) {
          return '';
      }

      // Use the base element for relative paths, if there is one.
      // Its href may itself be relative or empty, so resolve it against the page URL and
      // fall back to the page URL when that fails.
      $baseUrl = $url;
      $baseElements = $xpath->query('//base[@href]');
      $baseElement = ($baseElements instanceof DOMNodeList && $baseElements->length > 0) ? $baseElements->item(0) : null;
      if ($baseElement instanceof DOMElement) {
          $baseHref = trim($baseElement->getAttribute('href'));
          $baseIri = $baseHref === '' ? false : \SimplePie\IRI::absolutize($url, $baseHref);
          $resolvedBase = is_object($baseIri) ? $baseIri->get_iri() : false;
          if (is_string($resolvedBase) && $resolvedBase !== '') {
              $baseUrl = $resolvedBase;
          }
      }

      // The page scheme does not change while iterating, so compute it once.
      $scheme = parse_url($url, PHP_URL_SCHEME);
      if (!is_string($scheme) || $scheme === '') {
          $scheme = 'https';
      }

      foreach ($links as $link) {
          if (!$link instanceof DOMElement) {
              continue;
          }
          $href = trim($link->getAttribute('href'));
          if ($href === '') {
              // An empty href would resolve to the page itself, which is not an icon.
              continue;
          }
          // Handle protocol-relative URLs by adding the current URL's scheme
          if (substr($href, 0, 2) === '//') {
              $href = $scheme . ':' . $href;
          }
          $absolute = \SimplePie\IRI::absolutize($baseUrl, $href);
          if (!is_object($absolute)) {
              // This candidate is unusable; the next ones may still be fine.
              continue;
          }
          $iri = $absolute->get_iri();
          if (!is_string($iri) || $iri === '') {
              continue;
          }
          $favicon = downloadHttp($iri, [CURLOPT_REFERER => $url]);
          if (isImgMime($favicon)) {
              return $favicon;
          }
      }
      return '';
  }
  /**
   * Find a favicon for a website and store it in a file, falling back to the stock icon.
   *
   * Lookup order, stopping at the first success:
   *  1. icons declared by the page at $url (searchFavicon());
   *  2. icons declared by the site root page, when the root differs from $url;
   *  3. the conventional "favicon.ico" at the site root, accepted only if isImgMime() approves it.
   * When nothing was found, or the file could not be written, DEFAULT_FAVICON is copied to $dest instead.
   *
   * @param string $url Website URL; surrounding whitespace is trimmed.
   * @param string $dest Path of the file to write.
   * @return bool True when a downloaded favicon was written or the default icon was copied, false otherwise.
   */
  function download_favicon(string $url, string $dest): bool {
      $url = trim($url);
      $favicon = searchFavicon($url);

      if ($favicon === '') {
          $rootUrl = preg_replace('%^(https?://[^/]+).*$%i', '$1/', $url);
          // preg_replace() yields null on a PCRE error; in that case there is no root URL to try.
          if (is_string($rootUrl)) {
              if ($rootUrl != $url) {
                  $url = $rootUrl;
                  $favicon = searchFavicon($url);
              }
              if ($favicon === '') {
                  $link = $rootUrl . 'favicon.ico';
                  $favicon = downloadHttp($link, [CURLOPT_REFERER => $url]);
                  if (!isImgMime($favicon)) {
                      $favicon = '';
                  }
              }
          }
      }

      if ($favicon !== '' && file_put_contents($dest, $favicon) > 0) {
          return true;
      }
      // Best effort: a failing copy is reported through the return value only.
      return @copy(DEFAULT_FAVICON, $dest);
  }
  /**
   * Give the MIME type to announce for an icon file.
   *
   * @param string $ico Path of the icon file, as accepted by mime_content_type().
   * @return string The type reported by mime_content_type() when that function exists and returns
   *  a non-empty value, with "image/svg" rewritten to "image/svg+xml"; "image/x-icon" otherwise.
   */
  function contentType(string $ico): string {
      $detected = function_exists('mime_content_type') ? mime_content_type($ico) : false;
      $mime = $detected ?: 'image/x-icon';
      return $mime === 'image/svg' ? 'image/svg+xml' : $mime;
  }