Jelajahi Sumber

Merge pull request #1502 from Alkarex/Favicon1.2.0

Update to version 1.2 of Favicon library
Alexandre Alapetite 9 tahun lalu
induk
melakukan
6d5fb27f97
5 mengubah file dengan 199 tambahan dan 98 penghapusan
  1. 1 0
      CHANGELOG.md
  2. 27 25
      lib/Favicon/DataAccess.php
  3. 139 43
      lib/Favicon/Favicon.php
  4. 23 0
      lib/Favicon/FaviconDLType.php
  5. 9 30
      lib/favicons.php

+ 1 - 0
CHANGELOG.md

@@ -23,6 +23,7 @@
 	* Improve English [#1465](https://github.com/FreshRSS/FreshRSS/pull/1465)
 * Misc.
 	* Fall back to article URL when the article GUID is empty [#1482](https://github.com/FreshRSS/FreshRSS/issues/1482)
+	* Update to version 1.2 of Favicon library [#1501](https://github.com/FreshRSS/FreshRSS/issues/1501) 
 
 
 ## 2017-03-11 FreshRSS 1.6.3

+ 27 - 25
lib/Favicon/DataAccess.php

@@ -9,33 +9,35 @@ namespace Favicon;
  **/
 class DataAccess {
 	public function retrieveUrl($url) {
-		$this->set_context();
-		return @file_get_contents($url);
+	    $this->set_context();
+	    return @file_get_contents($url);
 	}
-
+	
 	public function retrieveHeader($url) {
-		$this->set_context();
+	    $this->set_context();
 		$headers = @get_headers($url, 1);
 		return is_array($headers) ? array_change_key_case($headers) : array();
 	}
-
-	public function saveCache($file, $data) {
-		file_put_contents($file, $data);
-	}
-
-	public function readCache($file) {
-		return file_get_contents($file);
-	}
-
-	private function set_context() {
-		stream_context_set_default(
-			array(
-				'http' => array(
-					'method' => 'GET',
-					'timeout' => 10,
-					'header' => "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:20.0; Favicon; +https://github.com/ArthurHoaro/favicon) Gecko/20100101 Firefox/32.0\r\n",
-				)
-			)
-		);
-	}
-}
+	
+    public function saveCache($file, $data) {
+        file_put_contents($file, $data);
+    }
+    
+    public function readCache($file) {
+    	return file_get_contents($file);
+    }
+    
+    private function set_context() {
+        stream_context_set_default(
+            array(
+                'http' => array(
+                    'method' => 'GET',
+                    'follow_location' => 0,
+                    'max_redirects' => 1,
+                    'timeout' => 10,
+                    'header' => "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:20.0; Favicon; +https://github.com/ArthurHoaro/favicon) Gecko/20100101 Firefox/32.0\r\n",
+                )
+            )
+        );
+    }
+}

+ 139 - 43
lib/Favicon/Favicon.php

@@ -4,6 +4,8 @@ namespace Favicon;
 
 class Favicon
 {
+    protected static $TYPE_CACHE_URL = 'url';
+    protected static $TYPE_CACHE_IMG = 'img';
     protected $url = '';
     protected $cacheDir;
     protected $cacheTimeout;
@@ -16,18 +18,24 @@ class Favicon
         }
         
         $this->cacheDir = __DIR__ . '/../../resources/cache';
+        $this->cacheTimeout = 604800;
         $this->dataAccess = new DataAccess();
     }
 
+    /**
+     * Set cache settings:
+     *   - dir: cache directory
+     *   - timeout: in seconds
+     *
+     * @param array $args
+     */
     public function cache($args = array()) {
         if (isset($args['dir'])) {
             $this->cacheDir = $args['dir'];
         }
 
         if (!empty($args['timeout'])) {
-                $this->cacheTimeout = $args['timeout'];
-        } else {
-                $this->cacheTimeout = 0;
+            $this->cacheTimeout = $args['timeout'];
         }
     }
 
@@ -103,6 +111,9 @@ class Favicon
                 case '301':
                 case '302':
                     $url = isset($headers['location']) ? $headers['location'] : '';
+                    if (is_array($url)) {
+                        $url = end($url);
+                    }
                     break;
                 default:
                     $loop = FALSE;
@@ -120,9 +131,16 @@ class Favicon
 
     /**
      * Find remote (or cached) favicon
-     * @return favicon URL, false if nothing was found
-     **/
-    public function get($url = '')
+     *
+     * @param string $url  to look for a favicon
+     * @param int    $type type of retrieval (FaviconDLType):
+     *                       - HOTLINK_URL: returns remote URL
+     *                       - DL_FILE_PATH: returns the cache file name (relative to the cache directory) of the favicon downloaded locally
+     *                       - RAW_IMAGE: returns the favicon image binary string
+     *
+     * @return string|bool favicon URL, cache file name or raw image data depending on $type; false if nothing was found
+     */
+    public function get($url = '', $type = FaviconDLType::HOTLINK_URL)
     {
         // URLs passed to this method take precedence.
         if (!empty($url)) {
@@ -130,25 +148,30 @@ class Favicon
         }
 
         // Get the base URL without the path for clearer concatenations.
-        $original = rtrim($this->baseUrl($this->url, true), '/');
-        $url = rtrim($this->endRedirect($this->baseUrl($this->url, false)), '/');
-
-        if(($favicon = $this->checkCache($url)) || ($favicon = $this->getFavicon($url))) {
-            $base = true;
-        }
-        elseif(($favicon = $this->checkCache($original)) || ($favicon = $this->getFavicon($original, false))) {
-            $base = false;    
+        $url = rtrim($this->baseUrl($this->url, true), '/');
+        $original = $url;
+        if (($favicon = $this->checkCache($original, self::$TYPE_CACHE_URL)) === false
+            && ! $favicon = $this->getFavicon($original, false)
+        ) {
+            $url = rtrim($this->endRedirect($this->baseUrl($this->url, false)), '/');
+            if (($favicon = $this->checkCache($url, self::$TYPE_CACHE_URL)) === false
+                && ! $favicon = $this->getFavicon($url)
+            ) {
+                $url = $original;
+            }
         }
-        else
-            return false;
-            
-        // Save cache if necessary
-        $cache = $this->cacheDir . '/' . md5($base ? $url : $original);
-        if ($this->cacheTimeout && !file_exists($cache) || (is_writable($cache) && time() - filemtime($cache) > $this->cacheTimeout)) {
-            $this->dataAccess->saveCache($cache, $favicon);
+
+        $this->saveCache($url, $favicon, self::$TYPE_CACHE_URL);
+
+        switch ($type) {
+            case FaviconDLType::DL_FILE_PATH:
+                return $this->getImage($url, $favicon, false);
+            case FaviconDLType::RAW_IMAGE:
+                return $this->getImage($url, $favicon, true);
+            case FaviconDLType::HOTLINK_URL:
+            default:
+                return empty($favicon) ? false : $favicon;
         }
-        
-        return $favicon;
     }
     
     private function getFavicon($url, $checkDefault = true) {
@@ -168,7 +191,10 @@ class Favicon
 
         // See if it's specified in a link tag in domain url.
         if (!$favicon) {
-            $favicon = $this->getInPage($url);
+            $favicon = trim($this->getInPage($url));
+        }
+        if (substr($favicon, 0, 2) === '//') {
+            $favicon = 'https:' . $favicon;
         }
         
         // Make sure the favicon is an absolute URL.
@@ -185,7 +211,48 @@ class Favicon
 
         return $favicon;
     }
-    
+
+    /**
+     * Find remote favicon and return it as an image
+     */
+    private function getImage($url, $faviconUrl = '', $image = false)
+    {
+        if (empty($faviconUrl)) {
+            return false;
+        }
+
+        $favicon = $this->checkCache($url, self::$TYPE_CACHE_IMG);
+        // Favicon not found in the cache
+        if( $favicon === false ) {
+            $favicon = $this->dataAccess->retrieveUrl($faviconUrl);
+            // Definitely not found
+            if (!$this->checkImageMTypeContent($favicon)) {
+                return false;
+            } else {
+                $this->saveCache($url, $favicon, self::$TYPE_CACHE_IMG);
+            }
+        }
+
+        if( $image ) {
+            return $favicon;
+        }
+        else
+            return self::$TYPE_CACHE_IMG . md5($url);
+    }
+
+    /**
+     * Display data as a PNG Favicon, then exit
+     * @param $data
+     */
+    private function displayFavicon($data) {
+        header('Content-Type: image/png');
+        header('Cache-Control: private, max-age=10800, pre-check=10800');
+        header('Pragma: private');
+        header('Expires: ' . date(DATE_RFC822,strtotime('7 day')));
+        echo $data;
+        exit;
+    }
+
     private function getInPage($url) {
         $html = $this->dataAccess->retrieveUrl("{$url}/");
         preg_match('!<head.*?>.*</head>!ims', $html, $match);
@@ -197,48 +264,77 @@ class Favicon
         $head = $match[0];
         
         $dom = new \DOMDocument();
-        // Use error supression, because the HTML might be too malformed.
+        // Use error suppression, because the HTML might be too malformed.
         if (@$dom->loadHTML($head)) {
             $links = $dom->getElementsByTagName('link');
             foreach ($links as $link) {
                 if ($link->hasAttribute('rel') && strtolower($link->getAttribute('rel')) == 'shortcut icon') {
                     return $link->getAttribute('href');
-                } elseif ($link->hasAttribute('rel') && strtolower($link->getAttribute('rel')) == 'icon') {
+                }
+            }
+            foreach ($links as $link) {
+                if ($link->hasAttribute('rel') && strtolower($link->getAttribute('rel')) == 'icon') {
                     return $link->getAttribute('href');
-                } elseif ($link->hasAttribute('href') && strpos($link->getAttribute('href'), 'favicon') !== FALSE) {
+                }
+            }
+            foreach ($links as $link) {
+                if ($link->hasAttribute('href') && strpos($link->getAttribute('href'), 'favicon') !== FALSE) {
                     return $link->getAttribute('href');
                 }
             }
         }
         return false;
     }
-    
-    private function checkCache($url) {
+
+    private function checkCache($url, $type) {
         if ($this->cacheTimeout) {
-            $cache = $this->cacheDir . '/' . md5($url);
-            if (file_exists($cache) && is_readable($cache) && (time() - filemtime($cache) < $this->cacheTimeout)) {
+            $cache = $this->cacheDir . '/'. $type . md5($url);
+            if (file_exists($cache) && is_readable($cache)
+                && ($this->cacheTimeout === -1 || time() - filemtime($cache) < $this->cacheTimeout)
+            ) {
                 return $this->dataAccess->readCache($cache);
             }
-        } 
+        }
         return false;
     }
-    
+
+    /**
+     * Saves data in cacheDir if caching is enabled and no cache file exists, or if the existing cache file is writable and expired (cacheTimeout)
+     * @param $url
+     * @param $data
+     * @param $type
+     * @return string cache file path
+     */
+    private function saveCache($url, $data, $type) {
+        // Save cache if necessary
+        $cache = $this->cacheDir . '/'. $type . md5($url);
+        if ($this->cacheTimeout && !file_exists($cache)
+            || (is_writable($cache) && $this->cacheTimeout !== -1 && time() - filemtime($cache) > $this->cacheTimeout)
+        ) {
+            $this->dataAccess->saveCache($cache, $data);
+        }
+        return $cache;
+    }
+
     private function checkImageMType($url) {
-        $tmpFile = $this->cacheDir . '/tmp.ico';
         
         $fileContent = $this->dataAccess->retrieveUrl($url);
-        $this->dataAccess->saveCache($tmpFile, $fileContent);
         
+        return $this->checkImageMTypeContent($fileContent);
+    }
+
+    private function checkImageMTypeContent($content) {
+        if(empty($content)) return false;
+
         $isImage = true;
         try {
-            $finfo = finfo_open(FILEINFO_MIME_TYPE);
-            $isImage = strpos(finfo_file($finfo, $tmpFile), 'image') !== false;
-            finfo_close($finfo);
-        } catch (Exception $e) {
+            $fInfo = finfo_open(FILEINFO_MIME_TYPE);
+            $isImage = strpos(finfo_buffer($fInfo, $content), 'image') !== false;
+            finfo_close($fInfo);
+        } catch (\Exception $e) {
+            error_log('Favicon checkImageMTypeContent error: ' . $e->getMessage());
         }
 
-        unlink($tmpFile);
-        
         return $isImage;
     }
     
@@ -291,7 +387,7 @@ class Favicon
     }
 
     /**
-     * @param DataAccess $dataAccess
+     * @param DataAccess|\PHPUnit_Framework_MockObject_MockObject $dataAccess
      */
     public function setDataAccess($dataAccess)
     {

+ 23 - 0
lib/Favicon/FaviconDLType.php

@@ -0,0 +1,23 @@
+<?php
+
+
+namespace Favicon;
+
+
+interface FaviconDLType
+{
+    /**
+     * Retrieve remote favicon URL.
+     */
+    const HOTLINK_URL = 0;
+
+    /**
+     * Retrieve downloaded favicon path (requires cache).
+     */
+    const DL_FILE_PATH = 1;
+
+    /**
+     * Retrieve the image content as a binary string.
+     */
+    const RAW_IMAGE = 2;
+}

+ 9 - 30
lib/favicons.php

@@ -1,43 +1,22 @@
 <?php
 
-include(LIB_PATH . '/Favicon/Favicon.php');
+include(LIB_PATH . '/Favicon/FaviconDLType.php');
 include(LIB_PATH . '/Favicon/DataAccess.php');
+include(LIB_PATH . '/Favicon/Favicon.php');
 
 $favicons_dir = DATA_PATH . '/favicons/';
 $default_favicon = PUBLIC_PATH . '/themes/icons/default_favicon.ico';
 
 function download_favicon($website, $dest) {
-	global $favicons_dir, $default_favicon;
+	global $default_favicon;
 
 	syslog(LOG_INFO, 'FreshRSS Favicon discovery GET ' . $website);
 	$favicon_getter = new \Favicon\Favicon();
-	$favicon_getter->setCacheDir($favicons_dir);
-	$favicon_url = $favicon_getter->get($website);
-
-	if ($favicon_url === false) {
-		return @copy($default_favicon, $dest);
-	}
-
-	syslog(LOG_INFO, 'FreshRSS Favicon GET ' . $favicon_url);
-	$c = curl_init($favicon_url);
-	curl_setopt($c, CURLOPT_HEADER, false);
-	curl_setopt($c, CURLOPT_RETURNTRANSFER, true);
-	curl_setopt($c, CURLOPT_BINARYTRANSFER, true);
-	curl_setopt($c, CURLOPT_USERAGENT, 'FreshRSS/' . FRESHRSS_VERSION . ' (' . PHP_OS . '; ' . FRESHRSS_WEBSITE . ')');
-	$img_raw = curl_exec($c);
-	$status_code = curl_getinfo($c, CURLINFO_HTTP_CODE);
-	curl_close($c);
-
-	if ($status_code === 200) {
-		$file = fopen($dest, 'w');
-		if ($file !== false) {
-			fwrite($file, $img_raw);
-			fclose($file);
-			return true;
-		}
-	} else {
-		syslog(LOG_WARNING, 'FreshRSS Favicon GET ' . $favicon_url . ' error ' . $status_code);
-	}
+	$tmpPath = realpath(TMP_PATH);
+	$favicon_getter->setCacheDir($tmpPath);
+	$favicon_getter->setCacheTimeout(-1);
+	$favicon_path = $favicon_getter->get($website, \Favicon\FaviconDLType::DL_FILE_PATH);
 
-	return false;
+	return ($favicon_path != false && @rename($tmpPath . '/' . $favicon_path, $dest)) ||
+		@copy($default_favicon, $dest);
 }