Przeglądaj źródła

Rework the behavior of favicon search (#5839)

* Rework the behavior of favicon search

Use xpath queries to reduce the filtering necessary.

Don't call checkUrl since the href value will not always be a full URL.

Check for a base path in the HTML to be used if the favicon href doesn't
start with a leading slash.

* Break long xpath query into two lines

* Fix issues found by CI
Dan Hersam 2 lat temu
rodzic
commit
57f46922e8
1 zmienionych plików z 35 dodań i 28 usunięć
  1. 35 28
      lib/favicons.php

+ 35 - 28
lib/favicons.php

@@ -68,34 +68,41 @@ function downloadHttp(string &$url, array $curlOptions = []): string {
 function searchFavicon(string &$url): string {
 	$dom = new DOMDocument();
 	$html = downloadHttp($url);
-	if ($html != '' && @$dom->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
-		$rels = array('shortcut icon', 'icon');
-		$links = $dom->getElementsByTagName('link');
-		foreach ($rels as $rel) {
-			foreach ($links as $link) {
-				if ($link->hasAttribute('rel') && $link->hasAttribute('href') &&
-					strtolower(trim($link->getAttribute('rel'))) === $rel) {
-					$href = trim($link->getAttribute('href'));
-					if (substr($href, 0, 2) === '//') {
-						// Case of protocol-relative URLs
-						if (preg_match('%^(https?:)//%i', $url, $matches) === 1) {
-							$href = $matches[1] . $href;
-						} else {
-							$href = 'https:' . $href;
-						}
-					}
-					$checkUrl = checkUrl($href, false);
-					if (is_string($checkUrl)) {
-						$href = SimplePie_IRI::absolutize($url, $href);
-					}
-					$favicon = downloadHttp($href, array(
-							CURLOPT_REFERER => $url,
-						));
-					if (isImgMime($favicon)) {
-						return $favicon;
-					}
-				}
-			}
+
+	if ($html == '' || !@$dom->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
+		return '';
+	}
+
+	$xpath = new DOMXPath($dom);
+	$links = $xpath->query('//link[@href][translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="shortcut icon"'
+		. ' or translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="icon"]');
+
+	if (!$links) {
+		return '';
+	}
+
+	// Use the base element for relative paths, if there is one
+	$baseElements = $xpath->query('//base[@href]');
+	$baseElement = ($baseElements !== false && $baseElements->length > 0) ? $baseElements->item(0) : null;
+	$baseUrl = ($baseElement instanceof DOMElement) ? $baseElement->getAttribute('href') : $url;
+
+	foreach ($links as $link) {
+		if (!$link instanceof DOMElement) {
+			continue;
+		}
+		$href = trim($link->getAttribute('href'));
+		$urlParts = parse_url($url);
+
+		// Handle protocol-relative URLs by adding the current URL's scheme
+		if (substr($href, 0, 2) === '//') {
+			$href = ($urlParts['scheme'] ?? 'https') . '://' . $href;
+		}
+
+		$href = SimplePie_IRI::absolutize($baseUrl, $href);
+
+		$favicon = downloadHttp($href, array(CURLOPT_REFERER => $url));
+		if (isImgMime($favicon)) {
+			return $favicon;
 		}
 	}
 	return '';