Ver código fonte

Get content by parsing: fix base for protocol-relative URLs (#4500)

* Get content by parsing: fix base for protocol-relative URLs

* Guess missing URL scheme for base

* Light refactoring
Alexandre Alapetite 3 anos atrás
pai
commit
96e0efa6f0
1 arquivo alterado com 7 adições e 3 exclusões
  1. 7 3
      app/Models/Entry.php

+ 7 - 3
app/Models/Entry.php

@@ -547,9 +547,13 @@ class FreshRSS_Entry extends Minz_Model {
 			}
 
 			$base = $xpath->evaluate('normalize-space(//base/@href)');
-			if ($base != false && is_string($base)) {
-				$url = $base;
+			if ($base == false || !is_string($base)) {
+				$base = $url;
+			} elseif (substr($base, 0, 2) === '//') {
+				//Protocol-relative URLs "//www.example.net"
+				$base = (parse_url($url, PHP_URL_SCHEME) ?? 'https') . ':' . $base;
 			}
+
 			$content = '';
 			$nodes = $xpath->query(new Gt\CssXPath\Translator($path));
 			if ($nodes != false) {
@@ -557,7 +561,7 @@ class FreshRSS_Entry extends Minz_Model {
 					$content .= $doc->saveHtml($node) . "\n";
 				}
 			}
-			$html = trim(sanitizeHTML($content, $url));
+			$html = trim(sanitizeHTML($content, $base));
 			return $html;
 		} else {
 			throw new Exception();