Explorar el Código

Encore plus de flux tolérés avec leurs erreurs

Corrige https://github.com/marienfressinaud/FreshRSS/issues/332
Alexandre Alapetite hace 12 años
padre
commit
7e6d2eb6f4
Se han modificado 3 ficheros con 28 adiciones y 16 borrados
  1. CHANGELOG (+1 -0)
  2. lib/SimplePie/SimplePie.php (+7 -6)
  3. lib/SimplePie/SimplePie/Parser.php (+20 -10)

+ 1 - 0
CHANGELOG

@@ -43,6 +43,7 @@
 * PHP :
 	* Meilleure gestion des caractères spéciaux dans différents cas
 	* Amélioration des performances
+	* Encore plus tolérant pour les flux comportant des erreurs
 	* Chargement automatique des classes
 	* Alternative dans le cas d’absence de librairie JSON
 	* Pour le développement, le cache HTTP peut être désactivé en créant un fichier “./no-cache.txt”

+ 7 - 6
lib/SimplePie/SimplePie.php

@@ -1313,7 +1313,7 @@ class SimplePie
 		// First check to see if input has been overridden.
 		if ($this->input_encoding !== false)
 		{
-			$encodings[] = $this->input_encoding;
+			$encodings[] = strtoupper($this->input_encoding);
 		}
 
 		$application_types = array('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity');
@@ -1330,18 +1330,18 @@ class SimplePie
 				}
 				else
 				{
-					$encodings[] = '';	//Let the DOM parser decide first
+					$encodings[] = '';	//FreshRSS: Let the DOM parser decide first
 				}
 			}
 			elseif (in_array($sniffed, $text_types) || substr($sniffed, 0, 5) === 'text/' && substr($sniffed, -4) === '+xml')
 			{
 				if (isset($headers['content-type']) && preg_match('/;\x20?charset=([^;]*)/i', $headers['content-type'], $charset))
 				{
-					$encodings[] = $charset[1];
+					$encodings[] = strtoupper($charset[1]);
 				}
 				else
 				{
-					$encodings[] = '';
+					$encodings[] = '';	//FreshRSS: Let the DOM parser decide first
 				}
 				$encodings[] = 'US-ASCII';
 			}
@@ -1364,13 +1364,14 @@ class SimplePie
 		foreach ($encodings as $encoding)
 		{
 			// Change the encoding to UTF-8 (as we always use UTF-8 internally)
-			if ($utf8_data = (empty($encoding) || $encoding === 'UTF-8') ? $this->raw_data : $this->registry->call('Misc', 'change_encoding', array($this->raw_data, $encoding, 'UTF-8')))
+			if ($utf8_data = (empty($encoding) || $encoding === 'UTF-8') ? $this->raw_data :	//FreshRSS
+				$this->registry->call('Misc', 'change_encoding', array($this->raw_data, $encoding, 'UTF-8')))
 			{
 				// Create new parser
 				$parser = $this->registry->create('Parser');
 
 				// If it's parsed fine
-				if ($parser->parse($utf8_data, 'UTF-8'))
+				if ($parser->parse($utf8_data, empty($encoding) ? '' : 'UTF-8'))	//FreshRSS
 				{
 					$this->data = $parser->get_data();
 					if (!($this->get_type() & ~SIMPLEPIE_TYPE_NONE))

+ 20 - 10
lib/SimplePie/SimplePie/Parser.php

@@ -77,6 +77,8 @@ class SimplePie_Parser
 
 	public function parse(&$data, $encoding)
 	{
+		$xmlEncoding = '';
+
 		if (!empty($encoding))
 		{
 			// Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
@@ -121,6 +123,7 @@ class SimplePie_Parser
 				$declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
 				if ($declaration->parse())
 				{
+					$xmlEncoding = strtoupper($declaration->encoding);	//FreshRSS
 					$data = substr($data, $pos + 2);
 					$data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data;
 				}
@@ -132,17 +135,24 @@ class SimplePie_Parser
 			}
 		}
 
-		try	//FreshRSS
-		{
-			$dom = new DOMDocument();
-			$dom->recover = true;
-			$dom->strictErrorChecking = false;
-			$dom->loadXML($data);
-			$this->encoding = $encoding = $dom->encoding = 'UTF-8';
-			$data = $dom->saveXML();
-		}
-		catch (Exception $e)
+		if ($xmlEncoding === '' || $xmlEncoding === 'UTF-8')	//FreshRSS: case of no explicit HTTP encoding, and lax UTF-8
 		{
+			try
+			{
+				$dom = new DOMDocument();
+				$dom->recover = true;
+				$dom->strictErrorChecking = false;
+				$dom->loadXML($data);
+				$this->encoding = $encoding = $dom->encoding = 'UTF-8';
+				$data2 = $dom->saveXML();
+				if (strlen($data2) > (strlen($data) / 2.0))
+				{
+					$data = $data2;
+				}
+			}
+			catch (Exception $e)
+			{
+			}
 		}
 
 		$return = true;