Forráskód Böngészése

Fix: handle very big feed (#3416)

* fix: handle big xml files which cause out of memory exceptions by working with chunks in cleanMd5 function (because of preg_replace) and parse (because of xml_parse)

* Review

* Fixes in error handling (case of the last call to xml_parse, case of
error during fopen, break in case of XML error...)
* Takes advantage of the chunking for computing the cache hash
* Larger chunks of 1MB

Co-authored-by: e <bokes74743@tjuln.com>
Co-authored-by: Alexandre Alapetite <alexandre@alapetite.fr>
Kiblyn11 5 éve
szülő
commit
0e6ad01dbf
2 módosított fájl, 38 hozzáadás és 9 törlés
  1. 18 6
      lib/SimplePie/SimplePie.php
  2. 20 3
      lib/SimplePie/SimplePie/Parser.php

+ 18 - 6
lib/SimplePie/SimplePie.php

@@ -1322,12 +1322,24 @@ class SimplePie
 
 	function cleanMd5($rss)
 	{
-		return md5(preg_replace(array(
-			'#<(lastBuildDate|pubDate|updated|feedDate|dc:date|slash:comments)>[^<]+</\\1>#',
-			'#<(media:starRating|media:statistics) [^/<>]+/>#',
-			'#<!--.+?-->#s',
-			), '', $rss));
-		
+		//Process by chunks not to use too much memory
+		if (($stream = fopen('php://temp', 'r+')) &&
+			fwrite($stream, $rss) &&
+			rewind($stream))
+		{
+			$ctx = hash_init('md5');
+			while ($stream_data = fread($stream, 1048576))
+			{
+				hash_update($ctx, preg_replace([
+					'#<(lastBuildDate|pubDate|updated|feedDate|dc:date|slash:comments)>[^<]+</\\1>#',
+					'#<(media:starRating|media:statistics) [^/<>]+/>#',
+					'#<!--.+?-->#s',
+				], '', $stream_data));
+			}
+			fclose($stream);
+			return hash_final($ctx);
+		}
+		return '';
 	}
 
 	/**

+ 20 - 3
lib/SimplePie/SimplePie/Parser.php

@@ -181,12 +181,29 @@ class SimplePie_Parser
 			xml_set_element_handler($xml, 'tag_open', 'tag_close');
 
 			// Parse!
-			if (!xml_parse($xml, $data, true))
+			if (($stream = fopen('php://temp', 'r+')) &&
+				fwrite($stream, $data) &&
+				rewind($stream))
+			{
+				//Parse by chunks not to use too much memory
+				do
+				{
+					$stream_data = fread($stream, 1048576);
+					if (!xml_parse($xml, $stream_data === false ? '' : $stream_data, feof($stream)))
+					{
+						$this->error_code = xml_get_error_code($xml);
+						$this->error_string = xml_error_string($this->error_code);
+						$return = false;
+						break;
+					}
+				} while (!feof($stream));
+				fclose($stream);
+			}
+			else
 			{
-				$this->error_code = xml_get_error_code($xml);
-				$this->error_string = xml_error_string($this->error_code);
 				$return = false;
 			}
+
 			$this->current_line = xml_get_current_line_number($xml);
 			$this->current_column = xml_get_current_column_number($xml);
 			$this->current_byte = xml_get_current_byte_index($xml);