Преглед изворни кода

Stream instead of memory copy of SimplePie entries (#2972)

* Stream instead of memory copy of SimplePie entries

https://github.com/FreshRSS/FreshRSS/issues/2952

* Undo deleted lines

* Typo

* Remove inaccessible code

https://github.com/FreshRSS/FreshRSS/pull/2972/files#r425624163

* Back-compatibility for Feed->entries

https://github.com/FreshRSS/FreshRSS/pull/2972/files#r425631913
Alexandre Alapetite пре 5 година
родитељ
комит
83b5944dcb
4 измењених фајлова са 66 додато и 76 уклоњено
  1. 6 13
      app/Controllers/feedController.php
  2. 1 11
      app/Models/Entry.php
  3. 58 51
      app/Models/Feed.php
  4. 1 1
      p/api/pshb.php

+ 6 - 13
app/Controllers/feedController.php

@@ -320,10 +320,12 @@ class FreshRSS_feed_Controller extends Minz_ActionController {
 
 			try {
 				if ($simplePiePush) {
-					$feed->loadEntries($simplePiePush);	//Used by PubSubHubbub
+					$simplePie = $simplePiePush;	//Used by WebSub
 				} else {
-					$feed->load(false, $isNewFeed);
+					$simplePie = $feed->load(false, $isNewFeed);
 				}
+				$newGuids = $simplePie == null ? [] : $feed->loadGuids($simplePie);
+				$entries = $simplePie == null ? [] : $feed->loadEntries($simplePie);
 			} catch (FreshRSS_Feed_Exception $e) {
 				Minz_Log::warning($e->getMessage());
 				$feedDAO->updateLastUpdate($feed->id(), true);
@@ -333,21 +335,14 @@ class FreshRSS_feed_Controller extends Minz_ActionController {
 
 			$needFeedCacheRefresh = false;
 
-			$entries = $feed->entries();
-			$nbEntries = count($entries);
-			if ($nbEntries > 0) {
-				$newGuids = array();
-				foreach ($entries as $entry) {
-					$newGuids[] = safe_ascii($entry->guid());
-				}
+			if (count($newGuids) > 0) {
 				// For this feed, check existing GUIDs already in database.
 				$existingHashForGuids = $entryDAO->listHashForFeedGuids($feed->id(), $newGuids);
 				$newGuids = array();
 
 				$oldGuids = array();
 				// Add entries in database if possible.
-				for ($i = 0; $i < $nbEntries; $i++) {
-					$entry = $entries[$i];
+				foreach ($entries as $entry) {
 					if (isset($newGuids[$entry->guid()])) {
 						continue;	//Skip subsequent articles with same GUID
 					}
@@ -406,8 +401,6 @@ class FreshRSS_feed_Controller extends Minz_ActionController {
 						$entryDAO->addEntry($entry->toArray());
 						$nb_new_articles++;
 					}
-					unset($entry);
-					unset($entries[$i]);
 				}
 				$entryDAO->updateLastSeen($feed->id(), $oldGuids, $mtime);
 			}

+ 1 - 11
app/Models/Entry.php

@@ -353,7 +353,6 @@ class FreshRSS_Entry extends Minz_Model {
 	}
 
 	public static function getContentByParsing($url, $path, $attributes = array()) {
-		require_once(LIB_PATH . '/lib_phpQuery.php');
 		$system_conf = Minz_Configuration::get('system');
 		$limits = $system_conf->limits;
 		$feed_timeout = empty($attributes['timeout']) ? 0 : intval($attributes['timeout']);
@@ -391,18 +390,9 @@ class FreshRSS_Entry extends Minz_Model {
 		}
 
 		if ($html) {
+			require_once(LIB_PATH . '/lib_phpQuery.php');
 			$doc = phpQuery::newDocument($html);
 			$content = $doc->find($path);
-
-			foreach (pq('img[data-src]') as $img) {
-				$imgP = pq($img);
-				$dataSrc = $imgP->attr('data-src');
-				if (strlen($dataSrc) > 4) {
-					$imgP->attr('src', $dataSrc);
-					$imgP->removeAttr('data-src');
-				}
-			}
-
 			return trim(sanitizeHTML($content->__toString(), $url));
 		} else {
 			throw new Exception();

+ 58 - 51
app/Models/Feed.php

@@ -15,7 +15,6 @@ class FreshRSS_Feed extends Minz_Model {
 	private $category = 1;
 	private $nbEntries = -1;
 	private $nbNotRead = -1;
-	private $entries = null;
 	private $name = '';
 	private $website = '';
 	private $description = '';
@@ -72,7 +71,9 @@ class FreshRSS_Feed extends Minz_Model {
 		return $this->category;
 	}
 	public function entries() {
-		return $this->entries === null ? array() : $this->entries;
+		Minz_Log::warning(__method__ . ' is deprecated since FreshRSS 1.16.1!');
+		$simplePie = $this->load(false, true);
+		return $simplePie == null ? [] : iterator_to_array($this->loadEntries($simplePie));
 	}
 	public function name() {
 		return $this->name;
@@ -267,46 +268,47 @@ class FreshRSS_Feed extends Minz_Model {
 				if ($this->httpAuth != '') {
 					$url = preg_replace('#((.+)://)(.+)#', '${1}' . $this->httpAuth . '@${3}', $url);
 				}
-				$feed = customSimplePie($this->attributes());
+				$simplePie = customSimplePie($this->attributes());
 				if (substr($url, -11) === '#force_feed') {
-					$feed->force_feed(true);
+					$simplePie->force_feed(true);
 					$url = substr($url, 0, -11);
 				}
-				$feed->set_feed_url($url);
+				$simplePie->set_feed_url($url);
 				if (!$loadDetails) {	//Only activates auto-discovery when adding a new feed
-					$feed->set_autodiscovery_level(SIMPLEPIE_LOCATOR_NONE);
+					$simplePie->set_autodiscovery_level(SIMPLEPIE_LOCATOR_NONE);
 				}
 				if ($this->attributes('clear_cache')) {
 					// Do not use `$simplePie->enable_cache(false);` as it would prevent caching in multiuser context
 					$this->clearCache();
 				}
-				Minz_ExtensionManager::callHook('simplepie_before_init', $feed, $this);
-				$mtime = $feed->init();
+				Minz_ExtensionManager::callHook('simplepie_before_init', $simplePie, $this);
+				$mtime = $simplePie->init();
 
-				if ((!$mtime) || $feed->error()) {
-					$errorMessage = $feed->error();
+				if ((!$mtime) || $simplePie->error()) {
+					$errorMessage = $simplePie->error();
 					throw new FreshRSS_Feed_Exception(
 						($errorMessage == '' ? 'Unknown error for feed' : $errorMessage) . ' [' . $url . ']'
 					);
 				}
 
-				$links = $feed->get_links('self');
+				$links = $simplePie->get_links('self');
 				$this->selfUrl = isset($links[0]) ? $links[0] : null;
-				$links = $feed->get_links('hub');
+				$links = $simplePie->get_links('hub');
 				$this->hubUrl = isset($links[0]) ? $links[0] : null;
 
 				if ($loadDetails) {
 					// si on a utilisé l'auto-discover, notre url va avoir changé
-					$subscribe_url = $feed->subscribe_url(false);
+					$subscribe_url = $simplePie->subscribe_url(false);
 
-					$title = strtr(html_only_entity_decode($feed->get_title()), array('<' => '&lt;', '>' => '&gt;', '"' => '&quot;'));	//HTML to HTML-PRE	//ENT_COMPAT except &
+					//HTML to HTML-PRE	//ENT_COMPAT except '&'
+					$title = strtr(html_only_entity_decode($simplePie->get_title()), array('<' => '&lt;', '>' => '&gt;', '"' => '&quot;'));
 					$this->_name($title == '' ? $url : $title);
 
-					$this->_website(html_only_entity_decode($feed->get_link()));
-					$this->_description(html_only_entity_decode($feed->get_description()));
+					$this->_website(html_only_entity_decode($simplePie->get_link()));
+					$this->_description(html_only_entity_decode($simplePie->get_description()));
 				} else {
 					//The case of HTTP 301 Moved Permanently
-					$subscribe_url = $feed->subscribe_url(true);
+					$subscribe_url = $simplePie->subscribe_url(true);
 				}
 
 				$clean_url = SimplePie_Misc::url_remove_credentials($subscribe_url);
@@ -316,26 +318,51 @@ class FreshRSS_Feed extends Minz_Model {
 
 				if (($mtime === true) || ($mtime > $this->lastUpdate) || $noCache) {
 					//Minz_Log::debug('FreshRSS no cache ' . $mtime . ' > ' . $this->lastUpdate . ' for ' . $clean_url);
-					$this->loadEntries($feed);	// et on charge les articles du flux
+					return $simplePie;
 				} else {
 					//Minz_Log::debug('FreshRSS use cache for ' . $clean_url);
-					$this->entries = array();
+					return null;
 				}
-
-				$feed->__destruct();	//http://simplepie.org/wiki/faq/i_m_getting_memory_leaks
-				unset($feed);
 			}
 		}
 	}
 
-	public function loadEntries($feed) {
-		$entries = array();
-		$guids = array();
+	public function loadGuids($simplePie) {
 		$hasUniqueGuids = true;
+		$testGuids = [];
+		$guids = [];
+		$hasBadGuids = $this->attributes('hasBadGuids');
+
+		for ($i = $simplePie->get_item_quantity() - 1; $i >= 0; $i--) {
+			$item = $simplePie->get_item($i);
+			if ($item == null) {
+				continue;
+			}
+			$guid = safe_ascii($item->get_id(false, false));
+			$hasUniqueGuids &= empty($testGuids['_' . $guid]);
+			$testGuids['_' . $guid] = true;
+			$guids[] = $guid;
+		}
+
+		if ($hasBadGuids != !$hasUniqueGuids) {
+			$hasBadGuids = !$hasUniqueGuids;
+			if ($hasBadGuids) {
+				Minz_Log::warning('Feed has invalid GUIDs: ' . $this->url);
+			} else {
+				Minz_Log::warning('Feed has valid GUIDs again: ' . $this->url);
+			}
+			$feedDAO = FreshRSS_Factory::createFeedDao();
+			$feedDAO->updateFeedAttribute($this, 'hasBadGuids', $hasBadGuids);
+		}
+		return $guids;
+	}
+
+	public function loadEntries($simplePie) {
+		$hasBadGuids = $this->attributes('hasBadGuids');
 
 		// We want chronological order and SimplePie uses reverse order.
-		for ($i = $feed->get_item_quantity() - 1; $i >= 0; $i--) {
-			$item = $feed->get_item($i);
+		for ($i = $simplePie->get_item_quantity() - 1; $i >= 0; $i--) {
+			$item = $simplePie->get_item($i);
 			if ($item == null) {
 				continue;
 			}
@@ -418,10 +445,9 @@ class FreshRSS_Feed extends Minz_Model {
 				}
 			}
 
-			$guid = $item->get_id(false, false);
+			$guid = safe_ascii($item->get_id(false, false));
 			unset($item);
-			$hasUniqueGuids &= empty($guids['_' . $guid]);
-			$guids['_' . $guid] = true;
+
 			$author_names = '';
 			if (is_array($authors)) {
 				foreach ($authors as $author) {
@@ -432,7 +458,7 @@ class FreshRSS_Feed extends Minz_Model {
 
 			$entry = new FreshRSS_Entry(
 				$this->id(),
-				$guid,
+				$hasBadGuids ? '' : $guid,
 				$title === null ? '' : $title,
 				$author_names,
 				$content === null ? '' : $content,
@@ -446,27 +472,8 @@ class FreshRSS_Feed extends Minz_Model {
 				$entry->loadCompleteContent();
 			}
 
-			$entries[] = $entry;
-		}
-
-		$hasBadGuids = $this->attributes('hasBadGuids');
-		if ($hasBadGuids != !$hasUniqueGuids) {
-			$hasBadGuids = !$hasUniqueGuids;
-			if ($hasBadGuids) {
-				Minz_Log::warning('Feed has invalid GUIDs: ' . $this->url);
-			} else {
-				Minz_Log::warning('Feed has valid GUIDs again: ' . $this->url);
-			}
-			$feedDAO = FreshRSS_Factory::createFeedDao();
-			$feedDAO->updateFeedAttribute($this, 'hasBadGuids', $hasBadGuids);
-		}
-		if (!$hasUniqueGuids) {
-			foreach ($entries as $entry) {
-				$entry->_guid('');
-			}
+			yield $entry;
 		}
-
-		$this->entries = $entries;
 	}
 
 	public function cleanOldEntries() {	//Remember to call updateCachedValue($id_feed) or updateCachedValues() just after

+ 1 - 1
p/api/pshb.php

@@ -152,7 +152,7 @@ foreach ($users as $userFilename) {
 	}
 }
 
-$simplePie->__destruct();
+$simplePie->__destruct();	//http://simplepie.org/wiki/faq/i_m_getting_memory_leaks
 unset($simplePie);
 
 if ($nb === 0) {