Browse Source

Apply CSS selector filter also when not using full content (#6786)

Previously, removing content from articles was only possible when fetching the full article content.
With this PR, the same cleaning can be applied to the normal content provided by RSS feeds.
Alexandre Alapetite 1 year ago
parent
commit
feffa5598c
1 changed file with 33 additions and 5 deletions
  1. 33 5
      app/Models/Entry.php

+ 33 - 5
app/Models/Entry.php

@@ -856,7 +856,7 @@ HTML;
 					if ($path_entries_filter !== '') {
 					if ($path_entries_filter !== '') {
 						$filterednodes = $xpath->query((new Gt\CssXPath\Translator($path_entries_filter))->asXPath(), $node) ?: [];
 						$filterednodes = $xpath->query((new Gt\CssXPath\Translator($path_entries_filter))->asXPath(), $node) ?: [];
 						foreach ($filterednodes as $filterednode) {
 						foreach ($filterednodes as $filterednode) {
-							if ($filterednode->parentNode === null) {
+							if (!($filterednode instanceof DOMElement) || $filterednode->parentNode === null) {
 								continue;
 								continue;
 							}
 							}
 							$filterednode->parentNode->removeChild($filterednode);
 							$filterednode->parentNode->removeChild($filterednode);
@@ -872,15 +872,20 @@ HTML;
 		}
 		}
 	}
 	}
 
 
+	/**
+	 * @return bool True if the content was modified, false otherwise
+	 */
 	public function loadCompleteContent(bool $force = false): bool {
 	public function loadCompleteContent(bool $force = false): bool {
 		// Gestion du contenu
 		// Gestion du contenu
 		// Trying to fetch full article content even when feeds do not propose it
 		// Trying to fetch full article content even when feeds do not propose it
 		$feed = $this->feed();
 		$feed = $this->feed();
-		if ($feed != null && trim($feed->pathEntries()) != '') {
+		if ($feed === null) {
+			return false;
+		}
+		if (trim($feed->pathEntries()) != '') {
 			$entryDAO = FreshRSS_Factory::createEntryDao();
 			$entryDAO = FreshRSS_Factory::createEntryDao();
 			$entry = $force ? null : $entryDAO->searchByGuid($this->feedId, $this->guid);
 			$entry = $force ? null : $entryDAO->searchByGuid($this->feedId, $this->guid);
-
-			if ($entry) {
+			if ($entry !== null) {
 				// l’article existe déjà en BDD, en se contente de recharger ce contenu
 				// l’article existe déjà en BDD, en se contente de recharger ce contenu
 				$this->content = $entry->content(false);
 				$this->content = $entry->content(false);
 			} else {
 			} else {
@@ -902,7 +907,6 @@ HTML;
 								$this->content = $fullContent;
 								$this->content = $fullContent;
 								break;
 								break;
 						}
 						}
-
 						return true;
 						return true;
 					}
 					}
 				} catch (Exception $e) {
 				} catch (Exception $e) {
@@ -910,6 +914,30 @@ HTML;
 					Minz_Log::warning($e->getMessage());
 					Minz_Log::warning($e->getMessage());
 				}
 				}
 			}
 			}
+		} elseif (trim($feed->attributeString('path_entries_filter') ?? '') !== '') {
+			$doc = new DOMDocument();
+			$utf8BOM = "\xEF\xBB\xBF";
+			if (!$doc->loadHTML($utf8BOM . $this->content, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
+				return false;
+			}
+			$modified = false;
+			$xpath = new DOMXPath($doc);
+			$filterednodes = $xpath->query((new Gt\CssXPath\Translator($feed->attributeString('path_entries_filter') ?? '', '//'))->asXPath()) ?: [];
+			foreach ($filterednodes as $filterednode) {
+				if (!($filterednode instanceof DOMElement) || $filterednode->parentNode === null) {
+					continue;
+				}
+				$filterednode->parentNode->removeChild($filterednode);
+				$modified = true;
+			}
+			if ($modified) {
+				$html = $doc->saveHTML();
+				if (!is_string($html)) {
+					return false;
+				}
+				$this->content = $html;
+			}
+			return $modified;
 		}
 		}
 		return false;
 		return false;
 	}
 	}