Просмотр исходного кода

Improved CSS filter (#7091)

* Improved CSS filter
Remove unwanted elements both before and after sanitizing
fix https://github.com/FreshRSS/FreshRSS/issues/7084
Improved
fix bug in https://github.com/FreshRSS/FreshRSS/commit/33fd07f6f26310d4806077cc87bcdf9b8b940e35#commitcomment-150152171

* fix typing
Alexandre Alapetite 1 год назад
Родитель
Коммит
272af0f3c4
2 изменённых файла: 34 добавлено и 11 удалено
  1. 3 1
      app/Controllers/subscriptionController.php
  2. 31 10
      app/Models/Entry.php

+ 3 - 1
app/Controllers/subscriptionController.php

@@ -299,7 +299,9 @@ class FreshRSS_subscription_Controller extends FreshRSS_ActionController {
 				}
 			}
 
-			$feed->_attribute('path_entries_conditions', Minz_Request::paramTextToArray('path_entries_conditions', plaintext: true));
+			$conditions = Minz_Request::paramTextToArray('path_entries_conditions', plaintext: true);
+			$conditions = array_filter(array_map('trim', $conditions));
+			$feed->_attribute('path_entries_conditions', empty($conditions) ? null : $conditions);
 			$feed->_attribute('path_entries_filter', Minz_Request::paramString('path_entries_filter', true));
 
 			$values = [

+ 31 - 10
app/Models/Entry.php

@@ -814,9 +814,12 @@ HTML;
 		if ($url === '' || $feed === null || $feed->pathEntries() === '') {
 			return '';
 		}
-		if (!empty($feed->attributeArray('path_entries_conditions'))) {
+
+		$conditions = $feed->attributeArray('path_entries_conditions') ?? [];
+		$conditions = array_filter(array_map(fn($v) => is_string($v) ? trim($v) : '', $conditions));
+		if (count($conditions) > 0) {
 			$found = false;
-			foreach ($feed->attributeArray('path_entries_conditions') as $condition) {
+			foreach ($conditions as $condition) {
 				if (!is_string($condition) || trim($condition) === '') {
 					continue;
 				}
@@ -860,22 +863,16 @@ HTML;
 				$base = (parse_url($url, PHP_URL_SCHEME) ?? 'https') . ':' . $base;
 			}
 
-			unset($xpath, $doc);
-			$html = sanitizeHTML($html, $base);
-			$doc = new DOMDocument();
-			$utf8BOM = "\xEF\xBB\xBF";
-			$doc->loadHTML($utf8BOM . $html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING);
-			$xpath = new DOMXPath($doc);
-
 			$html = '';
 			$cssSelector = htmlspecialchars_decode($feed->pathEntries(), ENT_QUOTES);
 			$cssSelector = trim($cssSelector, ', ');
+			$path_entries_filter = trim($feed->attributeString('path_entries_filter') ?? '', ', ');
 			$nodes = $xpath->query((new Gt\CssXPath\Translator($cssSelector, '//'))->asXPath());
 			if ($nodes != false) {
-				$path_entries_filter = trim($feed->attributeString('path_entries_filter') ?? '');
 				$filter_xpath = $path_entries_filter === '' ? '' : (new Gt\CssXPath\Translator($path_entries_filter, 'descendant-or-self::'))->asXPath();
 				foreach ($nodes as $node) {
 					if ($filter_xpath !== '') {
+						// Remove unwanted elements once before sanitizing, for CSS selectors to also match original content
 						$filterednodes = $xpath->query($filter_xpath, $node) ?: [];
 						foreach ($filterednodes as $filterednode) {
 							if ($filterednode === $node) {
@@ -890,6 +887,30 @@ HTML;
 					$html .= $doc->saveHTML($node) . "\n";
 				}
 			}
+
+			unset($xpath, $doc);
+			$html = sanitizeHTML($html, $base);
+
+			if ($path_entries_filter !== '') {
+				// Remove unwanted elements again after sanitizing, for CSS selectors to also match sanitized content
+				$modified = false;
+				$doc = new DOMDocument();
+				$utf8BOM = "\xEF\xBB\xBF";
+				$doc->loadHTML($utf8BOM . $html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING);
+				$xpath = new DOMXPath($doc);
+				$filterednodes = $xpath->query((new Gt\CssXPath\Translator($path_entries_filter, '//'))->asXPath()) ?: [];
+				foreach ($filterednodes as $filterednode) {
+					if (!($filterednode instanceof DOMElement) || $filterednode->parentNode === null) {
+						continue;
+					}
+					$filterednode->parentNode->removeChild($filterednode);
+					$modified = true;
+				}
+				if ($modified) {
+					$html = $doc->saveHTML($doc->getElementsByTagName('body')->item(0) ?? $doc->firstElementChild) ?: $html;
+				}
+			}
+
 			return trim($html);
 		} else {
 			throw new Minz_Exception();