Переглянути джерело

Sanitize before CSS manipulation (#7073)

Allows using the same CSS filters for content coming from RSS feeds and from Web scraping
fix https://github.com/FreshRSS/FreshRSS/issues/7039
https://github.com/FreshRSS/FreshRSS/issues/7014#issuecomment-2508987606
https://github.com/FreshRSS/FreshRSS/pull/7037
Alexandre Alapetite 1 рік тому
батько
коміт
5ba5271e48
1 змінений файл: 9 додано та 4 видалено
  1. 9 4
      app/Models/Entry.php

+ 9 - 4
app/Models/Entry.php

@@ -844,7 +844,13 @@ HTML;
 				$base = (parse_url($url, PHP_URL_SCHEME) ?? 'https') . ':' . $base;
 			}
 
-			$content = '';
+			unset($xpath, $doc);
+			$html = sanitizeHTML($html, $base);
+			$doc = new DOMDocument();
+			$doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING);
+			$xpath = new DOMXPath($doc);
+
+			$html = '';
 			$cssSelector = htmlspecialchars_decode($feed->pathEntries(), ENT_QUOTES);
 			$cssSelector = trim($cssSelector, ', ');
 			$nodes = $xpath->query((new Gt\CssXPath\Translator($cssSelector, '//'))->asXPath());
@@ -864,11 +870,10 @@ HTML;
 							$filterednode->parentNode->removeChild($filterednode);
 						}
 					}
-					$content .= $doc->saveHTML($node) . "\n";
+					$html .= $doc->saveHTML($node) . "\n";
 				}
 			}
-			$html = trim(sanitizeHTML($content, $base));
-			return $html;
+			return trim($html);
 		} else {
 			throw new Minz_Exception();
 		}