Răsfoiți Sursa

Implement whitelist for SimplePie sanitizer (#7924)

* Implement whitelist for SimplePie sanitizer

ref: https://github.com/FreshRSS/FreshRSS/pull/7770#issuecomment-3140334326

https://github.com/FreshRSS/simplepie/pull/53
https://github.com/simplepie/simplepie/pull/947

* Remove `<plaintext>` from whitelist

* Improve order

* Remove some tags from whitelist

* Revert partially

* sync

* Display contents of `<noscript>` and `<noembed>`

* sync

* Allow use of `<track>`

* sync again

* Sync to SimplePie fork
https://github.com/FreshRSS/simplepie/pull/53

* Alphabetic order

* Reduce list of stripped attributes

* Temporarily strip some attributes

---------

Co-authored-by: Alexandre Alapetite <alexandre@alapetite.fr>
Inverle 5 luni în urmă
părinte
comite
500d05f3c5

+ 2 - 2
docs/en/admins/10_ServerConfig.md

@@ -116,9 +116,9 @@ server {
 ## Security
 
 Avoid overwriting the [`Content-Security-Policy`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/CSP) header with directives such as `more_set_headers "Content-Security-Policy: ..."`
-This will likely make your FreshRSS instance vulnerable to event handler XSS attacks, since FreshRSS does not yet blacklist all event attributes.
 
-✅ Example of good CSP: `default-src 'self' frame-ancestors 'self'`
+✅ Example of good CSP: `default-src 'self'; frame-ancestors 'self'`
+
 ❌ Bad CSP: `upgrade-insecure-requests`
 
 Debug CSP header:

+ 1 - 1
lib/composer.json

@@ -14,7 +14,7 @@
         "marienfressinaud/lib_opml": "0.5.1",
         "phpgt/cssxpath": "v1.4.0",
         "phpmailer/phpmailer": "7.0.0",
-        "simplepie/simplepie": "dev-freshrss#24cfb0c6d81f81ef110c8257d3464b2649476c77"
+        "simplepie/simplepie": "dev-freshrss#187c2f28c6a7050e46e7bbfa5579552f78a6c1df"
     },
     "config": {
         "sort-packages": true,

+ 157 - 15
lib/lib_rss.php

@@ -366,22 +366,164 @@ function customSimplePie(array $attributes = [], array $curl_options = []): \Sim
 	$simplePie->set_curl_options($curl_options);
 
 	$simplePie->strip_comments(true);
-	$simplePie->strip_htmltags([
-		'base', 'blink', 'body', 'doctype', 'embed',
-		'font', 'form', 'frame', 'frameset', 'html',
-		'link', 'input', 'marquee', 'meta', 'noscript',
-		'object', 'param', 'plaintext', 'script', 'style',
-		'svg',	//TODO: Support SVG after sanitizing and URL rewriting of xlink:href
-	]);
 	$simplePie->rename_attributes(['id', 'class']);
-	$simplePie->strip_attributes(array_merge($simplePie->strip_attributes, [
-		'alink', 'autoplay', 'background', 'bgcolor', 'class', 'form', 'formaction',
-		'link', 'onblur', 'onchange', 'onclick', 'ondblclick', 'onfocus',
-		'onkeydown', 'onkeypress', 'onkeyup', 'onload', 'onmousedown', 'onmousemove',
-		'onmouseout', 'onmouseover', 'onmouseup', 'onselect', 'onunload',
-		'seamless', 'sizes', 'srcdoc', 'srcset', 'text', 'vlink', 'referrerpolicy', 'ping',
-		'target', 'rel', 'name', 'download', 'attributionsrc',
-	]));
+	$simplePie->allow_aria_attr(true);
+	$simplePie->allow_data_attr(true);
+	$simplePie->allowed_html_attributes([
+		// HTML
+		'dir', 'draggable', 'hidden', 'lang', 'role', 'title',
+		// MathML
+		'displaystyle', 'mathsize', 'scriptlevel',
+	]);
+	$simplePie->allowed_html_elements_with_attributes([
+		// HTML
+		'a' => ['href', 'hreflang', 'type'],
+		'abbr' => [],
+		'acronym' => [],
+		'address' => [],
+		// 'area' => [], // TODO: support <area> after rewriting ids with a format like #ugc-<insert original id here> (maybe)
+		'article' => [],
+		'aside' => [],
+		'audio' => ['controlslist', 'loop', 'muted', 'src'],
+		'b' => [],
+		'bdi' => [],
+		'bdo' => [],
+		'big' => [],
+		'blink' => [],
+		'blockquote' => ['cite'],
+		'br' => ['clear'],
+		'button' => ['disabled'],
+		'canvas' => ['width', 'height'],
+		'caption' => ['align'],
+		'center' => [],
+		'cite' => [],
+		'code' => [],
+		'col' => ['span', 'align', 'valign', 'width'],
+		'colgroup' => ['span', 'align', 'valign', 'width'],
+		'data' => ['value'],
+		'datalist' => [],
+		'dd' => [],
+		'del' => ['cite', 'datetime'],
+		'details' => ['open'],
+		'dfn' => [],
+		'dialog' => [],
+		'dir' => [],
+		'div' => ['align'],
+		'dl' => [],
+		'dt' => [],
+		'em' => [],
+		'fieldset' => ['disabled'],
+		'figcaption' => [],
+		'figure' => [],
+		'footer' => [],
+		'h1' => [],
+		'h2' => [],
+		'h3' => [],
+		'h4' => [],
+		'h5' => [],
+		'h6' => [],
+		'header' => [],
+		'hgroup' => [],
+		'hr' => ['align', 'noshade', 'size', 'width'],
+		'i' => [],
+		'iframe' => ['src', 'align', 'frameborder', 'longdesc', 'marginheight', 'marginwidth', 'scrolling'],
+		'image' => ['src', 'alt', 'width', 'height', 'align', 'border', 'hspace', 'longdesc', 'vspace'],
+		'img' => ['src', 'alt', 'width', 'height', 'align', 'border', 'hspace', 'longdesc', 'vspace'],
+		'ins' => ['cite', 'datetime'],
+		'kbd' => [],
+		'label' => [],
+		'legend' => [],
+		'li' => ['value', 'type'],
+		'main' => [],
+		// 'map' => [], // TODO: support <map> after rewriting ids with a format like #ugc-<insert original id here> (maybe)
+		'mark' => [],
+		'marquee' => ['behavior', 'direction', 'height', 'hspace', 'loop', 'scrollamount', 'scrolldelay', 'truespeed', 'vspace', 'width'],
+		'menu' => [],
+		'meter' => ['value', 'min', 'max', 'low', 'high', 'optimum'],
+		'nav' => [],
+		'nobr' => [],
+		// 'noembed' => [], // <embed> is not allowed, so we want to display the contents of <noembed>
+		'noframes' => [],
+		// 'noscript' => [], // From the perspective of the feed content, JS isn't allowed so we want to display the contents of <noscript>
+		'ol' => ['reversed', 'start', 'type'],
+		'optgroup' => ['disabled', 'label'],
+		'option' => ['disabled', 'label', 'selected', 'value'],
+		'output' => [],
+		'p' => ['align'],
+		'picture' => [],
+		// 'plaintext' => [], // Can't be closed. See: https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/plaintext
+		'pre' => ['width', 'wrap'],
+		'progress' => ['max', 'value'],
+		'q' => ['cite'],
+		'rb' => [],
+		'rp' => [],
+		'rt' => [],
+		'rtc' => [],
+		'ruby' => [],
+		's' => [],
+		'samp' => [],
+		'search' => [],
+		'section' => [],
+		'select' => ['disabled', 'multiple', 'size'],
+		'small' => [],
+		'source' => ['type', 'src', 'media', 'height', 'width'],
+		'span' => [],
+		'strike' => [],
+		'strong' => [],
+		'sub' => [],
+		'summary' => [],
+		'sup' => [],
+		'table' => ['align', 'border', 'cellpadding', 'cellspacing', 'rules', 'summary', 'width'],
+		'tbody' => ['align', 'char', 'charoff', 'valign'],
+		'td' => ['colspan', 'headers', 'rowspan', 'abbr', 'align', 'height', 'scope', 'valign', 'width'],
+		'textarea' => ['cols', 'disabled', 'maxlength', 'minlength', 'placeholder', 'readonly', 'rows', 'wrap'],
+		'tfoot' => ['align', 'valign'],
+		'th' => ['abbr', 'colspan', 'rowspan', 'scope', 'align', 'height', 'valign', 'width'],
+		'thead' => ['align', 'valign'],
+		'time' => ['datetime'],
+		'tr' => ['align', 'valign'],
+		'track' => ['default', 'kind', 'srclang', 'label', 'src'],
+		'tt' => [],
+		'u' => [],
+		'ul' => ['type'],
+		'var' => [],
+		'video' => ['src', 'poster', 'controlslist', 'height', 'loop', 'muted', 'playsinline', 'width'],
+		'wbr' => [],
+		'xmp' => [],
+		// MathML
+		'maction' => ['actiontype', 'selection'],
+		'math' => ['display'],
+		'menclose' => ['notation'],
+		'merror' => [],
+		'mfenced' => ['close', 'open', 'separators'],
+		'mfrac' => ['denomalign', 'linethickness', 'numalign'],
+		'mi' => ['mathvariant'],
+		'mmultiscripts' => ['subscriptshift', 'superscriptshift'],
+		'mn' => [],
+		'mo' => ['accent', 'fence', 'form', 'largeop', 'lspace', 'maxsize', 'minsize', 'movablelimits', 'rspace', 'separator', 'stretchy', 'symmetric'],
+		'mover' => ['accent'],
+		'mpadded' => ['depth', 'height', 'lspace', 'voffset', 'width'],
+		'mphantom' => [],
+		'mprescripts' => [],
+		'mroot' => [],
+		'mrow' => [],
+		'ms' => [],
+		'mspace' => ['depth', 'height', 'width'],
+		'msqrt' => [],
+		'msub' => [],
+		'msubsup' => ['subscriptshift', 'superscriptshift'],
+		'msup' => ['superscriptshift'],
+		'mtable' => ['align', 'columnalign', 'columnlines', 'columnspacing', 'frame', 'framespacing', 'rowalign', 'rowlines', 'rowspacing', 'width'],
+		'mtd' => ['columnspan', 'rowspan', 'columnalign', 'rowalign'],
+		'mtext' => [],
+		'mtr' => ['columnalign', 'rowalign'],
+		'munder' => ['accentunder'],
+		'munderover' => ['accent', 'accentunder'],
+		// TODO: Support SVG after sanitizing and URL rewriting of xlink:href
+	]);
+	$simplePie->strip_attributes([
+		'data-auto-leave-validation', 'data-leave-validation', 'data-no-leave-validation', 'data-original',
+	]);
 	$simplePie->add_attributes([
 		'audio' => ['controls' => 'controls', 'preload' => 'none'],
 		'iframe' => [

+ 14 - 0
lib/simplepie/simplepie/CHANGELOG.md

@@ -7,8 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased](https://github.com/simplepie/simplepie/compare/1.9.0...master)
 
+### Added
+
 Nothing yet.
 
+### Changed
+
+Nothing yet.
+
+### Fixed
+
+Nothing yet.
+
+### Deprecated
+
+- `SimplePie\SimplePie::set_item_limit()` is deprecated since it only affects multi-feed mode, which has been deprecated in SimplePie 1.9 (by @jtojnar in [#954](https://github.com/simplepie/simplepie/pull/954))
+
 ## [1.9.0](https://github.com/simplepie/simplepie/compare/1.8.1...1.9.0) - 2025-09-12
 
 ### Added

+ 1 - 1
lib/simplepie/simplepie/phpcs.xml

@@ -23,7 +23,7 @@
 	<rule ref="PSR1.Classes.ClassDeclaration.MissingNamespace">
 		<exclude-pattern>./library/</exclude-pattern>
 	</rule>
-	<rule ref="Squiz.Classes.ValidClassName.NotCamelCaps">
+	<rule ref="Squiz.Classes.ValidClassName.NotPascalCase">
 		<exclude-pattern>./library/</exclude-pattern>
 	</rule>
 </ruleset>

+ 109 - 6
lib/simplepie/simplepie/src/Sanitize.php

@@ -50,6 +50,14 @@ class Sanitize implements RegistryAware
     public $strip_attributes = ['bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'];
     /** @var string[] */
     public $rename_attributes = [];
+    /** @var array<string, string[]> */
+    public $allowed_html_elements_with_attributes = [];
+    /** @var string[] */
+    public $allowed_html_attributes = [];
+    /** @var bool */
+    public $allow_data_attr = true;
+    /** @var bool */
+    public $allow_aria_attr = true;
     /** @var array<string, array<string, string>> */
     public $add_attributes = ['audio' => ['preload' => 'none'], 'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'], 'video' => ['preload' => 'none']];
     /** @var bool */
@@ -236,6 +244,42 @@ class Sanitize implements RegistryAware
         }
     }
 
+    /**
+     * @param array<string,string[]> $tags Set array of allowed HTML elements with their allowed attributes.
+     * Note that `<html>`, `<head>`, `<body>`, `<div>` are always allowed.
+     * Preferred over {@see Sanitize::strip_htmltags()}.
+     */
+    public function allowed_html_elements_with_attributes(array $tags = []): void
+    {
+        $this->strip_htmltags = [];
+        $this->strip_attributes = [];
+        $this->allowed_html_elements_with_attributes = $tags;
+    }
+
+    /**
+     * @param string[] $attrs Set default array of allowed HTML attributes.
+     */
+    public function allowed_html_attributes(array $attrs = []): void
+    {
+        $this->allowed_html_attributes = $attrs;
+    }
+
+    /**
+     * @param bool $allow Whether data-* should be allowed or not
+     */
+    public function allow_data_attr(bool $allow = true): void
+    {
+        $this->allow_data_attr = $allow;
+    }
+
+    /**
+     * @param bool $allow Whether aria-* should be allowed or not
+     */
+    public function allow_aria_attr(bool $allow = true): void
+    {
+        $this->allow_aria_attr = $allow;
+    }
+
     /**
      * @return void
      */
@@ -460,6 +504,16 @@ class Sanitize implements RegistryAware
                     }
                 }
 
+                if (!empty($this->rename_attributes)) {
+                    foreach ($this->rename_attributes as $attrib) {
+                        $this->rename_attr($attrib, $xpath);
+                    }
+                }
+
+                if (!empty($this->allowed_html_elements_with_attributes)) {
+                    $this->enforce_allowed_html_nodes($document, $this->allow_data_attr, $this->allow_aria_attr);
+                }
+
                 // Strip out HTML tags and attributes that might cause various security problems.
                 // Based on recommendations by Mark Pilgrim at:
                 // https://web.archive.org/web/20110902041826/http://diveintomark.org:80/archives/2003/06/12/how_to_consume_rss_safely
@@ -469,12 +523,6 @@ class Sanitize implements RegistryAware
                     }
                 }
 
-                if ($this->rename_attributes) {
-                    foreach ($this->rename_attributes as $attrib) {
-                        $this->rename_attr($attrib, $xpath);
-                    }
-                }
-
                 if ($this->strip_attributes) {
                     foreach ($this->strip_attributes as $attrib) {
                         $this->strip_attr($attrib, $xpath);
@@ -639,6 +687,61 @@ class Sanitize implements RegistryAware
         }
     }
 
+    /**
+     * Keep only allowed HTML elements (tags) and their allowed attributes.
+     */
+    protected function enforce_allowed_html_nodes(\DOMNode $element, bool $allow_data_attr = true, bool $allow_aria_attr = true): void
+    {
+        if ($element instanceof \DOMElement) {
+            $tag = $element->tagName;
+            $parent = $element->parentNode;
+            if (!in_array($tag, ['html', 'head', 'body', 'div'], true)
+                && !isset($this->allowed_html_elements_with_attributes[$tag])) {
+                if (!in_array($tag, ['script', 'style', 'svg', 'math'], true)) {
+                    // Preserve children inside the disallowed element
+                    for ($i = $element->childNodes->length - 1; $i >= 0; $i--) {
+                        $child = $element->childNodes->item($i);
+                        if ($child === null) {
+                            continue;
+                        }
+                        if ($child instanceof \DOMText) {
+                            $child->nodeValue = htmlspecialchars($child->nodeValue ?? '', ENT_QUOTES, 'UTF-8');
+                        }
+                        if ($parent !== null) {
+                            $parent->insertBefore($child, $element);
+                        }
+                        $this->enforce_allowed_html_nodes($child, $allow_data_attr, $allow_aria_attr);
+                    }
+                }
+                if ($parent !== null) {
+                    $parent->removeChild($element);
+                    return;
+                }
+            }
+            $allowed_attrs = array_merge($this->allowed_html_elements_with_attributes[$tag] ?? [], $this->allowed_html_attributes);
+            for ($i = $element->attributes->length - 1; $i >= 0; $i--) {
+                $attr = $element->attributes[$i]->nodeName;
+                // Skip data-*, aria-* if allowed
+                if (($allow_data_attr && str_starts_with($attr, 'data-'))
+                    || ($allow_aria_attr && str_starts_with($attr, 'aria-'))) {
+                    continue;
+                }
+
+                if (!in_array($attr, $allowed_attrs, true)) {
+                    $element->removeAttributeNode($element->attributes[$i]);
+                }
+            }
+        }
+        if ($element instanceof \DOMElement || $element instanceof \DOMDocument) {
+            for ($i = $element->childNodes->length - 1; $i >= 0; $i--) {
+                $child = $element->childNodes->item($i);
+                if ($child !== null) {
+                    $this->enforce_allowed_html_nodes($child, $allow_data_attr, $allow_aria_attr);
+                }
+            }
+        }
+    }
+
     /**
      * @param int-mask-of<SimplePie::CONSTRUCT_*> $type
      * @return void

+ 67 - 0
lib/simplepie/simplepie/src/SimplePie.php

@@ -663,6 +663,35 @@ class SimplePie
      */
     public $rename_attributes = [];
 
+    /**
+     * @var array<string,string[]> Stores allowed tags and attributes.
+     * Preferred over $strip_htmltags and $strip_attributes.
+     * Note that `<html>`, `<head>`, `<body>`, `<div>` are always allowed.
+     * @see SimplePie::allowed_html_elements_with_attributes()
+     * @access private
+     */
+    public $allowed_html_elements_with_attributes = [];
+
+    /**
+     * @var string[] Stores array of default allowed attributes.
+     * @see SimplePie::allowed_html_attributes()
+     * @access private
+     */
+    public $allowed_html_attributes = [];
+
+    /**
+     * @var bool Whether `data-*` attributes should be allowed or not
+     * @see SimplePie::allow_data_attr()
+     * @access private
+     */
+    public $allow_data_attr = true;
+    /**
+     * @var bool Whether `aria-*` attributes should be allowed or not
+     * @see SimplePie::allow_aria_attr()
+     * @access private
+     */
+    public $allow_aria_attr = true;
+
     /**
      * @var bool Should we throw exceptions, or use the old-style error property?
      * @access private
@@ -1524,6 +1553,42 @@ class SimplePie
         }
     }
 
+    /**
+     * @param array<string,string[]> $tags Set array of allowed tags and attributes.
+     * Note that `<html>`, `<head>`, `<body>`, `<div>` are always allowed.
+     * Preferred over {@see SimplePie::allowed_html_attributes()} and {@see SimplePie::strip_attributes()}.
+     *
+     * Example: `['a' => ['href', 'title'], 'img' => ['src', 'alt']]`
+     */
+    public function allowed_html_elements_with_attributes(array $tags = []): void
+    {
+        $this->sanitize->allowed_html_elements_with_attributes($tags);
+    }
+
+    /**
+     * @param string[] $attrs Set default array of allowed attributes.
+     */
+    public function allowed_html_attributes(array $attrs = []): void
+    {
+        $this->sanitize->allowed_html_attributes($attrs);
+    }
+
+    /**
+     * @param bool $allow Whether `data-*` attributes should be allowed or not
+     */
+    public function allow_data_attr(bool $allow = true): void
+    {
+        $this->sanitize->allow_data_attr($allow);
+    }
+
+    /**
+     * @param bool $allow Whether `aria-*` attributes should be allowed or not
+     */
+    public function allow_aria_attr(bool $allow = true): void
+    {
+        $this->sanitize->allow_aria_attr($allow);
+    }
+
     /**
      * @return void
      */
@@ -1651,6 +1716,8 @@ class SimplePie
     /**
      * Set the limit for items returned per-feed with multifeeds
      *
+     * @deprecated since SimplePie 1.10.0, this does nothing outside multifeeds.
+     *
      * @param int $limit The maximum number of items to return.
      * @return void
      */