|
|
@@ -58,6 +58,8 @@ class Sanitize implements RegistryAware
|
|
|
public $allow_data_attr = true;
|
|
|
/** @var bool */
|
|
|
public $allow_aria_attr = true;
|
|
|
+ /** @var string[] */
|
|
|
+ public $disallowed_uri_schemes = ['javascript'];
|
|
|
/** @var array<string, array<string, string>> */
|
|
|
public $add_attributes = ['audio' => ['preload' => 'none'], 'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'], 'video' => ['preload' => 'none']];
|
|
|
/** @var bool */
|
|
|
@@ -280,6 +282,14 @@ class Sanitize implements RegistryAware
|
|
|
$this->allow_aria_attr = $allow;
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * @param string[] $schemes List of URI schemes (protocols) to disallow
|
|
|
+ */
|
|
|
+ public function disallow_uri_schemes(array $schemes = ['javascript']): void
|
|
|
+ {
|
|
|
+ $this->disallowed_uri_schemes = $schemes;
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* @return void
|
|
|
*/
|
|
|
@@ -352,8 +362,8 @@ class Sanitize implements RegistryAware
|
|
|
* containing URLs that need to be resolved relative to the feed
|
|
|
*
|
|
|
* Defaults to |a|@href, |area|@href, |audio|@src, |blockquote|@cite,
|
|
|
- * |del|@cite, |form|@action, |img|@longdesc, |img|@src, |input|@src,
|
|
|
- * |ins|@cite, |q|@cite, |source|@src, |video|@src
|
|
|
+ * |del|@cite, |form|@action, |iframe|@src, |img|@longdesc, |img|@src,
|
|
|
+ * |input|@src, |ins|@cite, |q|@cite, |source|@src, |video|@src
|
|
|
*
|
|
|
* @since 1.0
|
|
|
* @param array<string, string|string[]>|null $element_attribute Element/attribute key/value pairs, null for default
|
|
|
@@ -369,6 +379,7 @@ class Sanitize implements RegistryAware
|
|
|
'blockquote' => 'cite',
|
|
|
'del' => 'cite',
|
|
|
'form' => 'action',
|
|
|
+ 'iframe' => 'src',
|
|
|
'img' => [
|
|
|
'longdesc',
|
|
|
'src'
|
|
|
@@ -535,12 +546,20 @@ class Sanitize implements RegistryAware
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // Replace relative URLs
|
|
|
+ // Replace relative URLs and blocks disallowed URI schemes (protocols)
|
|
|
$this->base = $base;
|
|
|
foreach ($this->replace_url_attributes as $element => $attributes) {
|
|
|
$this->replace_urls($document, $element, $attributes);
|
|
|
}
|
|
|
|
|
|
+ // MathML and SVG allow href on arbitrary descendants,
|
|
|
+ // so require a walk of the DOM
|
|
|
+ if ($this->disallowed_uri_schemes !== []
|
|
|
+ && ($document->getElementsByTagName('math')->length > 0
|
|
|
+ || $document->getElementsByTagName('svg')->length > 0)) {
|
|
|
+ $this->block_disallowed_schemes_in_descendants($xpath);
|
|
|
+ }
|
|
|
+
|
|
|
// If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
|
|
|
if ($this->image_handler !== '' && $this->enable_cache) {
|
|
|
$images = $document->getElementsByTagName('img');
|
|
|
@@ -657,7 +676,10 @@ class Sanitize implements RegistryAware
|
|
|
if ($element->hasAttribute($attribute)) {
|
|
|
$value = $this->registry->call(Misc::class, 'absolutize_url', [$element->getAttribute($attribute), $this->base]);
|
|
|
if ($value !== false) {
|
|
|
- $value = $this->https_url($value);
|
|
|
+ // Block disallowed URI protocols (e.g. javascript:), otherwise force HTTPS where applicable
|
|
|
+ $value = ($this->disallowed_uri_schemes !== [] && !$this->is_allowed_scheme($value))
|
|
|
+ ? 'unsafe:' . $value
|
|
|
+ : $this->https_url($value);
|
|
|
$element->setAttribute($attribute, $value);
|
|
|
}
|
|
|
}
|
|
|
@@ -742,6 +764,51 @@ class Sanitize implements RegistryAware
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ public function is_allowed_scheme(string $uri): bool
|
|
|
+ {
|
|
|
+ $pos = strpos($uri, ':');
|
|
|
+ if ($pos === false) {
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ $scheme = strtolower(substr($uri, 0, $pos));
|
|
|
+ if (!ctype_alnum($scheme)) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ return !in_array($scheme, $this->disallowed_uri_schemes, true);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Block disallowed URI schemes (protocols) on elements carrying an href attribute.
|
|
|
+ *
|
|
|
+ * This handles MathML and SVG elements which permit href on arbitrary descendant elements
|
|
|
+ * For SVG, also checks xlink:href attribute.
|
|
|
+ */
|
|
|
+ private function block_disallowed_schemes_in_descendants(\DOMXPath $xpath): void
|
|
|
+ {
|
|
|
+ // Note: content is parsed via loadHTML(), which does not namespace-process attributes
|
|
|
+ $elements = $xpath->query('.//*[self::math or self::svg]/descendant-or-self::*[@href or @*[name()="xlink:href"]]');
|
|
|
+ if ($elements === false) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ foreach ($elements as $element) {
|
|
|
+ if (!($element instanceof \DOMElement)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if ($element->hasAttribute('href')) {
|
|
|
+ $href = $element->getAttribute('href');
|
|
|
+ if (!$this->is_allowed_scheme($href)) {
|
|
|
+ $element->setAttribute('href', 'unsafe:' . $href);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if ($element->hasAttribute('xlink:href')) {
|
|
|
+ $href = $element->getAttribute('xlink:href');
|
|
|
+ if (!$this->is_allowed_scheme($href)) {
|
|
|
+ $element->setAttribute('xlink:href', 'unsafe:' . $href);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* @param int-mask-of<SimplePie::CONSTRUCT_*> $type
|
|
|
* @return void
|