Sanitize.php 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787
  1. <?php
  2. // SPDX-FileCopyrightText: 2004-2023 Ryan Parman, Sam Sneddon, Ryan McCue
  3. // SPDX-License-Identifier: BSD-3-Clause
  4. declare(strict_types=1);
  5. namespace SimplePie;
  6. use DOMDocument;
  7. use DOMXPath;
  8. use InvalidArgumentException;
  9. use Psr\Http\Client\ClientInterface;
  10. use Psr\Http\Message\RequestFactoryInterface;
  11. use Psr\Http\Message\UriFactoryInterface;
  12. use SimplePie\Cache\Base;
  13. use SimplePie\Cache\BaseDataCache;
  14. use SimplePie\Cache\CallableNameFilter;
  15. use SimplePie\Cache\DataCache;
  16. use SimplePie\Cache\NameFilter;
  17. use SimplePie\HTTP\Client;
  18. use SimplePie\HTTP\ClientException;
  19. use SimplePie\HTTP\FileClient;
  20. use SimplePie\HTTP\Psr18Client;
  21. /**
  22. * Used for data cleanup and post-processing
  23. *
  24. *
  25. * This class can be overloaded with {@see \SimplePie\SimplePie::set_sanitize_class()}
  26. *
  27. * @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags
  28. */
  29. class Sanitize implements RegistryAware
  30. {
  31. // Internal state (note: declared public, not private)
  32. /** @var string */
  33. public $base = '';
  34. // Options
  35. /** @var bool */
  36. public $remove_div = true;
  37. /** @var string */
  38. public $image_handler = '';
  39. /** @var string[] */
  40. public $strip_htmltags = ['base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'];
  41. /** @var bool */
  42. public $encode_instead_of_strip = false;
  43. /** @var string[] */
  44. public $strip_attributes = ['bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'];
  45. /** @var string[] */
  46. public $rename_attributes = [];
  47. /** @var array<string, array<string, string>> */
  48. public $add_attributes = ['audio' => ['preload' => 'none'], 'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'], 'video' => ['preload' => 'none']];
  49. /** @var bool */
  50. public $strip_comments = false;
  51. /** @var string */
  52. public $output_encoding = 'UTF-8';
  53. /** @var bool */
  54. public $enable_cache = true;
  55. /** @var string */
  56. public $cache_location = './cache';
  57. /** @var string */
  58. public $cache_name_function = 'md5';
  59. /**
  60. * @var NameFilter
  61. */
  62. private $cache_namefilter;
  63. /** @var int */
  64. public $timeout = 10;
  65. /** @var string */
  66. public $useragent = '';
  67. /** @var bool */
  68. public $force_fsockopen = false;
  69. /** @var array<string, string|string[]> */
  70. public $replace_url_attributes = [];
  71. /**
  72. * @var array<int, mixed> Custom curl options
  73. * @see SimplePie::set_curl_options()
  74. */
  75. private $curl_options = [];
  76. /** @var Registry */
  77. public $registry;
  78. /**
  79. * @var DataCache|null
  80. */
  81. private $cache = null;
  82. /**
  83. * @var int Cache duration (in seconds)
  84. */
  85. private $cache_duration = 3600;
  86. /**
  87. * List of domains for which to force HTTPS.
  88. * @see \SimplePie\Sanitize::set_https_domains()
  89. * Array is a tree split at DNS levels. Example:
  90. * array('biz' => true, 'com' => array('example' => true), 'net' => array('example' => array('www' => true)))
  91. * @var true|array<string, true|array<string, true|array<string, array<string, true|array<string, true|array<string, true>>>>>>
  92. */
  93. public $https_domains = [];
  94. /**
  95. * @var Client|null
  96. */
  97. private $http_client = null;
  /**
   * Create a sanitizer with the built-in URL-replacement map installed.
   */
  public function __construct()
  {
      // Calling without an argument passes null, which selects the default
      // element/attribute map inside set_url_replacements().
      $this->set_url_replacements();
  }
  /**
   * Choose whether sanitize() strips the outer wrapper <div> from HTML output.
   *
   * When disabled, sanitize() keeps the wrapper but replaces its opening tag
   * with a bare <div>.
   *
   * @param bool $enable True to remove the wrapper, false to keep it
   * @return void
   */
  public function remove_div(bool $enable = true)
  {
      $this->remove_div = $enable;
  }
  /**
   * Set the prefix that sanitize() puts in front of a cached image's name
   * when it rewrites <img src> attributes.
   *
   * Any falsy value (false, '', '0', ...) clears the handler, which turns the
   * image rewriting step off.
   *
   * @param string|false $page Handler prefix, or false to disable
   * @return void
   */
  public function set_image_handler($page = false)
  {
      $this->image_handler = $page ? (string) $page : '';
  }
  /**
   * Store the registry this sanitizer uses to call other SimplePie classes
   * (URL absolutizing, encoding conversion, cache handlers).
   *
   * @param \SimplePie\Registry $registry
   * @return void
   */
  public function set_registry(\SimplePie\Registry $registry)
  {
      $this->registry = $registry;
  }
  /**
   * Configure the cache used for the image rewriting step of sanitize().
   *
   * @param bool $enable_cache Whether caching is enabled
   * @param string $cache_location Cache location; a falsy value keeps the current one
   * @param string|NameFilter $cache_name_function Name filter, or (for BC) a callable given as a string
   * @param class-string<Cache> $cache_class Not used by this method
   * @param DataCache|null $cache Cache implementation to use; null keeps the current one
   * @return void
   * @throws InvalidArgumentException If $cache_name_function is neither a string nor a NameFilter
   */
  public function pass_cache_data(bool $enable_cache = true, string $cache_location = './cache', $cache_name_function = 'md5', string $cache_class = Cache::class, ?DataCache $cache = null)
  {
      $this->enable_cache = $enable_cache;

      if ($cache_location) {
          $this->cache_location = $cache_location;
      }

      $given_as_string = is_string($cache_name_function);

      // @phpstan-ignore-next-line Enforce PHPDoc type.
      if (!$given_as_string && !($cache_name_function instanceof NameFilter)) {
          $message = sprintf(
              '%s(): Argument #3 ($cache_name_function) must be of type %s',
              __METHOD__,
              NameFilter::class
          );

          throw new InvalidArgumentException($message, 1);
      }

      // BC: $cache_name_function could be a callable as string
      if ($given_as_string) {
          // trigger_error(sprintf('Providing $cache_name_function as string in "%s()" is deprecated since SimplePie 1.8.0, provide as "%s" instead.', __METHOD__, NameFilter::class), \E_USER_DEPRECATED);
          $this->cache_name_function = $cache_name_function;
          // Wrap the legacy callable so the rest of the class only deals with NameFilter.
          $cache_name_function = new CallableNameFilter($cache_name_function);
      }

      $this->cache_namefilter = $cache_name_function;

      if ($cache !== null) {
          $this->cache = $cache;
      }
  }
  /**
   * Register a PSR-18 HTTP client together with its PSR-17 factories.
   *
   * The three objects are wrapped in a Psr18Client, which is then used for
   * the requests issued by this class instead of the default client.
   *
   * @param ClientInterface $http_client PSR-18 client
   * @param RequestFactoryInterface $request_factory PSR-17 request factory
   * @param UriFactoryInterface $uri_factory PSR-17 URI factory
   */
  final public function set_http_client(
      ClientInterface $http_client,
      RequestFactoryInterface $request_factory,
      UriFactoryInterface $uri_factory
  ): void {
      $adapter = new Psr18Client($http_client, $request_factory, $uri_factory);

      $this->http_client = $adapter;
  }
  /**
   * Configure the options of the default HTTP client.
   *
   * A falsy $timeout, $useragent or $force_fsockopen leaves the corresponding
   * current setting untouched; $curl_options always replaces the stored
   * options. Any previously registered or created HTTP client is discarded so
   * the next request builds a fresh one from these settings.
   *
   * @deprecated since SimplePie 1.9.0, use \SimplePie\Sanitize::set_http_client() instead.
   * @param class-string<File> $file_class Not used by this method
   * @param int $timeout Request timeout; 0 keeps the current value
   * @param string $useragent User agent; a falsy string keeps the current value
   * @param bool $force_fsockopen True to force fsockopen; false keeps the current value
   * @param array<int, mixed> $curl_options Custom curl options
   * @return void
   */
  public function pass_file_data(string $file_class = File::class, int $timeout = 10, string $useragent = '', bool $force_fsockopen = false, array $curl_options = [])
  {
      // trigger_error(sprintf('SimplePie\Sanitize::pass_file_data() is deprecated since SimplePie 1.9.0, please use "SimplePie\Sanitize::set_http_client()" instead.'), \E_USER_DEPRECATED);

      // Invalidate the registered client.
      $this->http_client = null;
      $this->curl_options = $curl_options;

      if ($timeout !== 0) {
          $this->timeout = $timeout;
      }

      if ($useragent) {
          $this->useragent = $useragent;
      }

      if ($force_fsockopen) {
          $this->force_fsockopen = true;
      }
  }
  /**
   * Set the list of HTML tags that sanitize() strips from the content.
   *
   * A string is split on commas as-is (no trimming of the pieces). Any falsy
   * value clears the list.
   *
   * @param string[]|string $tags Tag names as an array or a comma-separated string
   * @return void
   */
  public function strip_htmltags($tags = ['base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'])
  {
      if (!$tags) {
          $this->strip_htmltags = [];

          return;
      }

      $this->strip_htmltags = is_array($tags) ? $tags : explode(',', $tags);
  }
  /**
   * Choose what happens to tags listed for stripping: when enabled, their
   * markup is kept as literal text instead of being removed.
   *
   * @param bool $encode True to encode, false to strip
   * @return void
   */
  public function encode_instead_of_strip(bool $encode = false)
  {
      $this->encode_instead_of_strip = $encode;
  }
  /**
   * Set the list of attributes that sanitize() renames to
   * "data-sanitized-<name>".
   *
   * A string is split on commas as-is (no trimming of the pieces). Any falsy
   * value clears the list.
   *
   * @param string[]|string $attribs Attribute names as an array or a comma-separated string
   * @return void
   */
  public function rename_attributes($attribs = [])
  {
      if (!$attribs) {
          $this->rename_attributes = [];

          return;
      }

      $this->rename_attributes = is_array($attribs) ? $attribs : explode(',', $attribs);
  }
  /**
   * Set the list of attributes that sanitize() removes from every element.
   *
   * A string is split on commas as-is (no trimming of the pieces). Any falsy
   * value clears the list.
   *
   * @param string[]|string $attribs Attribute names as an array or a comma-separated string
   * @return void
   */
  public function strip_attributes($attribs = ['bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'])
  {
      if (!$attribs) {
          $this->strip_attributes = [];

          return;
      }

      $this->strip_attributes = is_array($attribs) ? $attribs : explode(',', $attribs);
  }
  /**
   * Set the attributes that sanitize() forces onto specific elements.
   *
   * @param array<string, array<string, string>> $attribs Map of tag name to attribute name/value pairs
   * @return void
   */
  public function add_attributes(array $attribs = ['audio' => ['preload' => 'none'], 'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'], 'video' => ['preload' => 'none']])
  {
      $this->add_attributes = $attribs;
  }
  /**
   * Choose whether sanitize() removes comment nodes from HTML content.
   *
   * @param bool $strip True to remove comments, false to keep them
   * @return void
   */
  public function strip_comments(bool $strip = false)
  {
      $this->strip_comments = $strip;
  }
  /**
   * Set the character encoding of the data returned by sanitize().
   *
   * sanitize() converts its result from UTF-8 whenever this is set to
   * anything other than the exact string 'UTF-8'.
   *
   * @param string $encoding Target encoding name
   * @return void
   */
  public function set_output_encoding(string $encoding = 'UTF-8')
  {
      $this->output_encoding = $encoding;
  }
  /**
   * Set element/attribute key/value pairs of HTML attributes
   * containing URLs that need to be resolved relative to the feed
   *
   * Passing null installs the default map: |a|@href, |area|@href,
   * |audio|@src, |blockquote|@cite, |del|@cite, |form|@action,
   * |img|@longdesc, |img|@src, |input|@src, |ins|@cite, |q|@cite,
   * |source|@src, |video|@poster, |video|@src
   *
   * @since 1.0
   * @param array<string, string|string[]>|null $element_attribute Element/attribute key/value pairs, null for default
   * @return void
   */
  public function set_url_replacements(?array $element_attribute = null)
  {
      $this->replace_url_attributes = $element_attribute ?? [
          'a' => 'href',
          'area' => 'href',
          'audio' => 'src',
          'blockquote' => 'cite',
          'del' => 'cite',
          'form' => 'action',
          'img' => [
              'longdesc',
              'src'
          ],
          'input' => 'src',
          'ins' => 'cite',
          'q' => 'cite',
          'source' => 'src',
          'video' => [
              'poster',
              'src'
          ]
      ];
  }
  /**
   * Set the list of domains for which to force HTTPS.
   *
   * The list replaces any previous one and is stored as a tree keyed by DNS
   * label, from the top-level label downwards; a branch ends in true at the
   * point where a listed domain ends. Listing a domain therefore also covers
   * everything below it.
   *
   * @see \SimplePie\Misc::https_url()
   *
   * @param string[] $domains list of domain names ['biz', 'example.com', 'example.org', 'www.example.net']
   *
   * @return void
   */
  public function set_https_domains(array $domains)
  {
      $this->https_domains = [];

      foreach ($domains as $domain) {
          // Drop surrounding dots and whitespace, then walk from the TLD inwards.
          $labels = array_reverse(explode('.', trim($domain, ". \t\n\r\0\x0B")));

          /** @var true|array<string, true|array<string, true|array<string, array<string, true|array<string, true|array<string, true>>>>>> */ // Needed for PHPStan.
          $cursor = &$this->https_domains;

          foreach ($labels as $label) {
              if ($cursor === true) {
                  // A shorter domain already covers this one.
                  break;
              }

              if (!isset($cursor[$label])) {
                  $cursor[$label] = [];
              }

              $cursor = &$cursor[$label];
          }

          // Mark the end of this domain; this also replaces any deeper entries below it.
          $cursor = true;
          unset($cursor);
      }
  }
  /**
   * Check if the domain is in the list of forced HTTPS.
   *
   * The domain matches when walking its labels (top-level first) through the
   * tree built by set_https_domains() ends on a true marker, so subdomains of
   * a listed domain match as well. Labels are compared exactly as given; no
   * case folding is applied here.
   *
   * @param string $domain Host name to look up
   * @return bool
   */
  protected function is_https_domain(string $domain)
  {
      $labels = array_reverse(explode('.', trim($domain, '. ')));
      $cursor = $this->https_domains;

      foreach ($labels as $label) {
          if (!isset($cursor[$label])) {
              // Either the branch does not exist or a true marker was reached.
              break;
          }

          $cursor = $cursor[$label];
      }

      return $cursor === true;
  }
  /**
   * Force HTTPS for selected Web sites.
   *
   * Only URLs starting with "http://" (in any letter case) whose host is in
   * the forced-HTTPS list are changed; every other URL is returned untouched.
   *
   * @param string $url URL to check
   * @return string
   */
  public function https_url(string $url)
  {
      if (strtolower(substr($url, 0, 7)) !== 'http://') {
          return $url;
      }

      $host = parse_url($url, PHP_URL_HOST);

      // false: malformed URL; null: missing host.
      if ($host === false || $host === null) {
          return $url;
      }

      if (!$this->is_https_domain($host)) {
          return $url;
      }

      // Insert the "s" right after "http".
      return substr_replace($url, 's', 4, 0);
  }
  /**
   * Sanitize a piece of feed data according to its construct type.
   *
   * The value is trimmed first; empty data is returned immediately unless it
   * is an IRI. Depending on the bits set in $type the data is then
   * base64-decoded, run through the HTML clean-up (comments, tags and
   * attributes stripped/renamed/added, relative URLs resolved, images routed
   * through the cache), resolved against $base, HTML-escaped, and finally
   * converted to the configured output encoding.
   *
   * @param string $data Raw value to clean
   * @param int-mask-of<SimplePie::CONSTRUCT_*> $type
   * @param string $base Base URL used to resolve relative URLs
   * @return string|bool|string[]
   * @throws \SimplePie\Exception If HTML must be processed but DOMDocument is not available
   */
  public function sanitize(string $data, int $type, string $base = '')
  {
      $data = trim($data);

      // Empty data needs no work, except for IRIs, which are still resolved against $base.
      if ($data === '' && !($type & \SimplePie\SimplePie::CONSTRUCT_IRI)) {
          return $data;
      }

      if ($type & \SimplePie\SimplePie::CONSTRUCT_MAYBE_HTML) {
          // Looks like HTML if it contains an entity/character reference or a closing tag.
          if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . \SimplePie\SimplePie::PCRE_HTML_ATTRIBUTE . '>)/', $data)) {
              $type |= \SimplePie\SimplePie::CONSTRUCT_HTML;
          } else {
              $type |= \SimplePie\SimplePie::CONSTRUCT_TEXT;
          }
      }

      if ($type & \SimplePie\SimplePie::CONSTRUCT_BASE64) {
          $data = base64_decode($data);
      }

      if ($type & (\SimplePie\SimplePie::CONSTRUCT_HTML | \SimplePie\SimplePie::CONSTRUCT_XHTML)) {
          if (!class_exists('DOMDocument')) {
              throw new \SimplePie\Exception('DOMDocument not found, unable to use sanitizer');
          }

          $document = new \DOMDocument();
          $document->encoding = 'UTF-8';

          $data = $this->preprocess($data, $type);

          // Silence parser warnings about malformed markup; always restore the
          // previous handler, even if loading throws.
          set_error_handler([Misc::class, 'silence_errors']);
          try {
              $document->loadHTML($data);
          } finally {
              restore_error_handler();
          }

          $xpath = new \DOMXPath($document);

          // Strip comments
          if ($this->strip_comments) {
              $comments = $xpath->query('//comment()');

              foreach ($comments as $comment) {
                  $comment->parentNode->removeChild($comment);
              }
          }

          // Strip out HTML tags and attributes that might cause various security problems.
          // Based on recommendations by Mark Pilgrim at:
          // https://web.archive.org/web/20110902041826/http://diveintomark.org:80/archives/2003/06/12/how_to_consume_rss_safely
          if ($this->strip_htmltags) {
              foreach ($this->strip_htmltags as $tag) {
                  $this->strip_tag($tag, $document, $xpath, $type);
              }
          }

          if ($this->rename_attributes) {
              foreach ($this->rename_attributes as $attrib) {
                  $this->rename_attr($attrib, $xpath);
              }
          }

          if ($this->strip_attributes) {
              foreach ($this->strip_attributes as $attrib) {
                  $this->strip_attr($attrib, $xpath);
              }
          }

          if ($this->add_attributes) {
              foreach ($this->add_attributes as $tag => $valuePairs) {
                  $this->add_attr($tag, $valuePairs, $document);
              }
          }

          // Replace relative URLs
          $this->base = $base;
          foreach ($this->replace_url_attributes as $element => $attributes) {
              $this->replace_urls($document, $element, $attributes);
          }

          // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
          if ($this->image_handler !== '' && $this->enable_cache) {
              $this->cache_images($document);
          }

          // Get content node
          $div = $document->getElementsByTagName('body')->item(0)->firstChild;
          // Finally, convert to a HTML string
          $data = trim($document->saveHTML($div));

          if ($this->remove_div) {
              $data = preg_replace('/^<div' . \SimplePie\SimplePie::PCRE_XML_ATTRIBUTE . '>/', '', $data);
              $data = preg_replace('/<\/div>$/', '', $data);
          } else {
              // Keep the wrapper but drop whatever attributes it carried.
              $data = preg_replace('/^<div' . \SimplePie\SimplePie::PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
          }

          $data = str_replace('</source>', '', $data);
      }

      if ($type & \SimplePie\SimplePie::CONSTRUCT_IRI) {
          $absolute = $this->registry->call(Misc::class, 'absolutize_url', [$data, $base]);
          if ($absolute !== false) {
              $data = $absolute;
          }
      }

      if ($type & (\SimplePie\SimplePie::CONSTRUCT_TEXT | \SimplePie\SimplePie::CONSTRUCT_IRI)) {
          $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
      }

      if ($this->output_encoding !== 'UTF-8') {
          $data = $this->registry->call(Misc::class, 'change_encoding', [$data, 'UTF-8', $this->output_encoding]);
      }

      return $data;
  }

  /**
   * Store every <img> of the document in the cache and point its src at the image handler.
   *
   * Images that cannot be fetched, or whose response is not acceptable, keep
   * their original src.
   *
   * @return void
   */
  private function cache_images(DOMDocument $document): void
  {
      // REMOTE_ADDR is not defined outside a web request (e.g. when run from
      // the command line), so only forward it when it is actually known.
      $headers = [];
      if (isset($_SERVER['REMOTE_ADDR'])) {
          $headers['X-FORWARDED-FOR'] = $_SERVER['REMOTE_ADDR'];
      }

      foreach ($document->getElementsByTagName('img') as $img) {
          if (!$img->hasAttribute('src')) {
              continue;
          }

          $src = $img->getAttribute('src');
          $image_url = $this->cache_namefilter->filter($src);
          $cache = $this->get_cache($image_url);

          if ($cache->get_data($image_url, false)) {
              // Already cached: just rewrite the reference.
              $img->setAttribute('src', $this->image_handler . $image_url);
              continue;
          }

          try {
              $file = $this->get_http_client()->request(Client::METHOD_GET, $src, $headers);
          } catch (ClientException $th) {
              continue;
          }

          // Remote responses are only cached for status 200 or 207-299.
          if (Misc::is_remote_uri($file->get_final_requested_uri())) {
              $status = $file->get_status_code();
              $status_ok = $status === 200 || ($status > 206 && $status < 300);

              if (!$status_ok) {
                  continue;
              }
          }

          $payload = ['headers' => $file->get_headers(), 'body' => $file->get_body_content()];

          if ($cache->set_data($image_url, $payload, $this->cache_duration)) {
              $img->setAttribute('src', $this->image_handler . $image_url);
          } else {
              trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
          }
      }
  }
  /**
   * Wrap a markup fragment in a complete HTML document for the DOM parser.
   *
   * Existing <html>/<body> tags are removed from the fragment. Whenever $type
   * has any bit other than CONSTRUCT_XHTML set, the fragment is wrapped in a
   * <div> and gets an HTML5 doctype; otherwise it is left unwrapped and gets
   * an XHTML 1.0 Strict doctype. A UTF-8 Content-Type meta tag is always added.
   *
   * @param string $html Markup fragment
   * @param int-mask-of<SimplePie::CONSTRUCT_*> $type
   * @return string
   */
  protected function preprocess(string $html, int $type)
  {
      $html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html);

      $only_xhtml = !($type & ~\SimplePie\SimplePie::CONSTRUCT_XHTML);

      if ($only_xhtml) {
          $doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
          $content_type = 'application/xhtml+xml';
      } else {
          // Atom XHTML constructs are wrapped with a div by default
          // Note: No protection if $html contains a stray </div>!
          $html = '<div>' . $html . '</div>';
          $doctype = '<!DOCTYPE html>';
          $content_type = 'text/html';
      }

      return $doctype
          . '<html><head>'
          . '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />'
          . '</head><body>' . $html . '</body></html>';
  }
  /**
   * Resolve the URL attributes of every $tag element against the current base
   * URL and apply the forced-HTTPS rule to the result.
   *
   * Tags that are configured to be stripped are skipped. Attributes whose
   * value cannot be absolutized are left unchanged.
   *
   * @param DOMDocument $document Document to modify in place
   * @param string $tag Element name
   * @param array<string>|string $attributes Attribute name(s) holding URLs
   * @return void
   */
  public function replace_urls(DOMDocument $document, string $tag, $attributes)
  {
      $attribute_names = is_array($attributes) ? $attributes : [$attributes];

      if (is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags)) {
          return;
      }

      foreach ($document->getElementsByTagName($tag) as $element) {
          foreach ($attribute_names as $attribute) {
              if (!$element->hasAttribute($attribute)) {
                  continue;
              }

              $resolved = $this->registry->call(Misc::class, 'absolutize_url', [$element->getAttribute($attribute), $this->base]);
              if ($resolved === false) {
                  continue;
              }

              $element->setAttribute($attribute, $this->https_url($resolved));
          }
      }
  }
  /**
   * Produce the replacement text for one matched tag.
   *
   * Reads these entries of $match: [0] the whole match, [1] the tag name,
   * [2] text placed directly after the name in the opening tag (presumably
   * the attributes), [3] text placed between the encoded tags, [4] text
   * returned when the tag is simply stripped. When [4] is absent, or the tag
   * is script/style, the whole match is encoded (encode mode) or dropped.
   *
   * @param array<int, string> $match
   * @return string
   */
  public function do_strip_htmltags(array $match)
  {
      $keeps_content = isset($match[4]) && !in_array(strtolower($match[1]), ['script', 'style']);

      if (!$this->encode_instead_of_strip) {
          return $keeps_content ? $match[4] : '';
      }

      if (!$keeps_content) {
          return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
      }

      $name = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
      $after_name = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');

      return "&lt;{$name}{$after_name}&gt;{$match[3]}&lt;/{$name}&gt;";
  }
  /**
   * Remove every $tag element found below <body>.
   *
   * - script/style with encoding off: the element is removed with its content.
   * - Any other tag with encoding off, and script/style with encoding on: the
   *   element is replaced by its child nodes.
   * - Any other tag with encoding on: as above, but the opening and closing
   *   tags are kept as literal text around the children.
   *
   * @param string $tag Element name to strip
   * @param DOMDocument $document Document being sanitized
   * @param DOMXPath $xpath XPath helper bound to $document
   * @param int-mask-of<SimplePie::CONSTRUCT_*> $type
   * @return void
   */
  protected function strip_tag(string $tag, DOMDocument $document, DOMXPath $xpath, int $type)
  {
      $elements = $xpath->query('body//' . $tag);
      if ($elements === false) {
          // The expression could not be evaluated (e.g. invalid tag name): nothing to strip.
          return;
      }

      $is_script_or_style = in_array($tag, ['script', 'style'], true);

      if (!$this->encode_instead_of_strip && $is_script_or_style) {
          foreach ($elements as $element) {
              $element->parentNode->removeChild($element);
          }

          return;
      }

      // For elements which aren't script or style, include the tag itself
      $keep_tag_as_text = $this->encode_instead_of_strip && !$is_script_or_style;

      foreach ($elements as $element) {
          $fragment = $document->createDocumentFragment();

          if ($keep_tag_as_text) {
              $text = '<' . $tag;

              if ($element->hasAttributes()) {
                  $attrs = [];

                  foreach ($element->attributes as $name => $attr) {
                      $value = $attr->value;

                      // Compare against '' explicitly: empty() would also treat "0" as empty.
                      if ($value === '') {
                          if ($type & \SimplePie\SimplePie::CONSTRUCT_XHTML) {
                              // In XHTML, empty values should never exist, so we repeat the name
                              $value = $name;
                          } elseif ($type & \SimplePie\SimplePie::CONSTRUCT_HTML) {
                              // For HTML, empty is fine
                              $attrs[] = $name;
                              continue;
                          }
                      }

                      // Standard attribute text
                      $attrs[] = $name . '="' . $value . '"';
                  }

                  $text .= ' ' . implode(' ', $attrs);
              }

              $text .= '>';
              $fragment->appendChild(new \DOMText($text));
          }

          // appendChild() moves the node, so firstChild advances on every pass.
          while ($element->firstChild !== null) {
              $fragment->appendChild($element->firstChild);
          }

          if ($keep_tag_as_text) {
              $fragment->appendChild(new \DOMText('</' . $tag . '>'));
          }

          $element->parentNode->replaceChild($fragment, $element);
      }
  }
  /**
   * Remove the given attribute from every element that carries it.
   *
   * @param string $attrib Attribute name
   * @param DOMXPath $xpath XPath helper bound to the document being sanitized
   * @return void
   */
  protected function strip_attr(string $attrib, DOMXPath $xpath)
  {
      $carriers = $xpath->query(sprintf('//*[@%s]', $attrib));

      /** @var \DOMElement $carrier */
      foreach ($carriers as $carrier) {
          $carrier->removeAttribute($attrib);
      }
  }
  /**
   * Rename the given attribute to "data-sanitized-<name>" on every element
   * that carries it, keeping its value.
   *
   * @param string $attrib Attribute name
   * @param DOMXPath $xpath XPath helper bound to the document being sanitized
   * @return void
   */
  protected function rename_attr(string $attrib, DOMXPath $xpath)
  {
      $renamed = 'data-sanitized-' . $attrib;
      $carriers = $xpath->query(sprintf('//*[@%s]', $attrib));

      /** @var \DOMElement $carrier */
      foreach ($carriers as $carrier) {
          $current = $carrier->getAttribute($attrib);
          $carrier->setAttribute($renamed, $current);
          $carrier->removeAttribute($attrib);
      }
  }
  /**
   * Set the given attributes on every $tag element of the document,
   * overwriting values that are already present.
   *
   * @param string $tag Element name
   * @param array<string, string> $valuePairs Attribute name/value pairs
   * @param DOMDocument $document Document to modify in place
   * @return void
   */
  protected function add_attr(string $tag, array $valuePairs, DOMDocument $document)
  {
      /** @var \DOMElement $target */
      foreach ($document->getElementsByTagName($tag) as $target) {
          foreach ($valuePairs as $name => $value) {
              $target->setAttribute($name, $value);
          }
      }
  }
  /**
   * Get a DataCache
   *
   * Returns the configured cache when there is one; otherwise asks the
   * registry for a legacy cache handler and wraps it. The wrapper is created
   * anew on every call and is not stored.
   *
   * @param string $image_url Only needed for BC, can be removed in SimplePie 2.0.0
   *
   * @return DataCache
   */
  private function get_cache(string $image_url = ''): DataCache
  {
      if ($this->cache !== null) {
          return $this->cache;
      }

      // @trigger_error(sprintf('Not providing as PSR-16 cache implementation is deprecated since SimplePie 1.8.0, please use "SimplePie\SimplePie::set_cache()".'), \E_USER_DEPRECATED);
      $handler_arguments = [
          $this->cache_location,
          $image_url,
          Base::TYPE_IMAGE
      ];
      $handler = $this->registry->call(Cache::class, 'get_handler', $handler_arguments);

      return new BaseDataCache($handler);
  }
  /**
   * Get a HTTP client
   *
   * Returns the registered client, or lazily creates (and remembers) a
   * FileClient built from the timeout, user agent, fsockopen and curl
   * settings of this object.
   */
  private function get_http_client(): Client
  {
      if ($this->http_client !== null) {
          return $this->http_client;
      }

      $options = [
          'timeout' => $this->timeout,
          'redirects' => 5,
          'useragent' => $this->useragent,
          'force_fsockopen' => $this->force_fsockopen,
          'curl_options' => $this->curl_options,
      ];

      $this->http_client = new FileClient($this->registry, $options);

      return $this->http_client;
  }
  698. }
  // Keep the pre-namespace class name available as an alias.
  class_alias(Sanitize::class, 'SimplePie_Sanitize');