SimplePieCustom.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310
  1. <?php
  2. declare(strict_types=1);
  3. final class FreshRSS_SimplePieCustom extends \SimplePie\SimplePie
  4. {
  5. /**
  6. * @param array<string,mixed> $attributes
  7. * @param array<int,mixed> $curl_options
  8. * @throws FreshRSS_Context_Exception
  9. */
  10. public function __construct(array $attributes = [], array $curl_options = []) {
  11. parent::__construct();
  12. $limits = FreshRSS_Context::systemConf()->limits;
  13. $this->get_registry()->register(\SimplePie\File::class, FreshRSS_SimplePieFetch::class);
  14. $this->set_useragent(FRESHRSS_USERAGENT);
  15. $this->set_cache_name_function('sha1'); // @phpstan-ignore method.deprecated
  16. $this->set_cache_location(CACHE_PATH); // @phpstan-ignore method.deprecated
  17. $this->set_cache_duration($limits['cache_duration'], $limits['cache_duration_min'], $limits['cache_duration_max']);
  18. $this->enable_order_by_date(false);
  19. $feed_timeout = empty($attributes['timeout']) || !is_numeric($attributes['timeout']) ? 0 : (int)$attributes['timeout'];
  20. $this->set_timeout($feed_timeout > 0 ? $feed_timeout : $limits['timeout']);
  21. $curl_options = array_replace(FreshRSS_Context::systemConf()->curl_options, $curl_options);
  22. if (isset($attributes['ssl_verify'])) {
  23. $curl_options[CURLOPT_SSL_VERIFYHOST] = empty($attributes['ssl_verify']) ? 0 : 2;
  24. $curl_options[CURLOPT_SSL_VERIFYPEER] = (bool)$attributes['ssl_verify'];
  25. if (empty($attributes['ssl_verify'])) {
  26. $curl_options[CURLOPT_SSL_CIPHER_LIST] = 'DEFAULT@SECLEVEL=1';
  27. }
  28. }
  29. $attributes['curl_params'] = FreshRSS_http_Util::sanitizeCurlParams(is_array($attributes['curl_params'] ?? null) ? $attributes['curl_params'] : []);
  30. if (!empty($attributes['curl_params']) && is_array($attributes['curl_params'])) {
  31. foreach ($attributes['curl_params'] as $co => $v) {
  32. if (is_int($co)) {
  33. $curl_options[$co] = $v;
  34. }
  35. }
  36. }
  37. if (!empty($curl_options[CURLOPT_PROXYTYPE]) && ($curl_options[CURLOPT_PROXYTYPE] < 0 || $curl_options[CURLOPT_PROXYTYPE] === 3)) {
  38. // 3 is legacy for NONE
  39. unset($curl_options[CURLOPT_PROXYTYPE]);
  40. if (isset($curl_options[CURLOPT_PROXY])) {
  41. unset($curl_options[CURLOPT_PROXY]);
  42. }
  43. }
  44. if (defined('CURLOPT_PROTOCOLS_STR') && is_int(CURLOPT_PROTOCOLS_STR)) {
  45. $curl_options[CURLOPT_PROTOCOLS_STR] = 'http,https';
  46. if (defined('CURLOPT_REDIR_PROTOCOLS_STR') && is_int(CURLOPT_REDIR_PROTOCOLS_STR)) {
  47. $curl_options[CURLOPT_REDIR_PROTOCOLS_STR] = 'http,https';
  48. }
  49. } elseif (defined('CURLPROTO_HTTP') && defined('CURLPROTO_HTTPS')) {
  50. // Legacy PHP 8.2-
  51. if (defined('CURLOPT_PROTOCOLS')) {
  52. $curl_options[CURLOPT_PROTOCOLS] = CURLPROTO_HTTP | CURLPROTO_HTTPS;
  53. }
  54. if (defined('CURLOPT_REDIR_PROTOCOLS')) {
  55. $curl_options[CURLOPT_REDIR_PROTOCOLS] = CURLPROTO_HTTP | CURLPROTO_HTTPS;
  56. }
  57. }
  58. $this->set_curl_options($curl_options);
  59. $this->strip_comments(true);
  60. $this->rename_attributes(['id', 'class']);
  61. $this->allow_aria_attr(true);
  62. $this->allow_data_attr(true);
  63. $this->allowed_html_attributes([
  64. // HTML
  65. 'dir',
  66. 'draggable',
  67. 'hidden',
  68. 'lang',
  69. 'role',
  70. 'title',
  71. // MathML
  72. 'displaystyle',
  73. 'mathsize',
  74. 'scriptlevel',
  75. ]);
  76. $this->allowed_html_elements_with_attributes([
  77. // HTML
  78. 'a' => ['href', 'hreflang', 'type'],
  79. 'abbr' => [],
  80. 'acronym' => [],
  81. 'address' => [],
  82. // 'area' => [], // TODO: support <area> after rewriting ids with a format like #ugc-<insert original id here> (maybe)
  83. 'article' => [],
  84. 'aside' => [],
  85. 'audio' => ['controlslist', 'loop', 'muted', 'src'],
  86. 'b' => [],
  87. 'bdi' => [],
  88. 'bdo' => [],
  89. 'big' => [],
  90. 'blink' => [],
  91. 'blockquote' => ['cite'],
  92. 'br' => ['clear'],
  93. 'button' => ['disabled'],
  94. 'canvas' => ['width', 'height'],
  95. 'caption' => ['align'],
  96. 'center' => [],
  97. 'cite' => [],
  98. 'code' => [],
  99. 'col' => ['span', 'align', 'valign', 'width'],
  100. 'colgroup' => ['span', 'align', 'valign', 'width'],
  101. 'data' => ['value'],
  102. 'datalist' => [],
  103. 'dd' => [],
  104. 'del' => ['cite', 'datetime'],
  105. 'details' => ['open'],
  106. 'dfn' => [],
  107. 'dialog' => [],
  108. 'dir' => [],
  109. 'div' => ['align'],
  110. 'dl' => [],
  111. 'dt' => [],
  112. 'em' => [],
  113. 'fieldset' => ['disabled'],
  114. 'figcaption' => [],
  115. 'figure' => [],
  116. 'footer' => [],
  117. 'h1' => [],
  118. 'h2' => [],
  119. 'h3' => [],
  120. 'h4' => [],
  121. 'h5' => [],
  122. 'h6' => [],
  123. 'header' => [],
  124. 'hgroup' => [],
  125. 'hr' => ['align', 'noshade', 'size', 'width'],
  126. 'i' => [],
  127. 'iframe' => ['src', 'align', 'frameborder', 'longdesc', 'marginheight', 'marginwidth', 'scrolling', 'allowfullscreen'],
  128. 'image' => ['src', 'alt', 'width', 'height', 'align', 'border', 'hspace', 'longdesc', 'vspace'],
  129. 'img' => ['src', 'alt', 'width', 'height', 'align', 'border', 'hspace', 'longdesc', 'vspace'],
  130. 'ins' => ['cite', 'datetime'],
  131. 'kbd' => [],
  132. 'label' => [],
  133. 'legend' => [],
  134. 'li' => ['value', 'type'],
  135. 'main' => [],
  136. // 'map' => [], // TODO: support <map> after rewriting ids with a format like #ugc-<insert original id here> (maybe)
  137. 'mark' => [],
  138. 'marquee' => ['behavior', 'direction', 'height', 'hspace', 'loop', 'scrollamount', 'scrolldelay', 'truespeed', 'vspace', 'width'],
  139. 'menu' => [],
  140. 'meter' => ['value', 'min', 'max', 'low', 'high', 'optimum'],
  141. 'nav' => [],
  142. 'nobr' => [],
  143. // 'noembed' => [], // <embed> is not allowed, so we want to display the contents of <noembed>
  144. 'noframes' => [],
  145. // 'noscript' => [], // From the perspective of the feed content, JS isn't allowed so we want to display the contents of <noscript>
  146. 'ol' => ['reversed', 'start', 'type'],
  147. 'optgroup' => ['disabled', 'label'],
  148. 'option' => ['disabled', 'label', 'selected', 'value'],
  149. 'output' => [],
  150. 'p' => ['align'],
  151. 'picture' => [],
  152. // 'plaintext' => [], // Can't be closed. See: https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/plaintext
  153. 'pre' => ['width', 'wrap'],
  154. 'progress' => ['max', 'value'],
  155. 'q' => ['cite'],
  156. 'rb' => [],
  157. 'rp' => [],
  158. 'rt' => [],
  159. 'rtc' => [],
  160. 'ruby' => [],
  161. 's' => [],
  162. 'samp' => [],
  163. 'search' => [],
  164. 'section' => [],
  165. 'select' => ['disabled', 'multiple', 'size'],
  166. 'small' => [],
  167. 'source' => ['type', 'src', 'media', 'height', 'width'],
  168. 'span' => [],
  169. 'strike' => [],
  170. 'strong' => [],
  171. 'sub' => [],
  172. 'summary' => [],
  173. 'sup' => [],
  174. 'table' => ['align', 'border', 'cellpadding', 'cellspacing', 'rules', 'summary', 'width'],
  175. 'tbody' => ['align', 'char', 'charoff', 'valign'],
  176. 'td' => ['colspan', 'headers', 'rowspan', 'abbr', 'align', 'height', 'scope', 'valign', 'width'],
  177. 'textarea' => ['cols', 'disabled', 'maxlength', 'minlength', 'placeholder', 'readonly', 'rows', 'wrap'],
  178. 'tfoot' => ['align', 'valign'],
  179. 'th' => ['abbr', 'colspan', 'rowspan', 'scope', 'align', 'height', 'valign', 'width'],
  180. 'thead' => ['align', 'valign'],
  181. 'time' => ['datetime'],
  182. 'tr' => ['align', 'valign'],
  183. 'track' => ['default', 'kind', 'srclang', 'label', 'src'],
  184. 'tt' => [],
  185. 'u' => [],
  186. 'ul' => ['type'],
  187. 'var' => [],
  188. 'video' => ['src', 'poster', 'controlslist', 'height', 'loop', 'muted', 'playsinline', 'width'],
  189. 'wbr' => [],
  190. 'xmp' => [],
  191. // MathML
  192. 'maction' => ['actiontype', 'selection'],
  193. 'math' => ['display'],
  194. 'menclose' => ['notation'],
  195. 'merror' => [],
  196. 'mfenced' => ['close', 'open', 'separators'],
  197. 'mfrac' => ['denomalign', 'linethickness', 'numalign'],
  198. 'mi' => ['mathvariant'],
  199. 'mmultiscripts' => ['subscriptshift', 'superscriptshift'],
  200. 'mn' => [],
  201. 'mo' => ['accent', 'fence', 'form', 'largeop', 'lspace', 'maxsize', 'minsize', 'movablelimits', 'rspace', 'separator', 'stretchy', 'symmetric'],
  202. 'mover' => ['accent'],
  203. 'mpadded' => ['depth', 'height', 'lspace', 'voffset', 'width'],
  204. 'mphantom' => [],
  205. 'mprescripts' => [],
  206. 'mroot' => [],
  207. 'mrow' => [],
  208. 'ms' => [],
  209. 'mspace' => ['depth', 'height', 'width'],
  210. 'msqrt' => [],
  211. 'msub' => [],
  212. 'msubsup' => ['subscriptshift', 'superscriptshift'],
  213. 'msup' => ['superscriptshift'],
  214. 'mtable' => ['align', 'columnalign', 'columnlines', 'columnspacing', 'frame', 'framespacing', 'rowalign', 'rowlines', 'rowspacing', 'width'],
  215. 'mtd' => ['columnspan', 'rowspan', 'columnalign', 'rowalign'],
  216. 'mtext' => [],
  217. 'mtr' => ['columnalign', 'rowalign'],
  218. 'munder' => ['accentunder'],
  219. 'munderover' => ['accent', 'accentunder'],
  220. // TODO: Support SVG after sanitizing and URL rewriting of xlink:href
  221. ]);
  222. $this->strip_attributes([
  223. 'data-auto-leave-validation',
  224. 'data-leave-validation',
  225. 'data-no-leave-validation',
  226. 'data-original',
  227. ]);
  228. $this->add_attributes([
  229. 'audio' => ['controls' => 'controls', 'preload' => 'none'],
  230. 'iframe' => [
  231. 'allow' => 'accelerometer; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share',
  232. 'sandbox' => 'allow-scripts allow-same-origin',
  233. 'allowfullscreen' => 'allowfullscreen',
  234. ],
  235. 'video' => ['controls' => 'controls', 'preload' => 'none'],
  236. ]);
  237. $this->set_url_replacements([
  238. 'a' => 'href',
  239. 'area' => 'href',
  240. 'audio' => 'src',
  241. 'blockquote' => 'cite',
  242. 'del' => 'cite',
  243. 'form' => 'action',
  244. 'iframe' => 'src',
  245. 'img' => [
  246. 'longdesc',
  247. 'src',
  248. ],
  249. 'image' => [
  250. 'longdesc',
  251. 'src',
  252. ],
  253. 'input' => 'src',
  254. 'ins' => 'cite',
  255. 'q' => 'cite',
  256. 'source' => 'src',
  257. 'track' => 'src',
  258. 'video' => [
  259. 'poster',
  260. 'src',
  261. ],
  262. ]);
  263. $https_domains = [];
  264. $force = @file(FRESHRSS_PATH . '/force-https.default.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
  265. if (is_array($force)) {
  266. $https_domains = array_merge($https_domains, $force);
  267. }
  268. $force = @file(DATA_PATH . '/force-https.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
  269. if (is_array($force)) {
  270. $https_domains = array_merge($https_domains, $force);
  271. }
  272. // Remove whitespace and comments starting with # / ;
  273. $https_domains = preg_replace('%\\s+|[\/#;].*$%', '', $https_domains) ?? $https_domains;
  274. $https_domains = array_filter($https_domains, fn(string $v) => $v !== '');
  275. $this->set_https_domains($https_domains);
  276. }
  277. public static function sanitizeHTML(string $data, string $base = '', ?int $maxLength = null): string {
  278. if ($data === '' || ($maxLength !== null && $maxLength <= 0)) {
  279. return '';
  280. }
  281. if ($maxLength !== null) {
  282. $data = mb_strcut($data, 0, $maxLength, 'UTF-8');
  283. }
  284. /** @var FreshRSS_SimplePieCustom|null $simplePie */
  285. static $simplePie = null;
  286. if ($simplePie === null) {
  287. $simplePie = new static();
  288. $simplePie->enable_cache(false);
  289. $simplePie->init();
  290. }
  291. $sanitized = $simplePie->sanitize->sanitize($data, \SimplePie\SimplePie::CONSTRUCT_HTML, $base);
  292. if (!is_string($sanitized)) {
  293. return '';
  294. }
  295. $result = html_only_entity_decode($sanitized);
  296. if ($maxLength !== null && strlen($result) > $maxLength) {
  297. //Sanitizing has made the result too long so try again shorter
  298. $data = mb_strcut($result, 0, (2 * $maxLength) - strlen($result) - 2, 'UTF-8');
  299. return self::sanitizeHTML($data, $base, $maxLength);
  300. }
  301. return $result;
  302. }
  303. }