lib_rss.php 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016
  1. <?php
  2. declare(strict_types=1);
  3. if (!function_exists('mb_strcut')) {
  4. function mb_strcut(string $str, int $start, ?int $length = null, string $encoding = 'UTF-8'): string {
  5. return substr($str, $start, $length) ?: '';
  6. }
  7. }
  8. if (!function_exists('syslog')) {
  9. if (COPY_SYSLOG_TO_STDERR && !defined('STDERR')) {
  10. define('STDERR', fopen('php://stderr', 'w'));
  11. }
  12. function syslog(int $priority, string $message): bool {
  13. if (COPY_SYSLOG_TO_STDERR && defined('STDERR') && is_resource(STDERR)) {
  14. return fwrite(STDERR, $message . "\n") != false;
  15. }
  16. return false;
  17. }
  18. }
  19. if (function_exists('openlog')) {
  20. if (COPY_SYSLOG_TO_STDERR) {
  21. openlog('FreshRSS', LOG_CONS | LOG_ODELAY | LOG_PID | LOG_PERROR, LOG_USER);
  22. } else {
  23. openlog('FreshRSS', LOG_CONS | LOG_ODELAY | LOG_PID, LOG_USER);
  24. }
  25. }
  26. /**
  27. * Build a directory path by concatenating a list of directory names.
  28. *
  29. * @param string ...$path_parts a list of directory names
  30. * @return string corresponding to the final pathname
  31. */
  32. function join_path(...$path_parts): string {
  33. return join(DIRECTORY_SEPARATOR, $path_parts);
  34. }
  35. //<Auto-loading>
  36. function classAutoloader(string $class): void {
  37. if (strpos($class, 'FreshRSS') === 0) {
  38. $components = explode('_', $class);
  39. switch (count($components)) {
  40. case 1:
  41. include(APP_PATH . '/' . $components[0] . '.php');
  42. return;
  43. case 2:
  44. include(APP_PATH . '/Models/' . $components[1] . '.php');
  45. return;
  46. case 3: //Controllers, Exceptions
  47. include(APP_PATH . '/' . $components[2] . 's/' . $components[1] . $components[2] . '.php');
  48. return;
  49. }
  50. } elseif (strpos($class, 'Minz') === 0) {
  51. include(LIB_PATH . '/' . str_replace('_', '/', $class) . '.php');
  52. } elseif (str_starts_with($class, 'SimplePie\\')) {
  53. $prefix = 'SimplePie\\';
  54. $base_dir = LIB_PATH . '/simplepie/simplepie/src/';
  55. $relative_class_name = substr($class, strlen($prefix));
  56. include $base_dir . str_replace('\\', '/', $relative_class_name) . '.php';
  57. } elseif (str_starts_with($class, 'Gt\\CssXPath\\')) {
  58. $prefix = 'Gt\\CssXPath\\';
  59. $base_dir = LIB_PATH . '/phpgt/cssxpath/src/';
  60. $relative_class_name = substr($class, strlen($prefix));
  61. include $base_dir . str_replace('\\', '/', $relative_class_name) . '.php';
  62. } elseif (str_starts_with($class, 'marienfressinaud\\LibOpml\\')) {
  63. $prefix = 'marienfressinaud\\LibOpml\\';
  64. $base_dir = LIB_PATH . '/marienfressinaud/lib_opml/src/LibOpml/';
  65. $relative_class_name = substr($class, strlen($prefix));
  66. include $base_dir . str_replace('\\', '/', $relative_class_name) . '.php';
  67. } elseif (str_starts_with($class, 'PHPMailer\\PHPMailer\\')) {
  68. $prefix = 'PHPMailer\\PHPMailer\\';
  69. $base_dir = LIB_PATH . '/phpmailer/phpmailer/src/';
  70. $relative_class_name = substr($class, strlen($prefix));
  71. include $base_dir . str_replace('\\', '/', $relative_class_name) . '.php';
  72. }
  73. }
  74. spl_autoload_register('classAutoloader');
  75. //</Auto-loading>
  76. /**
  77. * @param array<mixed,mixed> $array
  78. * @phpstan-assert-if-true array<string,mixed> $array
  79. */
  80. function is_array_keys_string(array $array): bool {
  81. foreach ($array as $key => $value) {
  82. if (!is_string($key)) {
  83. return false;
  84. }
  85. }
  86. return true;
  87. }
  88. /**
  89. * @param array<mixed,mixed> $array
  90. * @phpstan-assert-if-true array<mixed,string> $array
  91. */
  92. function is_array_values_string(array $array): bool {
  93. foreach ($array as $value) {
  94. if (!is_string($value)) {
  95. return false;
  96. }
  97. }
  98. return true;
  99. }
  100. /**
  101. * Memory efficient replacement of `echo json_encode(...)`
  102. * @param array<mixed>|mixed $json
  103. * @param int $optimisationDepth Number of levels for which to perform memory optimisation
  104. * before calling the faster native JSON serialisation.
  105. * Set to negative value for infinite depth.
  106. */
  107. function echoJson($json, int $optimisationDepth = -1): void {
  108. if ($optimisationDepth === 0 || !is_array($json)) {
  109. echo json_encode($json, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
  110. return;
  111. }
  112. $first = true;
  113. if (array_is_list($json)) {
  114. echo '[';
  115. foreach ($json as $item) {
  116. if ($first) {
  117. $first = false;
  118. } else {
  119. echo ',';
  120. }
  121. echoJson($item, $optimisationDepth - 1);
  122. }
  123. echo ']';
  124. } else {
  125. echo '{';
  126. foreach ($json as $key => $value) {
  127. if ($first) {
  128. $first = false;
  129. } else {
  130. echo ',';
  131. }
  132. echo json_encode($key, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE), ':';
  133. echoJson($value, $optimisationDepth - 1);
  134. }
  135. echo '}';
  136. }
  137. }
  138. function idn_to_puny(string $url): string {
  139. if (function_exists('idn_to_ascii')) {
  140. $idn = parse_url($url, PHP_URL_HOST);
  141. if (is_string($idn) && $idn != '') {
  142. $puny = idn_to_ascii($idn);
  143. $pos = strpos($url, $idn);
  144. if ($puny != false && $pos !== false) {
  145. $url = substr_replace($url, $puny, $pos, strlen($idn));
  146. }
  147. }
  148. }
  149. return $url;
  150. }
  151. function checkUrl(string $url, bool $fixScheme = true): string|false {
  152. $url = trim($url);
  153. if ($url == '') {
  154. return '';
  155. }
  156. if ($fixScheme && preg_match('#^https?://#i', $url) !== 1) {
  157. $url = 'https://' . ltrim($url, '/');
  158. }
  159. $url = idn_to_puny($url); // https://bugs.php.net/bug.php?id=53474
  160. $urlRelaxed = str_replace('_', 'z', $url); //PHP discussion #64948 Underscore
  161. if (is_string(filter_var($urlRelaxed, FILTER_VALIDATE_URL))) {
  162. return $url;
  163. } else {
  164. return false;
  165. }
  166. }
  167. function safe_ascii(?string $text): string {
  168. return $text === null ? '' : (filter_var($text, FILTER_DEFAULT, FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH) ?: '');
  169. }
  170. if (function_exists('mb_convert_encoding')) {
  171. function safe_utf8(string $text): string {
  172. return mb_convert_encoding($text, 'UTF-8', 'UTF-8') ?: '';
  173. }
  174. } elseif (function_exists('iconv')) {
  175. function safe_utf8(string $text): string {
  176. return iconv('UTF-8', 'UTF-8//IGNORE', $text) ?: '';
  177. }
  178. } else {
  179. function safe_utf8(string $text): string {
  180. return $text;
  181. }
  182. }
  183. function escapeToUnicodeAlternative(string $text, bool $extended = true): string {
  184. $text = htmlspecialchars_decode($text, ENT_QUOTES);
  185. //Problematic characters
  186. $problem = ['&', '<', '>'];
  187. //Use their fullwidth Unicode form instead:
  188. $replace = ['&', '<', '>'];
  189. // https://raw.githubusercontent.com/mihaip/google-reader-api/master/wiki/StreamId.wiki
  190. if ($extended) {
  191. $problem += ["'", '"', '^', '?', '\\', '/', ',', ';'];
  192. $replace += ["’", '"', '^', '?', '\', '/', ',', ';'];
  193. }
  194. return trim(str_replace($problem, $replace, $text));
  195. }
  196. function format_number(int|float $n, int $precision = 0): string {
  197. // number_format does not seem to be Unicode-compatible
  198. return str_replace(' ', ' ', // Thin non-breaking space
  199. number_format((float)$n, $precision, '.', ' ')
  200. );
  201. }
  202. function format_bytes(int $bytes, int $precision = 2, string $system = 'IEC'): string {
  203. if ($system === 'IEC') {
  204. $base = 1024;
  205. $units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
  206. } elseif ($system === 'SI') {
  207. $base = 1000;
  208. $units = ['B', 'KB', 'MB', 'GB', 'TB'];
  209. } else {
  210. return format_number($bytes, $precision);
  211. }
  212. $bytes = max(intval($bytes), 0);
  213. $pow = $bytes === 0 ? 0 : floor(log($bytes) / log($base));
  214. $pow = min($pow, count($units) - 1);
  215. $bytes /= pow($base, $pow);
  216. return format_number($bytes, $precision) . ' ' . $units[$pow];
  217. }
  218. function timestamptodate(int $t, bool $hour = true): string {
  219. $month = _t('gen.date.' . date('M', $t));
  220. if ($hour) {
  221. $date = _t('gen.date.format_date_hour', $month);
  222. } else {
  223. $date = _t('gen.date.format_date', $month);
  224. }
  225. return @date($date, $t) ?: '';
  226. }
  227. /**
  228. * Decode HTML entities but preserve XML entities.
  229. */
  230. function html_only_entity_decode(?string $text): string {
  231. /** @var array<string,string>|null $htmlEntitiesOnly */
  232. static $htmlEntitiesOnly = null;
  233. if ($htmlEntitiesOnly === null) {
  234. $htmlEntitiesOnly = array_flip(array_diff(
  235. get_html_translation_table(HTML_ENTITIES, ENT_NOQUOTES, 'UTF-8'), //Decode HTML entities
  236. get_html_translation_table(HTML_SPECIALCHARS, ENT_NOQUOTES, 'UTF-8') //Preserve XML entities
  237. ));
  238. }
  239. return $text == null ? '' : strtr($text, $htmlEntitiesOnly);
  240. }
  241. /**
  242. * Remove passwords in FreshRSS logs.
  243. * See also ../cli/sensitive-log.sh for Web server logs.
  244. * @param array<string,mixed>|string $log
  245. * @return array<string,mixed>|string
  246. */
  247. function sensitive_log(array|string $log): array|string {
  248. if (is_array($log)) {
  249. foreach ($log as $k => $v) {
  250. if (in_array($k, ['api_key', 'Passwd', 'T'], true)) {
  251. $log[$k] = '██';
  252. } elseif ((is_array($v) && is_array_keys_string($v)) || is_string($v)) {
  253. $log[$k] = sensitive_log($v);
  254. } else {
  255. return '';
  256. }
  257. }
  258. } elseif (is_string($log)) {
  259. $log = preg_replace([
  260. '/\b(auth=.*?\/)[^&]+/i',
  261. '/\b(Passwd=)[^&]+/i',
  262. '/\b(Authorization)[^&]+/i',
  263. ], '$1█', $log) ?? '';
  264. }
  265. return $log;
  266. }
  267. /**
  268. * @param array<string,mixed> $attributes
  269. * @param array<int,mixed> $curl_options
  270. * @throws FreshRSS_Context_Exception
  271. */
  272. function customSimplePie(array $attributes = [], array $curl_options = []): \SimplePie\SimplePie {
  273. $limits = FreshRSS_Context::systemConf()->limits;
  274. $simplePie = new \SimplePie\SimplePie();
  275. if (FreshRSS_Context::systemConf()->simplepie_syslog_enabled) {
  276. $simplePie->get_registry()->register(\SimplePie\File::class, FreshRSS_SimplePieResponse::class);
  277. }
  278. $simplePie->set_useragent(FRESHRSS_USERAGENT);
  279. $simplePie->set_cache_name_function('sha1');
  280. $simplePie->set_cache_location(CACHE_PATH);
  281. $simplePie->set_cache_duration($limits['cache_duration'], $limits['cache_duration_min'], $limits['cache_duration_max']);
  282. $simplePie->enable_order_by_date(false);
  283. $feed_timeout = empty($attributes['timeout']) || !is_numeric($attributes['timeout']) ? 0 : (int)$attributes['timeout'];
  284. $simplePie->set_timeout($feed_timeout > 0 ? $feed_timeout : $limits['timeout']);
  285. $curl_options = array_replace(FreshRSS_Context::systemConf()->curl_options, $curl_options);
  286. if (isset($attributes['ssl_verify'])) {
  287. $curl_options[CURLOPT_SSL_VERIFYHOST] = empty($attributes['ssl_verify']) ? 0 : 2;
  288. $curl_options[CURLOPT_SSL_VERIFYPEER] = (bool)$attributes['ssl_verify'];
  289. if (empty($attributes['ssl_verify'])) {
  290. $curl_options[CURLOPT_SSL_CIPHER_LIST] = 'DEFAULT@SECLEVEL=1';
  291. }
  292. }
  293. if (!empty($attributes['curl_params']) && is_array($attributes['curl_params'])) {
  294. foreach ($attributes['curl_params'] as $co => $v) {
  295. if (is_int($co)) {
  296. $curl_options[$co] = $v;
  297. }
  298. }
  299. }
  300. if (!empty($curl_options[CURLOPT_PROXYTYPE]) && ($curl_options[CURLOPT_PROXYTYPE] < 0 || $curl_options[CURLOPT_PROXYTYPE] === 3)) {
  301. // 3 is legacy for NONE
  302. unset($curl_options[CURLOPT_PROXYTYPE]);
  303. if (isset($curl_options[CURLOPT_PROXY])) {
  304. unset($curl_options[CURLOPT_PROXY]);
  305. }
  306. }
  307. $simplePie->set_curl_options($curl_options);
  308. $simplePie->strip_comments(true);
  309. $simplePie->strip_htmltags([
  310. 'base', 'blink', 'body', 'doctype', 'embed',
  311. 'font', 'form', 'frame', 'frameset', 'html',
  312. 'link', 'input', 'marquee', 'meta', 'noscript',
  313. 'object', 'param', 'plaintext', 'script', 'style',
  314. 'svg', //TODO: Support SVG after sanitizing and URL rewriting of xlink:href
  315. ]);
  316. $simplePie->rename_attributes(['id', 'class']);
  317. $simplePie->strip_attributes(array_merge($simplePie->strip_attributes, [
  318. 'autoplay', 'class', 'form', 'formaction',
  319. 'onload', 'onunload', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup',
  320. 'onmouseover', 'onmousemove', 'onmouseout', 'onfocus', 'onblur',
  321. 'onkeypress', 'onkeydown', 'onkeyup', 'onselect', 'onchange', 'seamless', 'sizes', 'srcdoc', 'srcset']));
  322. $simplePie->add_attributes([
  323. 'audio' => ['controls' => 'controls', 'preload' => 'none'],
  324. 'iframe' => [
  325. 'allow' => 'accelerometer; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share',
  326. 'sandbox' => 'allow-scripts allow-same-origin',
  327. ],
  328. 'video' => ['controls' => 'controls', 'preload' => 'none'],
  329. ]);
  330. $simplePie->set_url_replacements([
  331. 'a' => 'href',
  332. 'area' => 'href',
  333. 'audio' => 'src',
  334. 'blockquote' => 'cite',
  335. 'del' => 'cite',
  336. 'form' => 'action',
  337. 'iframe' => 'src',
  338. 'img' => [
  339. 'longdesc',
  340. 'src'
  341. ],
  342. 'input' => 'src',
  343. 'ins' => 'cite',
  344. 'q' => 'cite',
  345. 'source' => 'src',
  346. 'track' => 'src',
  347. 'video' => [
  348. 'poster',
  349. 'src',
  350. ],
  351. ]);
  352. $https_domains = [];
  353. $force = @file(FRESHRSS_PATH . '/force-https.default.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
  354. if (is_array($force)) {
  355. $https_domains = array_merge($https_domains, $force);
  356. }
  357. $force = @file(DATA_PATH . '/force-https.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
  358. if (is_array($force)) {
  359. $https_domains = array_merge($https_domains, $force);
  360. }
  361. // Remove whitespace and comments starting with # / ;
  362. $https_domains = preg_replace('%\\s+|[\/#;].*$%', '', $https_domains) ?? $https_domains;
  363. $https_domains = array_filter($https_domains, fn(string $v) => $v !== '');
  364. $simplePie->set_https_domains($https_domains);
  365. return $simplePie;
  366. }
  367. function sanitizeHTML(string $data, string $base = '', ?int $maxLength = null): string {
  368. if ($data === '' || ($maxLength !== null && $maxLength <= 0)) {
  369. return '';
  370. }
  371. if ($maxLength !== null) {
  372. $data = mb_strcut($data, 0, $maxLength, 'UTF-8');
  373. }
  374. /** @var \SimplePie\SimplePie|null $simplePie */
  375. static $simplePie = null;
  376. if ($simplePie === null) {
  377. $simplePie = customSimplePie();
  378. $simplePie->enable_cache(false);
  379. $simplePie->init();
  380. }
  381. $sanitized = $simplePie->sanitize->sanitize($data, \SimplePie\SimplePie::CONSTRUCT_HTML, $base);
  382. if (!is_string($sanitized)) {
  383. return '';
  384. }
  385. $result = html_only_entity_decode($sanitized);
  386. if ($maxLength !== null && strlen($result) > $maxLength) {
  387. //Sanitizing has made the result too long so try again shorter
  388. $data = mb_strcut($result, 0, (2 * $maxLength) - strlen($result) - 2, 'UTF-8');
  389. return sanitizeHTML($data, $base, $maxLength);
  390. }
  391. return $result;
  392. }
  393. function cleanCache(int $hours = 720): void {
  394. // N.B.: GLOB_BRACE is not available on all platforms
  395. $files = array_merge(
  396. glob(CACHE_PATH . '/*.html', GLOB_NOSORT) ?: [],
  397. glob(CACHE_PATH . '/*.json', GLOB_NOSORT) ?: [],
  398. glob(CACHE_PATH . '/*.spc', GLOB_NOSORT) ?: [],
  399. glob(CACHE_PATH . '/*.xml', GLOB_NOSORT) ?: []);
  400. foreach ($files as $file) {
  401. if (substr($file, -10) === 'index.html') {
  402. continue;
  403. }
  404. $cacheMtime = @filemtime($file);
  405. if ($cacheMtime !== false && $cacheMtime < time() - (3600 * $hours)) {
  406. unlink($file);
  407. }
  408. }
  409. }
  410. /**
  411. * Remove the charset meta information of an HTML document, e.g.:
  412. * `<meta charset="..." />`
  413. * `<meta http-equiv="Content-Type" content="text/html; charset=...">`
  414. */
  415. function stripHtmlMetaCharset(string $html): string {
  416. return preg_replace('/<meta\s[^>]*charset\s*=\s*[^>]+>/i', '', $html, 1) ?? '';
  417. }
  418. /**
  419. * Set an XML preamble to enforce the HTML content type charset received by HTTP.
  420. * @param string $html the raw downloaded HTML content
  421. * @param string $contentType an HTTP Content-Type such as 'text/html; charset=utf-8'
  422. * @return string an HTML string with XML encoding information for DOMDocument::loadHTML()
  423. */
  424. function enforceHttpEncoding(string $html, string $contentType = ''): string {
  425. $httpCharset = preg_match('/\bcharset=([0-9a-z_-]{2,12})$/i', $contentType, $matches) === 1 ? $matches[1] : '';
  426. if ($httpCharset == '') {
  427. // No charset defined by HTTP
  428. if (preg_match('/<meta\s[^>]*charset\s*=[\s\'"]*UTF-?8\b/i', substr($html, 0, 2048))) {
  429. // Detect UTF-8 even if declared too deep in HTML for DOMDocument
  430. $httpCharset = 'UTF-8';
  431. } else {
  432. // Do nothing
  433. return $html;
  434. }
  435. }
  436. $httpCharsetNormalized = \SimplePie\Misc::encoding($httpCharset);
  437. if (in_array($httpCharsetNormalized, ['windows-1252', 'US-ASCII'], true)) {
  438. // Default charset for HTTP, do nothing
  439. return $html;
  440. }
  441. if (substr($html, 0, 3) === "\xEF\xBB\xBF" || // UTF-8 BOM
  442. substr($html, 0, 2) === "\xFF\xFE" || // UTF-16 Little Endian BOM
  443. substr($html, 0, 2) === "\xFE\xFF" || // UTF-16 Big Endian BOM
  444. substr($html, 0, 4) === "\xFF\xFE\x00\x00" || // UTF-32 Little Endian BOM
  445. substr($html, 0, 4) === "\x00\x00\xFE\xFF") { // UTF-32 Big Endian BOM
  446. // Existing byte order mark, do nothing
  447. return $html;
  448. }
  449. if (preg_match('/^<[?]xml[^>]+encoding\b/', substr($html, 0, 64))) {
  450. // Existing XML declaration, do nothing
  451. return $html;
  452. }
  453. if ($httpCharsetNormalized !== 'UTF-8') {
  454. // Try to change encoding to UTF-8 using mbstring or iconv or intl
  455. $utf8 = \SimplePie\Misc::change_encoding($html, $httpCharsetNormalized, 'UTF-8');
  456. if (is_string($utf8)) {
  457. $html = stripHtmlMetaCharset($utf8);
  458. $httpCharsetNormalized = 'UTF-8';
  459. }
  460. }
  461. if ($httpCharsetNormalized === 'UTF-8') {
  462. // Save encoding information as XML declaration
  463. return '<' . '?xml version="1.0" encoding="' . $httpCharsetNormalized . '" ?' . ">\n" . $html;
  464. }
  465. // Give up
  466. return $html;
  467. }
  468. /**
  469. * @param string $type {html,json,opml,xml}
  470. * @param array<string,mixed> $attributes
  471. * @param array<int,mixed> $curl_options
  472. */
  473. function httpGet(string $url, string $cachePath, string $type = 'html', array $attributes = [], array $curl_options = []): string {
  474. $limits = FreshRSS_Context::systemConf()->limits;
  475. $feed_timeout = empty($attributes['timeout']) || !is_numeric($attributes['timeout']) ? 0 : intval($attributes['timeout']);
  476. $cacheMtime = @filemtime($cachePath);
  477. if ($cacheMtime !== false && $cacheMtime > time() - intval($limits['cache_duration'])) {
  478. $body = @file_get_contents($cachePath);
  479. if ($body != false) {
  480. syslog(LOG_DEBUG, 'FreshRSS uses cache for ' . \SimplePie\Misc::url_remove_credentials($url));
  481. return $body;
  482. }
  483. }
  484. if (mt_rand(0, 30) === 1) { // Remove old entries once in a while
  485. cleanCache(CLEANCACHE_HOURS);
  486. }
  487. if (FreshRSS_Context::systemConf()->simplepie_syslog_enabled) {
  488. syslog(LOG_INFO, 'FreshRSS GET ' . $type . ' ' . \SimplePie\Misc::url_remove_credentials($url));
  489. }
  490. $accept = '*/*;q=0.8';
  491. switch ($type) {
  492. case 'json':
  493. $accept = 'application/json,application/feed+json,application/javascript;q=0.9,text/javascript;q=0.8,*/*;q=0.7';
  494. break;
  495. case 'opml':
  496. $accept = 'text/x-opml,text/xml;q=0.9,application/xml;q=0.9,*/*;q=0.8';
  497. break;
  498. case 'xml':
  499. $accept = 'application/xml,application/xhtml+xml,text/xml;q=0.9,*/*;q=0.8';
  500. break;
  501. case 'html':
  502. default:
  503. $accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
  504. break;
  505. }
  506. // TODO: Implement HTTP 1.1 conditional GET If-Modified-Since
  507. $ch = curl_init();
  508. if ($ch === false) {
  509. return '';
  510. }
  511. curl_setopt_array($ch, [
  512. CURLOPT_URL => $url,
  513. CURLOPT_HTTPHEADER => ['Accept: ' . $accept],
  514. CURLOPT_USERAGENT => FRESHRSS_USERAGENT,
  515. CURLOPT_CONNECTTIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'],
  516. CURLOPT_TIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'],
  517. CURLOPT_MAXREDIRS => 4,
  518. CURLOPT_RETURNTRANSFER => true,
  519. CURLOPT_FOLLOWLOCATION => true,
  520. CURLOPT_ENCODING => '', //Enable all encodings
  521. //CURLOPT_VERBOSE => 1, // To debug sent HTTP headers
  522. ]);
  523. curl_setopt_array($ch, FreshRSS_Context::systemConf()->curl_options);
  524. if (is_array($attributes['curl_params'] ?? null)) {
  525. $options = $attributes['curl_params'];
  526. if (is_array($options[CURLOPT_HTTPHEADER] ?? null)) {
  527. // Remove headers problematic for security
  528. $options[CURLOPT_HTTPHEADER] = array_filter($options[CURLOPT_HTTPHEADER],
  529. fn($header) => is_string($header) && !preg_match('/^(Remote-User|X-WebAuth-User)\\s*:/i', $header));
  530. // Add Accept header if it is not set
  531. if (preg_grep('/^Accept\\s*:/i', $options[CURLOPT_HTTPHEADER]) === false) {
  532. $options[CURLOPT_HTTPHEADER][] = 'Accept: ' . $accept;
  533. }
  534. $attributes['curl_params'] = $options;
  535. }
  536. curl_setopt_array($ch, $attributes['curl_params']);
  537. }
  538. if (isset($attributes['ssl_verify'])) {
  539. curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, empty($attributes['ssl_verify']) ? 0 : 2);
  540. curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (bool)$attributes['ssl_verify']);
  541. if (empty($attributes['ssl_verify'])) {
  542. curl_setopt($ch, CURLOPT_SSL_CIPHER_LIST, 'DEFAULT@SECLEVEL=1');
  543. }
  544. }
  545. curl_setopt_array($ch, $curl_options);
  546. $body = curl_exec($ch);
  547. $c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
  548. $c_content_type = '' . curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
  549. $c_error = curl_error($ch);
  550. curl_close($ch);
  551. if ($c_status != 200 || $c_error != '' || $body === false) {
  552. Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url);
  553. $body = '';
  554. // TODO: Implement HTTP 410 Gone
  555. } elseif (!is_string($body) || strlen($body) === 0) {
  556. $body = '';
  557. } else {
  558. $body = trim($body, " \n\r\t\v"); // Do not trim \x00 to avoid breaking a BOM
  559. if ($type !== 'json') {
  560. $body = enforceHttpEncoding($body, $c_content_type);
  561. }
  562. }
  563. if (file_put_contents($cachePath, $body) === false) {
  564. Minz_Log::warning("Error saving cache $cachePath for $url");
  565. }
  566. return $body;
  567. }
  568. /**
  569. * Validate an email address, supports internationalized addresses.
  570. *
  571. * @param string $email The address to validate
  572. * @return bool true if email is valid, else false
  573. */
  574. function validateEmailAddress(string $email): bool {
  575. $mailer = new PHPMailer\PHPMailer\PHPMailer();
  576. $mailer->CharSet = 'utf-8';
  577. $punyemail = $mailer->punyencodeAddress($email);
  578. return PHPMailer\PHPMailer\PHPMailer::validateAddress($punyemail, 'html5');
  579. }
  580. /**
  581. * Add support of image lazy loading
  582. * Move content from src attribute to data-original
  583. * @param string $content is the text we want to parse
  584. */
  585. function lazyimg(string $content): string {
  586. return preg_replace([
  587. '/<((?:img|iframe)[^>]+?)src="([^"]+)"([^>]*)>/i',
  588. "/<((?:img|iframe)[^>]+?)src='([^']+)'([^>]*)>/i",
  589. ], [
  590. '<$1src="' . Minz_Url::display('/themes/icons/grey.gif') . '" data-original="$2"$3>',
  591. "<$1src='" . Minz_Url::display('/themes/icons/grey.gif') . "' data-original='$2'$3>",
  592. ],
  593. $content
  594. ) ?? '';
  595. }
  596. /** @return numeric-string */
  597. function uTimeString(): string {
  598. $t = gettimeofday();
  599. // @phpstan-ignore return.type
  600. return ((string)$t['sec']) . str_pad((string)$t['usec'], 6, '0', STR_PAD_LEFT);
  601. }
  602. function invalidateHttpCache(string $username = ''): bool {
  603. if (!FreshRSS_user_Controller::checkUsername($username)) {
  604. Minz_Session::_param('touch', uTimeString());
  605. $username = Minz_User::name() ?? Minz_User::INTERNAL_USER;
  606. }
  607. return FreshRSS_UserDAO::ctouch($username);
  608. }
  609. /**
  610. * @return list<string>
  611. */
  612. function listUsers(): array {
  613. $final_list = [];
  614. $base_path = join_path(DATA_PATH, 'users');
  615. $dir_list = array_values(array_diff(
  616. scandir($base_path) ?: [],
  617. ['..', '.', Minz_User::INTERNAL_USER]
  618. ));
  619. foreach ($dir_list as $file) {
  620. if ($file[0] !== '.' && is_dir(join_path($base_path, $file)) && file_exists(join_path($base_path, $file, 'config.php'))) {
  621. $final_list[] = $file;
  622. }
  623. }
  624. return $final_list;
  625. }
  626. /**
  627. * Return if the maximum number of registrations has been reached.
  628. * Note a max_registrations of 0 means there is no limit.
  629. *
  630. * @return bool true if number of users >= max registrations, false else.
  631. */
  632. function max_registrations_reached(): bool {
  633. $limit_registrations = FreshRSS_Context::systemConf()->limits['max_registrations'];
  634. $number_accounts = count(listUsers());
  635. return $limit_registrations > 0 && $number_accounts >= $limit_registrations;
  636. }
  637. /**
  638. * Register and return the configuration for a given user.
  639. *
  640. * Note this function has been created to generate temporary configuration
  641. * objects. If you need a long-time configuration, please don't use this function.
  642. *
  643. * @param string $username the name of the user of which we want the configuration.
  644. * @return FreshRSS_UserConfiguration|null object, or null if the configuration cannot be loaded.
  645. * @throws Minz_ConfigurationNamespaceException
  646. */
  647. function get_user_configuration(string $username): ?FreshRSS_UserConfiguration {
  648. if (!FreshRSS_user_Controller::checkUsername($username)) {
  649. return null;
  650. }
  651. $namespace = 'user_' . $username;
  652. try {
  653. FreshRSS_UserConfiguration::register($namespace,
  654. USERS_PATH . '/' . $username . '/config.php',
  655. FRESHRSS_PATH . '/config-user.default.php');
  656. } catch (Minz_FileNotExistException $e) {
  657. Minz_Log::warning($e->getMessage(), ADMIN_LOG);
  658. return null;
  659. }
  660. $user_conf = FreshRSS_UserConfiguration::get($namespace);
  661. return $user_conf;
  662. }
  663. /**
  664. * Converts an IP (v4 or v6) to a binary representation using inet_pton
  665. *
  666. * @param string $ip the IP to convert
  667. * @return string a binary representation of the specified IP
  668. */
  669. function ipToBits(string $ip): string {
  670. $binaryip = '';
  671. foreach (str_split(inet_pton($ip) ?: '') as $char) {
  672. $binaryip .= str_pad(decbin(ord($char)), 8, '0', STR_PAD_LEFT);
  673. }
  674. return $binaryip;
  675. }
  676. /**
  677. * Check if an ip belongs to the provided range (in CIDR format)
  678. *
  679. * @param string $ip the IP that we want to verify (ex: 192.168.16.1)
  680. * @param string $range the range to check against (ex: 192.168.16.0/24)
  681. * @return bool true if the IP is in the range, otherwise false
  682. */
  683. function checkCIDR(string $ip, string $range): bool {
  684. $binary_ip = ipToBits($ip);
  685. $split = explode('/', $range);
  686. $subnet = $split[0] ?? '';
  687. if ($subnet == '') {
  688. return false;
  689. }
  690. $binary_subnet = ipToBits($subnet);
  691. $mask_bits = $split[1] ?? '';
  692. $mask_bits = (int)$mask_bits;
  693. if ($mask_bits === 0) {
  694. $mask_bits = null;
  695. }
  696. $ip_net_bits = substr($binary_ip, 0, $mask_bits);
  697. $subnet_bits = substr($binary_subnet, 0, $mask_bits);
  698. return $ip_net_bits === $subnet_bits;
  699. }
  700. /**
  701. * Use CONN_REMOTE_ADDR (if available, to be robust even when using Apache mod_remoteip) or REMOTE_ADDR environment variable to determine the connection IP.
  702. */
  703. function connectionRemoteAddress(): string {
  704. $remoteIp = is_string($_SERVER['CONN_REMOTE_ADDR'] ?? null) ? $_SERVER['CONN_REMOTE_ADDR'] : '';
  705. if ($remoteIp == '') {
  706. $remoteIp = is_string($_SERVER['REMOTE_ADDR'] ?? null) ? $_SERVER['REMOTE_ADDR'] : '';
  707. }
  708. if ($remoteIp == 0) {
  709. $remoteIp = '';
  710. }
  711. return $remoteIp;
  712. }
  713. /**
  714. * Check if the client (e.g. last proxy) is allowed to send unsafe headers.
  715. * This uses the `TRUSTED_PROXY` environment variable or the `trusted_sources` configuration option to get an array of the authorized ranges,
  716. * The connection IP is obtained from the `CONN_REMOTE_ADDR` (if available, to be robust even when using Apache mod_remoteip) or `REMOTE_ADDR` environment variables.
  717. * @return bool true if the sender’s IP is in one of the ranges defined in the configuration, else false
  718. */
  719. function checkTrustedIP(): bool {
  720. if (!FreshRSS_Context::hasSystemConf()) {
  721. return false;
  722. }
  723. $remoteIp = connectionRemoteAddress();
  724. if ($remoteIp === '') {
  725. return false;
  726. }
  727. $trusted = getenv('TRUSTED_PROXY');
  728. if ($trusted != 0 && is_string($trusted)) {
  729. $trusted = preg_split('/\s+/', $trusted, -1, PREG_SPLIT_NO_EMPTY);
  730. }
  731. if (!is_array($trusted) || empty($trusted)) {
  732. $trusted = FreshRSS_Context::systemConf()->trusted_sources;
  733. }
  734. foreach ($trusted as $cidr) {
  735. if (checkCIDR($remoteIp, $cidr)) {
  736. return true;
  737. }
  738. }
  739. return false;
  740. }
  741. function httpAuthUser(bool $onlyTrusted = true): string {
  742. $auths = array_intersect_key($_SERVER, ['REMOTE_USER' => '', 'REDIRECT_REMOTE_USER' => '', 'HTTP_REMOTE_USER' => '', 'HTTP_X_WEBAUTH_USER' => '']);
  743. if (count($auths) > 1) {
  744. Minz_Log::warning('Multiple HTTP authentication headers!');
  745. return '';
  746. }
  747. if (!empty($_SERVER['REMOTE_USER']) && is_string($_SERVER['REMOTE_USER'])) {
  748. return $_SERVER['REMOTE_USER'];
  749. }
  750. if (!empty($_SERVER['REDIRECT_REMOTE_USER']) && is_string($_SERVER['REDIRECT_REMOTE_USER'])) {
  751. return $_SERVER['REDIRECT_REMOTE_USER'];
  752. }
  753. if (!$onlyTrusted || checkTrustedIP()) {
  754. if (!empty($_SERVER['HTTP_REMOTE_USER']) && is_string($_SERVER['HTTP_REMOTE_USER'])) {
  755. return $_SERVER['HTTP_REMOTE_USER'];
  756. }
  757. if (!empty($_SERVER['HTTP_X_WEBAUTH_USER']) && is_string($_SERVER['HTTP_X_WEBAUTH_USER'])) {
  758. return $_SERVER['HTTP_X_WEBAUTH_USER'];
  759. }
  760. }
  761. return '';
  762. }
  763. function cryptAvailable(): bool {
  764. $hash = '$2y$04$usesomesillystringfore7hnbRJHxXVLeakoG8K30oukPsA.ztMG';
  765. return $hash === @crypt('password', $hash);
  766. }
  767. /**
  768. * Check PHP and its extensions are well-installed.
  769. *
  770. * @return array<string,bool> of tested values.
  771. */
  772. function check_install_php(): array {
  773. $pdo_mysql = extension_loaded('pdo_mysql');
  774. $pdo_pgsql = extension_loaded('pdo_pgsql');
  775. $pdo_sqlite = extension_loaded('pdo_sqlite');
  776. return [
  777. 'php' => version_compare(PHP_VERSION, FRESHRSS_MIN_PHP_VERSION) >= 0,
  778. 'curl' => extension_loaded('curl'),
  779. 'pdo' => $pdo_mysql || $pdo_sqlite || $pdo_pgsql,
  780. 'pcre' => extension_loaded('pcre'),
  781. 'ctype' => extension_loaded('ctype'),
  782. 'fileinfo' => extension_loaded('fileinfo'),
  783. 'dom' => class_exists('DOMDocument'),
  784. 'json' => extension_loaded('json'),
  785. 'mbstring' => extension_loaded('mbstring'),
  786. 'zip' => extension_loaded('zip'),
  787. ];
  788. }
  789. /**
  790. * Check different data files and directories exist.
  791. * @return array<string,bool> of tested values.
  792. */
  793. function check_install_files(): array {
  794. return [
  795. 'data' => is_dir(DATA_PATH) && touch(DATA_PATH . '/index.html'), // is_writable() is not reliable for a folder on NFS
  796. 'cache' => is_dir(CACHE_PATH) && touch(CACHE_PATH . '/index.html'),
  797. 'users' => is_dir(USERS_PATH) && touch(USERS_PATH . '/index.html'),
  798. 'favicons' => is_dir(DATA_PATH) && touch(DATA_PATH . '/favicons/index.html'),
  799. 'tokens' => is_dir(DATA_PATH) && touch(DATA_PATH . '/tokens/index.html'),
  800. ];
  801. }
  802. /**
  803. * Check database is well-installed.
  804. *
  805. * @return array<string,bool> of tested values.
  806. */
  807. function check_install_database(): array {
  808. $status = [
  809. 'connection' => true,
  810. 'tables' => false,
  811. 'categories' => false,
  812. 'feeds' => false,
  813. 'entries' => false,
  814. 'entrytmp' => false,
  815. 'tag' => false,
  816. 'entrytag' => false,
  817. ];
  818. try {
  819. $dbDAO = FreshRSS_Factory::createDatabaseDAO();
  820. $status['tables'] = $dbDAO->tablesAreCorrect();
  821. $status['categories'] = $dbDAO->categoryIsCorrect();
  822. $status['feeds'] = $dbDAO->feedIsCorrect();
  823. $status['entries'] = $dbDAO->entryIsCorrect();
  824. $status['entrytmp'] = $dbDAO->entrytmpIsCorrect();
  825. $status['tag'] = $dbDAO->tagIsCorrect();
  826. $status['entrytag'] = $dbDAO->entrytagIsCorrect();
  827. } catch (Minz_PDOConnectionException $e) {
  828. $status['connection'] = false;
  829. }
  830. return $status;
  831. }
  832. /**
  833. * Remove a directory recursively.
  834. * From http://php.net/rmdir#110489
  835. */
  836. function recursive_unlink(string $dir): bool {
  837. if (!is_dir($dir)) {
  838. return true;
  839. }
  840. if (is_link($dir)) {
  841. if (PHP_OS_FAMILY === "Windows") {
  842. return rmdir($dir);
  843. }
  844. return unlink($dir);
  845. }
  846. $files = array_diff(scandir($dir) ?: [], ['.', '..']);
  847. foreach ($files as $filename) {
  848. $filename = $dir . '/' . $filename;
  849. if (is_dir($filename)) {
  850. @chmod($filename, 0777);
  851. recursive_unlink($filename);
  852. } else {
  853. unlink($filename);
  854. }
  855. }
  856. return rmdir($dir);
  857. }
  858. /**
  859. * Remove queries where $get is appearing.
  860. * @param string $get the get attribute which should be removed.
  861. * @param array<int,array{get?:string,name?:string,order?:string,search?:string,state?:int,url?:string}> $queries an array of queries.
  862. * @return array<int,array{get?:string,name?:string,order?:string,search?:string,state?:int,url?:string}> without queries where $get is appearing.
  863. */
  864. function remove_query_by_get(string $get, array $queries): array {
  865. $final_queries = [];
  866. foreach ($queries as $query) {
  867. if (empty($query['get']) || $query['get'] !== $get) {
  868. $final_queries[] = $query;
  869. }
  870. }
  871. return $final_queries;
  872. }
  873. function _i(string $icon, int $type = FreshRSS_Themes::ICON_DEFAULT): string {
  874. return FreshRSS_Themes::icon($icon, $type);
  875. }
/**
 * Names of the keys considered as standard shortcut keys:
 * digits, lowercase letters, function keys F1 to F12, and a set of named navigation and editing keys.
 * Used by getNonStandardShortcuts() as the reference list.
 */
const SHORTCUT_KEYS = [
	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
	'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
	'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'F10', 'F11', 'F12',
	'ArrowDown', 'ArrowLeft', 'ArrowRight', 'ArrowUp', 'Backspace', 'Delete',
	'End', 'Enter', 'Escape', 'Home', 'Insert', 'PageDown', 'PageUp', 'Space', 'Tab',
];
  884. /**
  885. * @param array<string> $shortcuts
  886. * @return list<string>
  887. */
  888. function getNonStandardShortcuts(array $shortcuts): array {
  889. $standard = strtolower(implode(' ', SHORTCUT_KEYS));
  890. $nonStandard = array_filter($shortcuts, static function (string $shortcut) use ($standard) {
  891. $shortcut = trim($shortcut);
  892. return $shortcut !== '' && stripos($standard, $shortcut) === false;
  893. });
  894. return array_values($nonStandard);
  895. }
  896. function errorMessageInfo(string $errorTitle, string $error = ''): string {
  897. $errorTitle = htmlspecialchars($errorTitle, ENT_NOQUOTES, 'UTF-8');
  898. $message = '';
  899. $details = '';
  900. $error = trim($error);
  901. // Prevent empty tags by checking if error is not empty first
  902. if ($error !== '') {
  903. $error = htmlspecialchars($error, ENT_NOQUOTES, 'UTF-8') . "\n";
  904. // First line is the main message, other lines are the details
  905. list($message, $details) = explode("\n", $error, 2);
  906. $message = "<h2>{$message}</h2>";
  907. $details = "<pre>{$details}</pre>";
  908. }
  909. header("Content-Security-Policy: default-src 'self'");
  910. header('Referrer-Policy: same-origin');
  911. return <<<MSG
  912. <!DOCTYPE html><html><header><title>HTTP 500: {$errorTitle}</title></header><body>
  913. <h1>HTTP 500: {$errorTitle}</h1>
  914. {$message}
  915. {$details}
  916. <hr />
  917. <small>For help see the documentation: <a href="https://freshrss.github.io/FreshRSS/en/admins/logs_and_errors.html" target="_blank">
  918. https://freshrss.github.io/FreshRSS/en/admins/logs_and_errors.html</a></small>
  919. </body></html>
  920. MSG;
  921. }