lib_rss.php 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995
  1. <?php
  2. declare(strict_types=1);
  // Refuse to run on a PHP older than the minimum FreshRSS declares.
  if (version_compare(PHP_VERSION, FRESHRSS_MIN_PHP_VERSION) < 0) {
      exit(sprintf('FreshRSS error: FreshRSS requires PHP %s+!', FRESHRSS_MIN_PHP_VERSION));
  }
  // Fallback for installations without the mbstring extension.
  if (!function_exists('mb_strcut')) {
      /**
       * Byte-wise stand-in for mb_strcut().
       *
       * The slice is taken with substr(), so $encoding is accepted only for
       * signature compatibility and a multi-byte character may be cut in half.
       *
       * @param string $str the string to cut
       * @param int $start byte offset where the slice starts
       * @param int|null $length maximum number of bytes, or null for "until the end"
       * @param string $encoding ignored by this fallback
       * @return string the requested slice
       */
      function mb_strcut(string $str, int $start, ?int $length = null, string $encoding = 'UTF-8'): string {
          // substr() always returns a string on PHP 8; do not use `?:` here,
          // which would turn the legitimate result '0' into ''.
          return substr($str, $start, $length);
      }
  }
  // Minimal syslog() replacement for platforms where the function is missing.
  if (!function_exists('syslog')) {
      if (COPY_SYSLOG_TO_STDERR && !defined('STDERR')) {
          // STDERR is not predefined in every SAPI; open it ourselves.
          define('STDERR', fopen('php://stderr', 'w'));
      }

      /**
       * Write the message to STDERR when COPY_SYSLOG_TO_STDERR is enabled.
       *
       * @param int $priority unused by this fallback
       * @param string $message the text to log; a newline is appended
       * @return bool true when something was written, false otherwise
       */
      function syslog(int $priority, string $message): bool {
          $stderrUsable = COPY_SYSLOG_TO_STDERR && defined('STDERR') && is_resource(STDERR);
          if (!$stderrUsable) {
              return false;
          }
          $written = fwrite(STDERR, $message . "\n");
          return $written != false;
      }
  }
  // Open the system log under the 'FreshRSS' identity; LOG_PERROR is only
  // requested when log lines must also be copied to STDERR.
  if (function_exists('openlog')) {
      openlog(
          'FreshRSS',
          COPY_SYSLOG_TO_STDERR
              ? (LOG_CONS | LOG_ODELAY | LOG_PID | LOG_PERROR)
              : (LOG_CONS | LOG_ODELAY | LOG_PID),
          LOG_USER
      );
  }
  /**
   * Glue path segments together using the platform directory separator.
   * No normalisation is done: segments are joined exactly as given.
   *
   * @param string ...$path_parts the path segments, in order
   * @return string the segments separated by DIRECTORY_SEPARATOR
   */
  function join_path(...$path_parts): string {
      return implode(DIRECTORY_SEPARATOR, $path_parts);
  }
  //<Auto-loading>
  /**
   * Autoloader for FreshRSS, Minz and the bundled libraries.
   *
   * - `FreshRSS*` classes are split on `_` and looked up under APP_PATH
   *   (1 part: root, 2 parts: Models/, 3 parts: e.g. Controllers/ or Exceptions/).
   * - `Minz*` classes map `_` to `/` under LIB_PATH.
   * - The namespaced libraries are resolved from a prefix => source directory table.
   * Classes matching none of these prefixes are ignored.
   */
  function classAutoloader(string $class): void {
      if (str_starts_with($class, 'FreshRSS')) {
          $parts = explode('_', $class);
          $partCount = count($parts);
          if ($partCount === 1) {
              include(APP_PATH . '/' . $parts[0] . '.php');
          } elseif ($partCount === 2) {
              include(APP_PATH . '/Models/' . $parts[1] . '.php');
          } elseif ($partCount === 3) {
              //Controllers, Exceptions
              include(APP_PATH . '/' . $parts[2] . 's/' . $parts[1] . $parts[2] . '.php');
          }
          return;
      }

      if (str_starts_with($class, 'Minz')) {
          include(LIB_PATH . '/' . str_replace('_', '/', $class) . '.php');
          return;
      }

      // Namespace prefix => directory (relative to LIB_PATH) holding its sources
      $libraryRoots = [
          'SimplePie\\' => '/simplepie/simplepie/src/',
          'Gt\\CssXPath\\' => '/phpgt/cssxpath/src/',
          'marienfressinaud\\LibOpml\\' => '/marienfressinaud/lib_opml/src/LibOpml/',
          'PHPMailer\\PHPMailer\\' => '/phpmailer/phpmailer/src/',
      ];
      foreach ($libraryRoots as $namespacePrefix => $sourceDir) {
          if (!str_starts_with($class, $namespacePrefix)) {
              continue;
          }
          $relativeClass = substr($class, strlen($namespacePrefix));
          include LIB_PATH . $sourceDir . str_replace('\\', '/', $relativeClass) . '.php';
          return;
      }
  }
  spl_autoload_register('classAutoloader');
  //</Auto-loading>
  /**
   * Tell whether every key of the array is a string (true for an empty array).
   *
   * @param array<mixed,mixed> $array
   * @phpstan-assert-if-true array<string,mixed> $array
   */
  function is_array_keys_string(array $array): bool {
      foreach (array_keys($array) as $candidateKey) {
          if (is_string($candidateKey)) {
              continue;
          }
          return false;
      }
      return true;
  }
  /**
   * Tell whether every value of the array is a string (true for an empty array).
   *
   * @param array<mixed,mixed> $array
   * @phpstan-assert-if-true array<mixed,string> $array
   */
  function is_array_values_string(array $array): bool {
      foreach ($array as $element) {
          if (is_string($element)) {
              continue;
          }
          return false;
      }
      return true;
  }
  /**
   * Memory efficient replacement of `echo json_encode(...)`:
   * arrays are streamed element by element instead of being serialised in one go.
   *
   * @param array<mixed>|mixed $json the value to output
   * @param int $optimisationDepth Number of levels for which to perform memory optimisation
   * before calling the faster native JSON serialisation.
   * Set to negative value for infinite depth.
   */
  function echoJson($json, int $optimisationDepth = -1): void {
      $flags = JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE;
      if ($optimisationDepth === 0 || !is_array($json)) {
          echo json_encode($json, $flags);
          return;
      }

      $isList = array_is_list($json);
      $separator = '';
      echo $isList ? '[' : '{';
      foreach ($json as $key => $value) {
          echo $separator;
          $separator = ',';
          if (!$isList) {
              // JSON object member names must be strings: cast so that an integer
              // key is emitted as "1" and not as the bare (invalid) token 1.
              echo json_encode((string)$key, $flags), ':';
          }
          echoJson($value, $optimisationDepth - 1);
      }
      echo $isList ? ']' : '}';
  }
  /**
   * Replace the host of a URL by the result of idn_to_ascii().
   * The URL is returned unchanged when idn_to_ascii() is unavailable, when no
   * host can be extracted, or when the conversion fails.
   */
  function idn_to_puny(string $url): string {
      if (!function_exists('idn_to_ascii')) {
          return $url;
      }
      $host = parse_url($url, PHP_URL_HOST);
      if (!is_string($host) || $host == '') {
          return $url;
      }
      $asciiHost = idn_to_ascii($host);
      $hostOffset = strpos($url, $host);
      if ($asciiHost != false && $hostOffset !== false) {
          return substr_replace($url, $asciiHost, $hostOffset, strlen($host));
      }
      return $url;
  }
  /**
   * Trim and validate a URL.
   *
   * @param string $url the URL to check
   * @param bool $fixScheme when true, `https://` is prepended to a URL that does not start with http(s)://
   * @return string|false '' for a blank input, the (possibly fixed and IDN-converted) URL when it validates, false otherwise
   */
  function checkUrl(string $url, bool $fixScheme = true): string|false {
      $url = trim($url);
      if ($url == '') {
          return '';
      }
      if ($fixScheme) {
          $hasHttpScheme = preg_match('#^https?://#i', $url) === 1;
          if (!$hasHttpScheme) {
              $url = 'https://' . ltrim($url, '/');
          }
      }
      $url = idn_to_puny($url); // https://bugs.php.net/bug.php?id=53474
      // Validate a copy where underscores are replaced (PHP discussion #64948 Underscore)
      $relaxedCopy = str_replace('_', 'z', $url);
      $validated = filter_var($relaxedCopy, FILTER_VALIDATE_URL);
      return is_string($validated) ? $url : false;
  }
  /**
   * Strip every byte below 32 or above 127 from the text.
   *
   * @param string|null $text the text to clean
   * @return string the remaining characters; '' for null or when filtering fails
   */
  function safe_ascii(?string $text): string {
      if ($text === null) {
          return '';
      }
      $filtered = filter_var($text, FILTER_DEFAULT, FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH);
      // Test the type rather than truthiness, otherwise the valid result '0' would become ''.
      return is_string($filtered) ? $filtered : '';
  }
  // safe_utf8() is defined with the best converter available: mbstring, then iconv,
  // and finally a pass-through when neither extension is loaded.
  if (function_exists('mb_convert_encoding')) {
      /**
       * Re-encode from UTF-8 to UTF-8 with mbstring.
       * @return string the converted text, or '' when the conversion fails
       */
      function safe_utf8(string $text): string {
          $clean = mb_convert_encoding($text, 'UTF-8', 'UTF-8');
          // Type check instead of `?:` so that the text '0' is preserved.
          return is_string($clean) ? $clean : '';
      }
  } elseif (function_exists('iconv')) {
      /**
       * Re-encode from UTF-8 to UTF-8 with iconv using //IGNORE.
       * @return string the converted text, or '' when the conversion fails
       */
      function safe_utf8(string $text): string {
          $clean = iconv('UTF-8', 'UTF-8//IGNORE', $text);
          // Type check instead of `?:` so that the text '0' is preserved.
          return is_string($clean) ? $clean : '';
      }
  } else {
      /**
       * No converter available: the text is returned as received.
       */
      function safe_utf8(string $text): string {
          return $text;
      }
  }
  /**
   * Replace characters that are problematic in identifiers by a look-alike Unicode form,
   * after decoding HTML special characters, and trim the result.
   *
   * @param string $text the text to escape
   * @param bool $extended when true, also replace quotes, `^`, `?`, `\`, `/`, `,` and `;`
   * @return string the escaped, trimmed text
   */
  function escapeToUnicodeAlternative(string $text, bool $extended = true): string {
      $text = htmlspecialchars_decode($text, ENT_QUOTES);

      //Problematic characters
      $problem = ['&', '<', '>'];
      //Use their fullwidth Unicode form instead (written as escapes so that
      //the source survives any re-encoding of the file):
      $replace = ["\u{FF06}", "\u{FF1C}", "\u{FF1E}"];

      // https://raw.githubusercontent.com/mihaip/google-reader-api/master/wiki/StreamId.wiki
      if ($extended) {
          // array_merge() and not `+=`: the union operator keeps the left operand's
          // keys 0-2 and would silently drop the first three entries of these lists.
          $problem = array_merge($problem, ["'", '"', '^', '?', '\\', '/', ',', ';']);
          $replace = array_merge($replace, [
              "\u{2019}", // right single quotation mark
              "\u{FF02}", // fullwidth quotation mark
              "\u{FF3E}", // fullwidth circumflex accent
              "\u{FF1F}", // fullwidth question mark
              "\u{FF3C}", // fullwidth reverse solidus
              "\u{FF0F}", // fullwidth solidus
              "\u{FF0C}", // fullwidth comma
              "\u{FF1B}", // fullwidth semicolon
          ]);
      }

      return trim(str_replace($problem, $replace, $text));
  }
  /**
   * Format a number with `.` as decimal point and a narrow no-break space
   * (U+202F) between groups of thousands.
   *
   * @param int|float $n the number to format
   * @param int $precision number of decimal digits
   */
  function format_number(int|float $n, int $precision = 0): string {
      // number_format does not seem to be Unicode-compatible, so group with a plain
      // space first and swap it afterwards. The separator is written as an escape
      // because the literal character is indistinguishable from a normal space.
      return str_replace(' ', "\u{202F}",
          number_format((float)$n, $precision, '.', ' ')
      );
  }
  /**
   * Format a byte count with a unit suffix.
   *
   * @param int $bytes the size in bytes; negative values are treated as 0
   * @param int $precision number of decimal digits
   * @param string $system 'IEC' (powers of 1024, KiB…) or 'SI' (powers of 1000, KB…);
   * any other value returns the plain formatted number without unit
   */
  function format_bytes(int $bytes, int $precision = 2, string $system = 'IEC'): string {
      switch ($system) {
          case 'IEC':
              $base = 1024;
              $units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
              break;
          case 'SI':
              $base = 1000;
              $units = ['B', 'KB', 'MB', 'GB', 'TB'];
              break;
          default:
              return format_number($bytes, $precision);
      }
      $bytes = max(intval($bytes), 0);
      // Index of the largest unit not exceeding the value, capped at the last unit
      $exponent = 0;
      if ($bytes !== 0) {
          $exponent = floor(log($bytes) / log($base));
      }
      $exponent = min($exponent, count($units) - 1);
      $bytes /= pow($base, $exponent);
      $formatted = format_number($bytes, $precision);
      return $formatted . ' ' . $units[$exponent];
  }
  /**
   * Format a timestamp with a translated date pattern.
   *
   * @param int $t the Unix timestamp
   * @param bool $hour selects the 'gen.date.format_date_hour' pattern when true, 'gen.date.format_date' otherwise
   * @return string the formatted date, or '' when date() yields nothing
   */
  function timestamptodate(int $t, bool $hour = true): string {
      $monthLabel = _t('gen.date.' . date('M', $t));
      if ($hour) {
          $pattern = _t('gen.date.format_date_hour', $monthLabel);
      } else {
          $pattern = _t('gen.date.format_date', $monthLabel);
      }
      $formatted = @date($pattern, $t);
      return $formatted ?: '';
  }
  /**
   * Decode HTML entities but preserve XML entities.
   * The translation table is built once and cached in a static variable.
   */
  function html_only_entity_decode(?string $text): string {
      /** @var array<string,string>|null $htmlEntitiesOnly */
      static $htmlEntitiesOnly = null;
      if ($htmlEntitiesOnly === null) {
          $allEntities = get_html_translation_table(HTML_ENTITIES, ENT_NOQUOTES, 'UTF-8'); //Decode HTML entities
          $xmlEntities = get_html_translation_table(HTML_SPECIALCHARS, ENT_NOQUOTES, 'UTF-8'); //Preserve XML entities
          // entity => character, for the entities that are HTML-only
          $htmlEntitiesOnly = array_flip(array_diff($allEntities, $xmlEntities));
      }
      if ($text == null) {
          return '';
      }
      return strtr($text, $htmlEntitiesOnly);
  }
  /**
   * Remove passwords in FreshRSS logs.
   * See also ../cli/sensitive-log.sh for Web server logs.
   *
   * Strings have their `auth=…/`, `Passwd=` and `Authorization` values masked.
   * Arrays are processed recursively; the values of the keys `api_key`, `Passwd`
   * and `T` are masked, and '' is returned as soon as a value is neither a string
   * nor an array with only string keys.
   *
   * @param array<string,mixed>|string $log
   * @return array<string,mixed>|string
   */
  function sensitive_log(array|string $log): array|string {
      if (is_string($log)) {
          return preg_replace([
              '/\b(auth=.*?\/)[^&]+/i',
              '/\b(Passwd=)[^&]+/i',
              '/\b(Authorization)[^&]+/i',
          ], '$1█', $log) ?? '';
      }

      foreach ($log as $name => $entry) {
          if (in_array($name, ['api_key', 'Passwd', 'T'], true)) {
              $log[$name] = '██';
              continue;
          }
          $isStringKeyedArray = is_array($entry) && is_array_keys_string($entry);
          if (!$isStringKeyedArray && !is_string($entry)) {
              return '';
          }
          $log[$name] = sensitive_log($entry);
      }
      return $log;
  }
  /**
   * Create a SimplePie instance set up with the FreshRSS defaults: user agent, cache,
   * timeout, cURL options, HTML sanitising rules and the list of domains forced to HTTPS.
   *
   * @param array<string,mixed> $attributes per-feed settings; the keys read here are
   * `timeout`, `ssl_verify` and `curl_params`
   * @param array<int,mixed> $curl_options cURL options overriding the system-wide ones
   * @throws FreshRSS_Context_Exception
   */
  function customSimplePie(array $attributes = [], array $curl_options = []): \SimplePie\SimplePie {
      $confLimits = FreshRSS_Context::systemConf()->limits;
      $feedParser = new \SimplePie\SimplePie();
      if (FreshRSS_Context::systemConf()->simplepie_syslog_enabled) {
          $feedParser->get_registry()->register(\SimplePie\File::class, FreshRSS_SimplePieResponse::class);
      }
      $feedParser->set_useragent(FRESHRSS_USERAGENT);
      $feedParser->set_cache_name_function('sha1');
      $feedParser->set_cache_location(CACHE_PATH);
      $feedParser->set_cache_duration(
          $confLimits['cache_duration'],
          $confLimits['cache_duration_min'],
          $confLimits['cache_duration_max']
      );
      $feedParser->enable_order_by_date(false);

      // A positive numeric per-feed timeout wins over the system-wide one
      $timeoutOverride = 0;
      if (!empty($attributes['timeout']) && is_numeric($attributes['timeout'])) {
          $timeoutOverride = (int)$attributes['timeout'];
      }
      $feedParser->set_timeout($timeoutOverride > 0 ? $timeoutOverride : $confLimits['timeout']);

      // Layering of cURL options: system configuration < $curl_options < per-feed attributes
      $curl_options = array_replace(FreshRSS_Context::systemConf()->curl_options, $curl_options);
      if (isset($attributes['ssl_verify'])) {
          $verifyTls = !empty($attributes['ssl_verify']);
          $curl_options[CURLOPT_SSL_VERIFYHOST] = $verifyTls ? 2 : 0;
          $curl_options[CURLOPT_SSL_VERIFYPEER] = $verifyTls;
          if (!$verifyTls) {
              $curl_options[CURLOPT_SSL_CIPHER_LIST] = 'DEFAULT@SECLEVEL=1';
          }
      }
      if (!empty($attributes['curl_params']) && is_array($attributes['curl_params'])) {
          foreach ($attributes['curl_params'] as $optionId => $optionValue) {
              // Only integer keys are copied
              if (is_int($optionId)) {
                  $curl_options[$optionId] = $optionValue;
              }
          }
      }
      $proxyType = $curl_options[CURLOPT_PROXYTYPE] ?? null;
      if (!empty($proxyType) && ($proxyType < 0 || $proxyType === 3)) {
          // 3 is legacy for NONE
          unset($curl_options[CURLOPT_PROXYTYPE], $curl_options[CURLOPT_PROXY]);
      }
      $feedParser->set_curl_options($curl_options);

      $feedParser->strip_comments(true);
      $feedParser->strip_htmltags([
          'base', 'blink', 'body', 'doctype', 'embed',
          'font', 'form', 'frame', 'frameset', 'html',
          'link', 'input', 'marquee', 'meta', 'noscript',
          'object', 'param', 'plaintext', 'script', 'style',
          'svg', //TODO: Support SVG after sanitizing and URL rewriting of xlink:href
      ]);
      $feedParser->rename_attributes(['id', 'class']);
      $extraStrippedAttributes = [
          'autoplay', 'class', 'onload', 'onunload', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup',
          'onmouseover', 'onmousemove', 'onmouseout', 'onfocus', 'onblur',
          'onkeypress', 'onkeydown', 'onkeyup', 'onselect', 'onchange', 'seamless', 'sizes', 'srcset',
      ];
      $feedParser->strip_attributes(array_merge($feedParser->strip_attributes, $extraStrippedAttributes));
      $feedParser->add_attributes([
          'audio' => ['controls' => 'controls', 'preload' => 'none'],
          'iframe' => [
              'allow' => 'accelerometer; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share',
              'sandbox' => 'allow-scripts allow-same-origin',
          ],
          'video' => ['controls' => 'controls', 'preload' => 'none'],
      ]);
      $feedParser->set_url_replacements([
          'a' => 'href',
          'area' => 'href',
          'audio' => 'src',
          'blockquote' => 'cite',
          'del' => 'cite',
          'form' => 'action',
          'iframe' => 'src',
          'img' => [
              'longdesc',
              'src'
          ],
          'input' => 'src',
          'ins' => 'cite',
          'q' => 'cite',
          'source' => 'src',
          'track' => 'src',
          'video' => [
              'poster',
              'src',
          ],
      ]);

      // Domains forced to HTTPS: shipped defaults first, then the local list
      $forcedDomains = [];
      $domainListFiles = [
          FRESHRSS_PATH . '/force-https.default.txt',
          DATA_PATH . '/force-https.txt',
      ];
      foreach ($domainListFiles as $domainListFile) {
          $listedLines = @file($domainListFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
          if (is_array($listedLines)) {
              $forcedDomains = array_merge($forcedDomains, $listedLines);
          }
      }
      // Remove whitespace and comments starting with # / ;
      $forcedDomains = preg_replace('%\\s+|[\/#;].*$%', '', $forcedDomains) ?? $forcedDomains;
      $forcedDomains = array_filter($forcedDomains, fn(string $v) => $v !== '');
      $feedParser->set_https_domains($forcedDomains);

      return $feedParser;
  }
  /**
   * Sanitise an HTML fragment with a shared, cache-less SimplePie instance.
   *
   * @param string $data the HTML to sanitise
   * @param string $base base URL handed to the sanitiser
   * @param int|null $maxLength maximum length in bytes of input and output, or null for no limit
   * @return string the sanitised HTML; '' for an empty input, a non-positive limit,
   * or when the sanitiser does not return a string
   */
  function sanitizeHTML(string $data, string $base = '', ?int $maxLength = null): string {
      $hasLimit = $maxLength !== null;
      if ($data === '' || ($hasLimit && $maxLength <= 0)) {
          return '';
      }
      if ($hasLimit) {
          $data = mb_strcut($data, 0, $maxLength, 'UTF-8');
      }

      /** @var \SimplePie\SimplePie|null $simplePie */
      static $simplePie = null;
      if ($simplePie === null) {
          // Created on first use and kept for subsequent calls
          $simplePie = customSimplePie();
          $simplePie->enable_cache(false);
          $simplePie->init();
      }

      $sanitized = $simplePie->sanitize->sanitize($data, \SimplePie\SimplePie::CONSTRUCT_HTML, $base);
      if (!is_string($sanitized)) {
          return '';
      }
      $result = html_only_entity_decode($sanitized);
      if (!$hasLimit || strlen($result) <= $maxLength) {
          return $result;
      }
      //Sanitizing has made the result too long so try again shorter
      $shorterLength = (2 * $maxLength) - strlen($result) - 2;
      return sanitizeHTML(mb_strcut($result, 0, $shorterLength, 'UTF-8'), $base, $maxLength);
  }
  /**
   * Delete the .html, .json, .spc and .xml files of CACHE_PATH that were last
   * modified more than $hours hours ago. Files ending in `index.html` are kept.
   *
   * @param int $hours maximum age of a cache file, in hours
   */
  function cleanCache(int $hours = 720): void {
      // N.B.: GLOB_BRACE is not available on all platforms
      $cachedFiles = [];
      foreach (['/*.html', '/*.json', '/*.spc', '/*.xml'] as $pattern) {
          $matches = glob(CACHE_PATH . $pattern, GLOB_NOSORT) ?: [];
          $cachedFiles = array_merge($cachedFiles, $matches);
      }
      foreach ($cachedFiles as $cachedFile) {
          if (substr($cachedFile, -10) === 'index.html') {
              continue;
          }
          $modifiedAt = @filemtime($cachedFile);
          if ($modifiedAt === false) {
              continue;
          }
          if ($modifiedAt < time() - (3600 * $hours)) {
              unlink($cachedFile);
          }
      }
  }
  /**
   * Remove the first charset meta declaration of an HTML document, e.g.:
   * `<meta charset="..." />`
   * `<meta http-equiv="Content-Type" content="text/html; charset=...">`
   *
   * @return string the HTML without that tag, or '' when the regular expression fails
   */
  function stripHtmlMetaCharset(string $html): string {
      $withoutMeta = preg_replace('/<meta\s[^>]*charset\s*=\s*[^>]+>/i', '', $html, 1);
      return $withoutMeta ?? '';
  }
  /**
   * Set an XML preamble to enforce the HTML content type charset received by HTTP.
   * The document is returned untouched when no charset is known, when the charset is an
   * HTTP default, or when it already starts with a byte order mark or an XML declaration.
   *
   * @param string $html the raw downloaded HTML content
   * @param string $contentType an HTTP Content-Type such as 'text/html; charset=utf-8'
   * @return string an HTML string with XML encoding information for DOMDocument::loadHTML()
   */
  function enforceHttpEncoding(string $html, string $contentType = ''): string {
      $declaredCharset = '';
      if (preg_match('/\bcharset=([0-9a-z_-]{2,12})$/i', $contentType, $matches) === 1) {
          $declaredCharset = $matches[1];
      }
      if ($declaredCharset == '') {
          // No charset defined by HTTP
          $documentStart = substr($html, 0, 2048);
          if (!preg_match('/<meta\s[^>]*charset\s*=[\s\'"]*UTF-?8\b/i', $documentStart)) {
              // Do nothing
              return $html;
          }
          // Detect UTF-8 even if declared too deep in HTML for DOMDocument
          $declaredCharset = 'UTF-8';
      }

      $charset = \SimplePie\Misc::encoding($declaredCharset);
      if (in_array($charset, ['windows-1252', 'US-ASCII'], true)) {
          // Default charset for HTTP, do nothing
          return $html;
      }

      $byteOrderMarks = [
          "\xEF\xBB\xBF", // UTF-8 BOM
          "\xFF\xFE", // UTF-16 Little Endian BOM
          "\xFE\xFF", // UTF-16 Big Endian BOM
          "\xFF\xFE\x00\x00", // UTF-32 Little Endian BOM
          "\x00\x00\xFE\xFF", // UTF-32 Big Endian BOM
      ];
      foreach ($byteOrderMarks as $byteOrderMark) {
          if (str_starts_with($html, $byteOrderMark)) {
              // Existing byte order mark, do nothing
              return $html;
          }
      }

      if (preg_match('/^<[?]xml[^>]+encoding\b/', substr($html, 0, 64))) {
          // Existing XML declaration, do nothing
          return $html;
      }

      if ($charset !== 'UTF-8') {
          // Try to change encoding to UTF-8 using mbstring or iconv or intl
          $converted = \SimplePie\Misc::change_encoding($html, $charset, 'UTF-8');
          if (is_string($converted)) {
              $html = stripHtmlMetaCharset($converted);
              $charset = 'UTF-8';
          }
      }

      if ($charset !== 'UTF-8') {
          // Give up
          return $html;
      }
      // Save encoding information as XML declaration
      return '<' . '?xml version="1.0" encoding="' . $charset . '" ?' . ">\n" . $html;
  }
  /**
   * Download a document over HTTP with cURL, going through a file cache.
   *
   * A non-empty cache file younger than the configured `cache_duration` is returned
   * directly. Otherwise the URL is fetched and the result (an empty string on any
   * error or non-200 status) is written to the cache file and returned. Unless the
   * type is 'json', the body goes through enforceHttpEncoding().
   *
   * cURL options are applied in this order, later ones overriding earlier ones:
   * built-in defaults, system configuration, `$attributes['curl_params']`,
   * `$attributes['ssl_verify']`, then `$curl_options`.
   *
   * @param string $url the address to fetch
   * @param string $cachePath path of the cache file for this URL
   * @param string $type {html,json,opml,xml} selects the Accept header
   * @param array<string,mixed> $attributes per-feed settings; the keys read here are
   * `timeout`, `curl_params` and `ssl_verify`
   * @param array<int,mixed> $curl_options extra cURL options applied last
   * @return string the body, or '' on failure
   */
  function httpGet(string $url, string $cachePath, string $type = 'html', array $attributes = [], array $curl_options = []): string {
      $limits = FreshRSS_Context::systemConf()->limits;
      $feed_timeout = empty($attributes['timeout']) || !is_numeric($attributes['timeout']) ? 0 : intval($attributes['timeout']);

      $cacheMtime = @filemtime($cachePath);
      if ($cacheMtime !== false && $cacheMtime > time() - intval($limits['cache_duration'])) {
          $body = @file_get_contents($cachePath);
          if ($body != false) {
              syslog(LOG_DEBUG, 'FreshRSS uses cache for ' . \SimplePie\Misc::url_remove_credentials($url));
              return $body;
          }
      }

      if (mt_rand(0, 30) === 1) { // Remove old entries once in a while
          cleanCache(CLEANCACHE_HOURS);
      }

      if (FreshRSS_Context::systemConf()->simplepie_syslog_enabled) {
          syslog(LOG_INFO, 'FreshRSS GET ' . $type . ' ' . \SimplePie\Misc::url_remove_credentials($url));
      }

      $accept = match ($type) {
          'json' => 'application/json,application/feed+json,application/javascript;q=0.9,text/javascript;q=0.8,*/*;q=0.7',
          'opml' => 'text/x-opml,text/xml;q=0.9,application/xml;q=0.9,*/*;q=0.8',
          'xml' => 'application/xml,application/xhtml+xml,text/xml;q=0.9,*/*;q=0.8',
          // 'html' and anything else
          default => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      };

      // TODO: Implement HTTP 1.1 conditional GET If-Modified-Since
      $ch = curl_init();
      if ($ch === false) {
          return '';
      }
      curl_setopt_array($ch, [
          CURLOPT_URL => $url,
          CURLOPT_HTTPHEADER => ['Accept: ' . $accept],
          CURLOPT_USERAGENT => FRESHRSS_USERAGENT,
          CURLOPT_CONNECTTIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'],
          CURLOPT_TIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'],
          CURLOPT_MAXREDIRS => 4,
          CURLOPT_RETURNTRANSFER => true,
          CURLOPT_FOLLOWLOCATION => true,
          CURLOPT_ENCODING => '', //Enable all encodings
          //CURLOPT_VERBOSE => 1, // To debug sent HTTP headers
      ]);

      curl_setopt_array($ch, FreshRSS_Context::systemConf()->curl_options);

      if (isset($attributes['curl_params']) && is_array($attributes['curl_params'])) {
          // Keep only integer option identifiers, like customSimplePie() does:
          // curl_setopt_array() cannot apply an array containing any other key.
          $feedCurlParams = array_filter($attributes['curl_params'], 'is_int', ARRAY_FILTER_USE_KEY);
          curl_setopt_array($ch, $feedCurlParams);
      }

      if (isset($attributes['ssl_verify'])) {
          curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, empty($attributes['ssl_verify']) ? 0 : 2);
          curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (bool)$attributes['ssl_verify']);
          if (empty($attributes['ssl_verify'])) {
              curl_setopt($ch, CURLOPT_SSL_CIPHER_LIST, 'DEFAULT@SECLEVEL=1');
          }
      }

      curl_setopt_array($ch, $curl_options);

      $body = curl_exec($ch);
      $c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
      $c_content_type = '' . curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
      $c_error = curl_error($ch);
      curl_close($ch);

      if ($c_status != 200 || $c_error != '' || $body === false) {
          Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url);
          $body = '';
          // TODO: Implement HTTP 410 Gone
      } elseif (!is_string($body) || strlen($body) === 0) {
          $body = '';
      } else {
          $body = trim($body, " \n\r\t\v"); // Do not trim \x00 to avoid breaking a BOM
          if ($type !== 'json') {
              $body = enforceHttpEncoding($body, $c_content_type);
          }
      }

      // The result is cached even when empty; an empty cache file is ignored by the check above.
      if (file_put_contents($cachePath, $body) === false) {
          Minz_Log::warning("Error saving cache $cachePath for $url");
      }

      return $body;
  }
  /**
   * Validate an email address, supports internationalized addresses.
   * The address is punycode-encoded by PHPMailer before being checked
   * with its 'html5' validator.
   *
   * @param string $email The address to validate
   * @return bool true if email is valid, else false
   */
  function validateEmailAddress(string $email): bool {
      $encoder = new PHPMailer\PHPMailer\PHPMailer();
      $encoder->CharSet = 'utf-8';
      $asciiAddress = $encoder->punyencodeAddress($email);
      $isValid = PHPMailer\PHPMailer\PHPMailer::validateAddress($asciiAddress, 'html5');
      return $isValid;
  }
  /**
   * Add support of image lazy loading:
   * in `<img>` and `<iframe>` tags, the `src` value is moved to `data-original`
   * and `src` is pointed to a placeholder image instead.
   *
   * @param string $content is the text we want to parse
   * @return string the rewritten text, or '' when the regular expression fails
   */
  function lazyimg(string $content): string {
      // One pattern per quoting style of the src attribute
      $patterns = [
          '/<((?:img|iframe)[^>]+?)src="([^"]+)"([^>]*)>/i',
          "/<((?:img|iframe)[^>]+?)src='([^']+)'([^>]*)>/i",
      ];
      $replacements = [
          '<$1src="' . Minz_Url::display('/themes/icons/grey.gif') . '" data-original="$2"$3>',
          "<$1src='" . Minz_Url::display('/themes/icons/grey.gif') . "' data-original='$2'$3>",
      ];
      $rewritten = preg_replace($patterns, $replacements, $content);
      return $rewritten ?? '';
  }
  /**
   * Current time as a string of digits: the seconds followed by the
   * microseconds left-padded to 6 digits. '0' if the result is not all digits.
   *
   * @return numeric-string
   */
  function uTimeString(): string {
      $now = @gettimeofday();
      $seconds = 0;
      if (is_numeric($now['sec'])) {
          $seconds = (int)$now['sec'];
      }
      $microseconds = 0;
      if (is_numeric($now['usec'])) {
          $microseconds = (int)$now['usec'];
      }
      $stamp = $seconds . str_pad((string)$microseconds, 6, '0', STR_PAD_LEFT);
      if (!ctype_digit($stamp)) {
          return '0';
      }
      return $stamp;
  }
  /**
   * Touch the given user through FreshRSS_UserDAO::ctouch().
   * When the username does not pass checkUsername(), the session `touch` parameter is
   * refreshed and the current user (or the internal user) is used instead.
   *
   * @param string $username the user to touch, or '' for the current one
   * @return bool the result of FreshRSS_UserDAO::ctouch()
   */
  function invalidateHttpCache(string $username = ''): bool {
      $isUsableName = FreshRSS_user_Controller::checkUsername($username);
      if (!$isUsableName) {
          Minz_Session::_param('touch', uTimeString());
          $username = Minz_User::name() ?? Minz_User::INTERNAL_USER;
      }
      return FreshRSS_UserDAO::ctouch($username);
  }
  /**
   * List the user names found in DATA_PATH/users: the sub-directories that contain
   * a config.php file, skipping hidden entries and the internal user.
   *
   * @return list<string>
   */
  function listUsers(): array {
      $usersDir = join_path(DATA_PATH, 'users');
      $entries = scandir($usersDir) ?: [];
      $candidates = array_values(array_diff($entries, ['..', '.', Minz_User::INTERNAL_USER]));

      $users = [];
      foreach ($candidates as $candidate) {
          if ($candidate[0] === '.') {
              continue;
          }
          if (!is_dir(join_path($usersDir, $candidate))) {
              continue;
          }
          if (file_exists(join_path($usersDir, $candidate, 'config.php'))) {
              $users[] = $candidate;
          }
      }
      return $users;
  }
  /**
   * Tell whether the configured maximum number of accounts has been reached.
   * A `max_registrations` limit of 0 means there is no limit.
   *
   * @return bool true if number of users >= max registrations, false else.
   */
  function max_registrations_reached(): bool {
      $maxAccounts = FreshRSS_Context::systemConf()->limits['max_registrations'];
      $existingAccounts = count(listUsers());
      if (!($maxAccounts > 0)) {
          return false;
      }
      return $existingAccounts >= $maxAccounts;
  }
  /**
   * Register and return the configuration for a given user.
   *
   * This helper is meant for temporary configuration objects;
   * do not use it when a long-lived configuration is needed.
   *
   * @param string $username the name of the user of which we want the configuration.
   * @return FreshRSS_UserConfiguration|null object, or null if the username is rejected
   * or the configuration cannot be loaded.
   * @throws Minz_ConfigurationNamespaceException
   */
  function get_user_configuration(string $username): ?FreshRSS_UserConfiguration {
      if (!FreshRSS_user_Controller::checkUsername($username)) {
          return null;
      }

      $namespace = 'user_' . $username;
      $userConfigFile = USERS_PATH . '/' . $username . '/config.php';
      $defaultConfigFile = FRESHRSS_PATH . '/config-user.default.php';
      try {
          FreshRSS_UserConfiguration::register($namespace, $userConfigFile, $defaultConfigFile);
      } catch (Minz_FileNotExistException $e) {
          // A missing configuration file is logged, not propagated
          Minz_Log::warning($e->getMessage(), ADMIN_LOG);
          return null;
      }

      return FreshRSS_UserConfiguration::get($namespace);
  }
  /**
   * Converts an IP (v4 or v6) to a binary representation using inet_pton
   *
   * @param string $ip the IP to convert
   * @return string the address as a string of '0' and '1' characters
   * (32 for IPv4, 128 for IPv6), or '' when the address cannot be parsed
   */
  function ipToBits(string $ip): string {
      // inet_pton() warns on unparsable input; only its false result matters here
      $packed = @inet_pton($ip);
      if (!is_string($packed) || $packed === '') {
          // Explicit empty result: str_split('') is not [] on every PHP version
          return '';
      }
      $binaryip = '';
      foreach (str_split($packed) as $char) {
          $binaryip .= str_pad(decbin(ord($char)), 8, '0', STR_PAD_LEFT);
      }
      return $binaryip;
  }

  /**
   * Check if an ip belongs to the provided range (in CIDR format)
   *
   * A range without mask, or with a mask that evaluates to 0, only matches the exact
   * same address (historical behaviour, kept as is).
   *
   * @param string $ip the IP that we want to verify (ex: 192.168.16.1)
   * @param string $range the range to check against (ex: 192.168.16.0/24)
   * @return bool true if the IP is in the range, otherwise false; always false when
   * either address is invalid, when they are not of the same family, or when the mask is negative
   */
  function checkCIDR(string $ip, string $range): bool {
      $binary_ip = ipToBits($ip);
      if ($binary_ip === '') {
          // An unparsable client address must never match anything
          return false;
      }

      $split = explode('/', $range);
      $subnet = $split[0];
      if ($subnet === '') {
          return false;
      }
      $binary_subnet = ipToBits($subnet);
      if (strlen($binary_subnet) !== strlen($binary_ip)) {
          // Invalid subnet, or IPv4 compared against IPv6: prefixes are not comparable
          return false;
      }

      $mask_bits = (int)($split[1] ?? '');
      if ($mask_bits < 0) {
          return false;
      }
      if ($mask_bits === 0) {
          // null length makes substr() keep the whole address
          $mask_bits = null;
      }

      $ip_net_bits = substr($binary_ip, 0, $mask_bits);
      $subnet_bits = substr($binary_subnet, 0, $mask_bits);
      return $ip_net_bits === $subnet_bits;
  }
  /**
   * Use CONN_REMOTE_ADDR (if available, to be robust even when using Apache mod_remoteip)
   * or REMOTE_ADDR environment variable to determine the connection IP.
   *
   * @return string the first non-empty string of those two, or '' when there is none
   * or when the value loosely equals 0
   */
  function connectionRemoteAddress(): string {
      $remoteIp = '';
      foreach (['CONN_REMOTE_ADDR', 'REMOTE_ADDR'] as $serverKey) {
          $candidate = $_SERVER[$serverKey] ?? null;
          if (is_string($candidate) && $candidate != '') {
              $remoteIp = $candidate;
              break;
          }
      }
      // Loose comparison on purpose: values such as '0' are discarded as well
      return $remoteIp == 0 ? '' : $remoteIp;
  }
  /**
   * Check if the client (e.g. last proxy) is allowed to send unsafe headers.
   * This uses the `TRUSTED_PROXY` environment variable or the `trusted_sources` configuration option to get an array of the authorized ranges.
   * `TRUSTED_PROXY` is a whitespace-separated list of ranges; when it is unset, empty, or numerically zero (ex: `0`),
   * the `trusted_sources` configuration option is used instead.
   * The connection IP is obtained from the `CONN_REMOTE_ADDR` (if available, to be robust even when using Apache mod_remoteip) or `REMOTE_ADDR` environment variables.
   * @return bool true if the sender’s IP is in one of the ranges defined in the configuration, else false
   */
  function checkTrustedIP(): bool {
      if (!FreshRSS_Context::hasSystemConf()) {
          return false;
      }
      $remoteIp = connectionRemoteAddress();
      if ($remoteIp === '') {
          return false;
      }

      $trusted = [];
      $env = getenv('TRUSTED_PROXY');
      // Explicit numeric test instead of a loose `!= 0`: on PHP 7, a value without a leading digit
      // (such as '::1/128') is loosely equal to 0 and would be ignored.
      if (is_string($env) && !(is_numeric($env) && (float)$env === 0.0)) {
          $trusted = preg_split('/\s+/', $env, -1, PREG_SPLIT_NO_EMPTY) ?: [];
      }
      if (empty($trusted)) {
          $trusted = FreshRSS_Context::systemConf()->trusted_sources;
      }
      if (!is_array($trusted)) {
          // Nothing usable to compare against: do not trust.
          return false;
      }

      foreach ($trusted as $cidr) {
          // Skip non-string entries rather than failing on the typed parameter of checkCIDR().
          if (is_string($cidr) && checkCIDR($remoteIp, $cidr)) {
              return true;
          }
      }
      return false;
  }
  /**
   * Get the name of the user authenticated by the Web server or by a proxy.
   *
   * `REMOTE_USER` and `REDIRECT_REMOTE_USER` are looked up first, in that order.
   * The `HTTP_REMOTE_USER` and `HTTP_X_WEBAUTH_USER` entries are only considered
   * when $onlyTrusted is false or when checkTrustedIP() returns true.
   *
   * @param bool $onlyTrusted whether the HTTP_* entries require a trusted connection IP
   * @return string the first non-empty user name found, or an empty string
   */
  function httpAuthUser(bool $onlyTrusted = true): string {
      // Returns the $_SERVER entry when it is a string other than '' and '0', otherwise ''.
      $lookup = static function (string $key): string {
          $value = $_SERVER[$key] ?? null;
          return (is_string($value) && $value !== '' && $value !== '0') ? $value : '';
      };

      foreach (['REMOTE_USER', 'REDIRECT_REMOTE_USER'] as $serverKey) {
          $user = $lookup($serverKey);
          if ($user !== '') {
              return $user;
          }
      }

      if ($onlyTrusted && !checkTrustedIP()) {
          return '';
      }

      foreach (['HTTP_REMOTE_USER', 'HTTP_X_WEBAUTH_USER'] as $headerKey) {
          $user = $lookup($headerKey);
          if ($user !== '') {
              return $user;
          }
      }
      return '';
  }
  /**
   * Tell whether crypt() reproduces a known hash of the string 'password'
   * when given that same hash as salt.
   *
   * @return bool true when the computed hash is identical to the reference one
   */
  function cryptAvailable(): bool {
      $reference = '$2y$04$usesomesillystringfore7hnbRJHxXVLeakoG8K30oukPsA.ztMG';
      // Errors of crypt() are silenced: a failure simply results in a mismatch.
      $computed = @crypt('password', $reference);
      return $computed === $reference;
  }
  /**
   * Report whether the PHP version and the PHP extensions are suitable.
   *
   * The `pdo` entry is true as soon as one of pdo_mysql, pdo_sqlite, pdo_pgsql is loaded.
   * The `dom` entry tests the existence of the DOMDocument class; the other entries test a loaded extension.
   *
   * @return array<string,bool> of tested values.
   */
  function check_install_php(): array {
      $hasPdoDriver = false;
      foreach (['pdo_mysql', 'pdo_sqlite', 'pdo_pgsql'] as $driver) {
          if (extension_loaded($driver)) {
              $hasPdoDriver = true;
              break;
          }
      }

      $report = [
          'php' => version_compare(PHP_VERSION, FRESHRSS_MIN_PHP_VERSION, '>='),
          'curl' => extension_loaded('curl'),
          'pdo' => $hasPdoDriver,
      ];
      foreach (['pcre', 'ctype', 'fileinfo'] as $extension) {
          $report[$extension] = extension_loaded($extension);
      }
      $report['dom'] = class_exists('DOMDocument');
      foreach (['json', 'mbstring', 'zip'] as $extension) {
          $report[$extension] = extension_loaded($extension);
      }
      return $report;
  }
  /**
   * Check different data files and directories exist and are writable.
   *
   * Each directory is tested by touching an `index.html` file inside it,
   * which creates that file when it does not exist yet.
   *
   * @return array<string,bool> of tested values.
   */
  function check_install_files(): array {
      // is_writable() is not reliable for a folder on NFS, so writability is probed with touch().
      // The directory that will actually receive the file is the one tested with is_dir(),
      // which avoids a touch() warning when a sub-directory is missing.
      $probe = static function (string $dir): bool {
          return is_dir($dir) && touch($dir . '/index.html');
      };
      return [
          'data' => $probe(DATA_PATH),
          'cache' => $probe(CACHE_PATH),
          'users' => $probe(USERS_PATH),
          'favicons' => $probe(DATA_PATH . '/favicons'),
          'tokens' => $probe(DATA_PATH . '/tokens'),
      ];
  }
  /**
   * Report whether the database is reachable and its structure is as expected.
   *
   * `connection` stays true unless a Minz_PDOConnectionException is caught.
   * Checks that were not reached when that exception occurred remain false.
   *
   * @return array<string,bool> of tested values.
   */
  function check_install_database(): array {
      // The connection is assumed fine until proven otherwise; every structure check starts as failed.
      $report = ['connection' => true] + array_fill_keys(
          ['tables', 'categories', 'feeds', 'entries', 'entrytmp', 'tag', 'entrytag'],
          false
      );

      try {
          $dao = FreshRSS_Factory::createDatabaseDAO();
          $report['tables'] = $dao->tablesAreCorrect();
          $report['categories'] = $dao->categoryIsCorrect();
          $report['feeds'] = $dao->feedIsCorrect();
          $report['entries'] = $dao->entryIsCorrect();
          $report['entrytmp'] = $dao->entrytmpIsCorrect();
          $report['tag'] = $dao->tagIsCorrect();
          $report['entrytag'] = $dao->entrytagIsCorrect();
      } catch (Minz_PDOConnectionException $exception) {
          $report['connection'] = false;
      }

      return $report;
  }
  /**
   * Remove a directory recursively.
   * From http://php.net/rmdir#110489
   *
   * Symbolic links found inside the directory are removed as links and never followed,
   * so that nothing outside of $dir gets deleted.
   *
   * @param string $dir the directory to remove
   * @return bool true if $dir is not a directory (nothing to do) or was removed, false if the final rmdir() failed
   */
  function recursive_unlink(string $dir): bool {
      if (!is_dir($dir)) {
          return true;
      }
      $entries = array_diff(scandir($dir) ?: [], ['.', '..']);
      foreach ($entries as $entry) {
          $path = $dir . '/' . $entry;
          // is_dir() follows symbolic links: without the is_link() test, the content of the
          // link target would be deleted, and the link itself would make rmdir() fail.
          if (is_dir($path) && !is_link($path)) {
              // Best effort to make the sub-directory writable before emptying it.
              @chmod($path, 0777);
              recursive_unlink($path);
          } else {
              unlink($path);
          }
      }
      return rmdir($dir);
  }
  /**
   * Filter out the queries targeting $get.
   *
   * A query is kept when its `get` attribute is empty or different from $get.
   * The remaining queries are re-indexed from 0, in their original order.
   *
   * @param string $get the get attribute which should be removed.
   * @param array<int,array{get?:string,name?:string,order?:string,search?:string,state?:int,url?:string}> $queries an array of queries.
   * @return array<int,array{get?:string,name?:string,order?:string,search?:string,state?:int,url?:string}> without queries where $get is appearing.
   */
  function remove_query_by_get(string $get, array $queries): array {
      $kept = array_filter($queries, static function ($query) use ($get): bool {
          $targetsGet = !empty($query['get']) && $query['get'] === $get;
          return !$targetsGet;
      });
      return array_values($kept);
  }
  /**
   * Short alias of FreshRSS_Themes::icon(), forwarding both arguments unchanged.
   *
   * @param string $icon passed as first argument to FreshRSS_Themes::icon()
   * @param int $type passed as second argument to FreshRSS_Themes::icon(), FreshRSS_Themes::ICON_DEFAULT by default
   * @return string the value returned by FreshRSS_Themes::icon()
   */
  function _i(string $icon, int $type = FreshRSS_Themes::ICON_DEFAULT): string {
      $rendered = FreshRSS_Themes::icon($icon, $type);
      return $rendered;
  }
  /** Keys considered as standard shortcut keys by getNonStandardShortcuts(). */
  const SHORTCUT_KEYS = [
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
      'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
      'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
      'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'F10', 'F11', 'F12',
      'ArrowDown', 'ArrowLeft', 'ArrowRight', 'ArrowUp', 'Backspace', 'Delete',
      'End', 'Enter', 'Escape', 'Home', 'Insert', 'PageDown', 'PageUp', 'Space', 'Tab',
  ];

  /**
   * Get the shortcuts that are not one of SHORTCUT_KEYS.
   *
   * Shortcuts are trimmed and compared as a whole to the standard keys, ignoring case.
   * Shortcuts that are empty once trimmed are ignored.
   * The returned values are the original (untrimmed) ones, re-indexed from 0.
   *
   * @param array<string> $shortcuts
   * @return list<string>
   */
  function getNonStandardShortcuts(array $shortcuts): array {
      $standard = array_map('strtolower', SHORTCUT_KEYS);
      $nonStandard = array_filter($shortcuts, static function (string $shortcut) use ($standard): bool {
          $shortcut = trim($shortcut);
          // Exact membership: a substring search would accept fragments such as 'Arrow' or 'Page'.
          return $shortcut !== '' && !in_array(strtolower($shortcut), $standard, true);
      });
      return array_values($nonStandard);
  }
  /**
   * Build a minimal standalone HTML page describing an error.
   *
   * The first line of $error is shown as the main message, the following lines as details.
   * Both $errorTitle and $error are HTML-escaped; invalid UTF-8 sequences are substituted instead of blanking the text.
   * As a side effect, a restrictive `Content-Security-Policy` header is sent if headers can still be sent.
   *
   * @param string $errorTitle the title of the error, shown after "HTTP 500: "
   * @param string $error optional description of the error, possibly on several lines
   * @return string the HTML page
   */
  function errorMessageInfo(string $errorTitle, string $error = ''): string {
      $escapeFlags = ENT_NOQUOTES | ENT_SUBSTITUTE;
      $errorTitle = htmlspecialchars($errorTitle, $escapeFlags, 'UTF-8');

      $message = '';
      $details = '';
      $error = trim($error);
      // Prevent empty tags: <h2> only when there is an error, <pre> only when there are details
      if ($error !== '') {
          // First line is the main message, other lines are the details
          $lines = explode("\n", htmlspecialchars($error, $escapeFlags, 'UTF-8'), 2);
          $message = "<h2>{$lines[0]}</h2>";
          if (isset($lines[1])) {
              $details = "<pre>{$lines[1]}\n</pre>";
          }
      }

      // This page may be requested after some output already started: avoid a header() warning then
      if (!headers_sent()) {
          header("Content-Security-Policy: default-src 'self'");
      }

      $helpUrl = 'https://freshrss.github.io/FreshRSS/en/admins/logs_and_errors.html';
      return implode("\n", [
          "<!DOCTYPE html><html><head><title>HTTP 500: {$errorTitle}</title></head><body>",
          "<h1>HTTP 500: {$errorTitle}</h1>",
          $message,
          $details,
          '<hr />',
          '<small>For help see the documentation: <a href="' . $helpUrl . '" target="_blank">',
          $helpUrl . '</a></small>',
          '</body></html>',
      ]);
  }