Search.php 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810
  1. <?php
  2. declare(strict_types=1);
  3. require_once(LIB_PATH . '/lib_date.php');
  /**
   * Contains a search from the search form.
   *
   * Extracts the meaningful parts of a user search string (operators such as
   * `e:`, `f:`, `l:`, `label:`, `intitle:`, `author:`, `inurl:`, `date:`,
   * `pubdate:`, `#tag`, their negated `!` / `-` forms, quoted phrases,
   * `/regex/` literals and plain words) and stores them in a convenient object.
   */
  class FreshRSS_Search implements \Stringable {
      /** Pattern prefix of a positive operator: the keyword starts at a word boundary. */
      private const POSITIVE = '\\b';
      /** Pattern prefix of a negated operator: `!` or `-` at the start of the input, after whitespace or after `(`. */
      private const NEGATIVE = '(?<=[\\s(]|^)[!-]';
      /** Captures a comma-separated list of digits (possibly empty). */
      private const ID_LIST = '(?P<search>[0-9,]*)';
      /** Captures a non-empty comma-separated list of digits, or the `*` wildcard. */
      private const LABEL_ID_LIST = '(?P<search>[0-9,]+|[*])';
      /** Captures a `/regex/` literal with optional `i` / `m` flags; the closing slash must not be backslash-escaped. */
      private const REGEX_VALUE = '(?P<search>/.*?(?<!\\\\)/[im]*)';
      /** Captures a value delimited by matching single or double quotes (use with the `U` modifier). */
      private const QUOTED_VALUE = '(?P<delim>[\'"])(?P<search>.*)(?P=delim)';

      /**
       * This contains the user input string
       */
      private string $raw_input = '';

      // The following properties are extracted from the raw input
      /** @var array<string>|null */
      private ?array $entry_ids = null;
      /** @var array<int>|null */
      private ?array $feed_ids = null;
      /** @var array<int>|'*'|null */
      private array|string|null $label_ids = null;
      /** @var array<string>|null */
      private ?array $label_names = null;
      /** @var array<string>|null */
      private ?array $intitle = null;
      /** @var array<string>|null */
      private ?array $intitle_regex = null;
      private int|false|null $min_date = null;
      private int|false|null $max_date = null;
      private int|false|null $min_pubdate = null;
      private int|false|null $max_pubdate = null;
      /** @var array<string>|null */
      private ?array $inurl = null;
      /** @var array<string>|null */
      private ?array $inurl_regex = null;
      /** @var array<string>|null */
      private ?array $author = null;
      /** @var array<string>|null */
      private ?array $author_regex = null;
      /** @var array<string>|null */
      private ?array $tags = null;
      /** @var array<string>|null */
      private ?array $tags_regex = null;
      /** @var array<string>|null */
      private ?array $search = null;
      /** @var array<string>|null */
      private ?array $search_regex = null;

      /** @var array<string>|null */
      private ?array $not_entry_ids = null;
      /** @var array<int>|null */
      private ?array $not_feed_ids = null;
      /** @var array<int>|'*'|null */
      private array|string|null $not_label_ids = null;
      /** @var array<string>|null */
      private ?array $not_label_names = null;
      /** @var array<string>|null */
      private ?array $not_intitle = null;
      /** @var array<string>|null */
      private ?array $not_intitle_regex = null;
      private int|false|null $not_min_date = null;
      private int|false|null $not_max_date = null;
      private int|false|null $not_min_pubdate = null;
      private int|false|null $not_max_pubdate = null;
      /** @var array<string>|null */
      private ?array $not_inurl = null;
      /** @var array<string>|null */
      private ?array $not_inurl_regex = null;
      /** @var array<string>|null */
      private ?array $not_author = null;
      /** @var array<string>|null */
      private ?array $not_author_regex = null;
      /** @var array<string>|null */
      private ?array $not_tags = null;
      /** @var array<string>|null */
      private ?array $not_tags_regex = null;
      /** @var array<string>|null */
      private ?array $not_search = null;
      /** @var array<string>|null */
      private ?array $not_search_regex = null;

      /**
       * Normalises the input, keeps it as the raw input, then runs every parser in turn.
       *
       * Each parser removes what it recognised and hands the remainder to the next one.
       * Negated operators are parsed before their positive counterparts, and free-text
       * parsing comes last so that only unrecognised words end up as search terms.
       */
      public function __construct(string $input) {
          $input = self::cleanSearch($input);
          $input = self::unescape($input);
          $input = FreshRSS_BooleanSearch::unescapeLiteralParentheses($input);
          $this->raw_input = $input;

          $input = $this->parseNotEntryIds($input);
          $input = $this->parseNotFeedIds($input);
          $input = $this->parseNotLabelIds($input);
          $input = $this->parseNotLabelNames($input);

          $input = $this->parseNotPubdateSearch($input);
          $input = $this->parseNotDateSearch($input);

          $input = $this->parseNotIntitleSearch($input);
          $input = $this->parseNotAuthorSearch($input);
          $input = $this->parseNotInurlSearch($input);
          $input = $this->parseNotTagsSearch($input);

          $input = $this->parseEntryIds($input);
          $input = $this->parseFeedIds($input);
          $input = $this->parseLabelIds($input);
          $input = $this->parseLabelNames($input);

          $input = $this->parsePubdateSearch($input);
          $input = $this->parseDateSearch($input);

          $input = $this->parseIntitleSearch($input);
          $input = $this->parseAuthorSearch($input);
          $input = $this->parseInurlSearch($input);
          $input = $this->parseTagsSearch($input);

          $input = $this->parseQuotedSearch($input);
          $input = $this->parseNotSearch($input);
          $this->parseSearch($input);
      }

      #[\Override]
      public function __toString(): string {
          return $this->getRawInput();
      }

      public function getRawInput(): string {
          return $this->raw_input;
      }

      /** @return array<string>|null */
      public function getEntryIds(): ?array {
          return $this->entry_ids;
      }

      /** @return array<string>|null */
      public function getNotEntryIds(): ?array {
          return $this->not_entry_ids;
      }

      /** @return array<int>|null */
      public function getFeedIds(): ?array {
          return $this->feed_ids;
      }

      /** @return array<int>|null */
      public function getNotFeedIds(): ?array {
          return $this->not_feed_ids;
      }

      /** @return array<int>|'*'|null */
      public function getLabelIds(): array|string|null {
          return $this->label_ids;
      }

      /** @return array<int>|'*'|null */
      public function getNotLabelIds(): array|string|null {
          return $this->not_label_ids;
      }

      /** @return array<string>|null */
      public function getLabelNames(): ?array {
          return $this->label_names;
      }

      /** @return array<string>|null */
      public function getNotLabelNames(): ?array {
          return $this->not_label_names;
      }

      /** @return array<string>|null */
      public function getIntitle(): ?array {
          return $this->intitle;
      }

      /** @return array<string>|null */
      public function getIntitleRegex(): ?array {
          return $this->intitle_regex;
      }

      /** @return array<string>|null */
      public function getNotIntitle(): ?array {
          return $this->not_intitle;
      }

      /** @return array<string>|null */
      public function getNotIntitleRegex(): ?array {
          return $this->not_intitle_regex;
      }

      // The date getters deliberately use `?:` so that both `false` and `0` are reported as null.

      public function getMinDate(): ?int {
          return $this->min_date ?: null;
      }

      public function getNotMinDate(): ?int {
          return $this->not_min_date ?: null;
      }

      public function setMinDate(int $value): void {
          $this->min_date = $value;
      }

      public function getMaxDate(): ?int {
          return $this->max_date ?: null;
      }

      public function getNotMaxDate(): ?int {
          return $this->not_max_date ?: null;
      }

      public function setMaxDate(int $value): void {
          $this->max_date = $value;
      }

      public function getMinPubdate(): ?int {
          return $this->min_pubdate ?: null;
      }

      public function getNotMinPubdate(): ?int {
          return $this->not_min_pubdate ?: null;
      }

      public function getMaxPubdate(): ?int {
          return $this->max_pubdate ?: null;
      }

      public function getNotMaxPubdate(): ?int {
          return $this->not_max_pubdate ?: null;
      }

      /** @return array<string>|null */
      public function getInurl(): ?array {
          return $this->inurl;
      }

      /** @return array<string>|null */
      public function getInurlRegex(): ?array {
          return $this->inurl_regex;
      }

      /** @return array<string>|null */
      public function getNotInurl(): ?array {
          return $this->not_inurl;
      }

      /** @return array<string>|null */
      public function getNotInurlRegex(): ?array {
          return $this->not_inurl_regex;
      }

      /** @return array<string>|null */
      public function getAuthor(): ?array {
          return $this->author;
      }

      /** @return array<string>|null */
      public function getAuthorRegex(): ?array {
          return $this->author_regex;
      }

      /** @return array<string>|null */
      public function getNotAuthor(): ?array {
          return $this->not_author;
      }

      /** @return array<string>|null */
      public function getNotAuthorRegex(): ?array {
          return $this->not_author_regex;
      }

      /** @return array<string>|null */
      public function getTags(): ?array {
          return $this->tags;
      }

      /** @return array<string>|null */
      public function getTagsRegex(): ?array {
          return $this->tags_regex;
      }

      /** @return array<string>|null */
      public function getNotTags(): ?array {
          return $this->not_tags;
      }

      /** @return array<string>|null */
      public function getNotTagsRegex(): ?array {
          return $this->not_tags_regex;
      }

      /** @return array<string>|null */
      public function getSearch(): ?array {
          return $this->search;
      }

      /** @return array<string>|null */
      public function getSearchRegex(): ?array {
          return $this->search_regex;
      }

      /** @return array<string>|null */
      public function getNotSearch(): ?array {
          return $this->not_search;
      }

      /** @return array<string>|null */
      public function getNotSearchRegex(): ?array {
          return $this->not_search_regex;
      }

      /**
       * Drop empty strings and re-index the result.
       *
       * The result is re-indexed because array_filter() preserves keys: callers that
       * read `[0]` or walk the array by position need a gap-free list.
       *
       * @param array<string>|null $anArray
       * @return array<string> list of the non-empty values, in their original order
       */
      private static function removeEmptyValues(?array $anArray): array {
          if (empty($anArray)) {
              return [];
          }
          return array_values(array_filter($anArray, static fn (string $value): bool => $value !== ''));
      }

      /**
       * Turn every `+` into a space and trim the result, for a single string or for each element of an array.
       *
       * @param array<string>|string $value
       * @return ($value is array ? array<string> : string)
       */
      private static function decodeSpaces(array|string $value): array|string {
          if (is_array($value)) {
              // Map over the actual elements (keys preserved) rather than assuming keys 0..n-1.
              return array_map(static fn (string $item): string => trim(str_replace('+', ' ', $item)), $value);
          }
          return trim(str_replace('+', ' ', $value));
      }

      /**
       * @param array<string> $strings
       * @return array<string>
       */
      private static function htmlspecialchars_decodes(array $strings): array {
          return array_map(static fn (string $s) => htmlspecialchars_decode($s, ENT_QUOTES), $strings);
      }

      /**
       * Collect the `search` group of every match of $pattern and remove exactly the matched spans from $input.
       *
       * Capturing and removal happen in one pass, so overlapping literals (e.g. `f:1` and `f:12`)
       * cannot leave stray fragments behind, and text the pattern did not match is never touched.
       *
       * @param string $pattern PCRE pattern with a named group `search`
       * @return array{0: array<string>|null, 1: string} the captured values (null when nothing matched
       *         or PCRE reported an error) and the remaining input (unchanged in that case)
       */
      private static function extractMatches(string $pattern, string $input): array {
          $captured = [];
          $remaining = preg_replace_callback(
              $pattern,
              static function (array $match) use (&$captured): string {
                  $captured[] = $match['search'] ?? '';
                  return '';
              },
              $input
          );
          if (!is_string($remaining) || $captured === []) {
              return [null, $input];
          }
          return [$captured, $remaining];
      }

      /**
       * Flatten comma-separated lists into a single list of their non-empty items.
       *
       * @param array<string> $lists
       * @return array<string>
       */
      private static function splitLists(array $lists): array {
          $items = [];
          foreach ($lists as $list) {
              foreach (explode(',', $list) as $item) {
                  if ($item !== '') {
                      $items[] = $item;
                  }
              }
          }
          return $items;
      }

      /**
       * @param array<string> $ids
       * @return array<int>
       */
      private static function toInts(array $ids): array {
          return array_map(static fn (string $id): int => (int)$id, $ids);
      }

      /**
       * Convert captured label ID lists to integers; a `*` list anywhere means "any label".
       *
       * @param array<string> $lists
       * @return array<int>|'*'
       */
      private static function labelIdsFrom(array $lists): array|string {
          if (in_array('*', $lists, true)) {
              return '*';
          }
          return self::toInts(self::splitLists($lists));
      }

      /**
       * Extract label names given as `label:` / `labels:` followed by a quoted or bare comma-separated list.
       *
       * @param string $prefix pattern fragment preceding the keyword (POSITIVE or NEGATIVE)
       * @return array{0: array<string>|null, 1: string} the names (null when the operator is absent) and the remaining input
       */
      private static function parseLabelNameOperator(string $prefix, string $input): array {
          $lists = [];
          [$found, $input] = self::extractMatches('~' . $prefix . 'labels?:' . self::QUOTED_VALUE . '~U', $input);
          if ($found !== null) {
              $lists = $found;
          }
          [$found, $input] = self::extractMatches('~' . $prefix . 'labels?:(?P<search>[^\\s"]*)~', $input);
          if ($found !== null) {
              $lists = array_merge($lists, $found);
          }
          return [$lists === [] ? null : self::splitLists($lists), $input];
      }

      /**
       * Extract a text operator in its three forms, tried in this order: `/regex/`, quoted value, bare word.
       *
       * Quoted and bare values are merged, so mixing both forms keeps every value.
       *
       * @param string $prefix pattern fragment up to and including the keyword (e.g. `\bintitle:`)
       * @param string $bareValue pattern fragment matching an unquoted value
       * @return array{0: array<string>|null, 1: array<string>|null, 2: string}
       *         the regex literals (HTML-decoded), the non-empty plain values, and the remaining input;
       *         either list is null when nothing was found
       */
      private static function parseTextOperator(string $prefix, string $bareValue, string $input): array {
          $regexes = null;
          [$found, $input] = self::extractMatches('~' . $prefix . self::REGEX_VALUE . '~', $input);
          if ($found !== null) {
              $regexes = self::htmlspecialchars_decodes($found);
          }

          $values = [];
          [$found, $input] = self::extractMatches('~' . $prefix . self::QUOTED_VALUE . '~U', $input);
          if ($found !== null) {
              $values = $found;
          }
          [$found, $input] = self::extractMatches('~' . $prefix . '(?P<search>' . $bareValue . ')~', $input);
          if ($found !== null) {
              $values = array_merge($values, $found);
          }
          $values = self::removeEmptyValues($values);

          return [$regexes, $values === [] ? null : $values, $input];
      }

      /**
       * Extract a date operator and convert its first non-empty value with parseDateInterval().
       *
       * @param string $prefix pattern fragment up to and including the keyword (e.g. `\bdate:`)
       * @return array{0: array{0: int|false|null, 1: int|false|null}|null, 1: string}
       *         the [min, max] pair (null when there is no usable value) and the remaining input
       */
      private static function parseDateOperator(string $prefix, string $input): array {
          [$found, $input] = self::extractMatches('~' . $prefix . '(?P<search>[^\\s]*)~', $input);
          $dates = self::removeEmptyValues($found);
          $interval = empty($dates[0]) ? null : parseDateInterval($dates[0]);
          return [$interval, $input];
      }

      /**
       * Parse the search string to find entry (article) IDs.
       */
      private function parseEntryIds(string $input): string {
          [$lists, $input] = self::extractMatches('~' . self::POSITIVE . 'e:' . self::ID_LIST . '~', $input);
          if ($lists !== null) {
              $this->entry_ids = self::splitLists($lists);
          }
          return $input;
      }

      private function parseNotEntryIds(string $input): string {
          [$lists, $input] = self::extractMatches('~' . self::NEGATIVE . 'e:' . self::ID_LIST . '~', $input);
          if ($lists !== null) {
              $this->not_entry_ids = self::splitLists($lists);
          }
          return $input;
      }

      /**
       * Parse the search string to find feed IDs.
       */
      private function parseFeedIds(string $input): string {
          [$lists, $input] = self::extractMatches('~' . self::POSITIVE . 'f:' . self::ID_LIST . '~', $input);
          if ($lists !== null) {
              $this->feed_ids = self::toInts(self::splitLists($lists));
          }
          return $input;
      }

      private function parseNotFeedIds(string $input): string {
          [$lists, $input] = self::extractMatches('~' . self::NEGATIVE . 'f:' . self::ID_LIST . '~', $input);
          if ($lists !== null) {
              $this->not_feed_ids = self::toInts(self::splitLists($lists));
          }
          return $input;
      }

      /**
       * Parse the search string to find tags (labels) IDs.
       */
      private function parseLabelIds(string $input): string {
          [$lists, $input] = self::extractMatches('~' . self::POSITIVE . '[lL]:' . self::LABEL_ID_LIST . '~', $input);
          if ($lists !== null) {
              $this->label_ids = self::labelIdsFrom($lists);
          }
          return $input;
      }

      private function parseNotLabelIds(string $input): string {
          [$lists, $input] = self::extractMatches('~' . self::NEGATIVE . '[lL]:' . self::LABEL_ID_LIST . '~', $input);
          if ($lists !== null) {
              $this->not_label_ids = self::labelIdsFrom($lists);
          }
          return $input;
      }

      /**
       * Parse the search string to find tags (labels) names.
       */
      private function parseLabelNames(string $input): string {
          [$names, $input] = self::parseLabelNameOperator(self::POSITIVE, $input);
          if ($names !== null) {
              $this->label_names = $names;
          }
          return $input;
      }

      /**
       * Parse the search string to find tags (labels) names to exclude.
       */
      private function parseNotLabelNames(string $input): string {
          [$names, $input] = self::parseLabelNameOperator(self::NEGATIVE, $input);
          if ($names !== null) {
              $this->not_label_names = $names;
          }
          return $input;
      }

      /**
       * Parse the search string to find intitle keyword and the search related to it.
       * The search is the first word following the keyword, a quoted value, or a /regex/.
       */
      private function parseIntitleSearch(string $input): string {
          [$this->intitle_regex, $this->intitle, $input] =
              self::parseTextOperator(self::POSITIVE . 'intitle:', '[^\\s"]*', $input);
          return $input;
      }

      private function parseNotIntitleSearch(string $input): string {
          [$this->not_intitle_regex, $this->not_intitle, $input] =
              self::parseTextOperator(self::NEGATIVE . 'intitle:', '[^\\s"]*', $input);
          return $input;
      }

      /**
       * Parse the search string to find author keyword and the search related to it.
       * The search is the first word following the keyword except when using
       * a delimiter. Supported delimiters are single quote (') and double quotes (").
       */
      private function parseAuthorSearch(string $input): string {
          [$this->author_regex, $this->author, $input] =
              self::parseTextOperator(self::POSITIVE . 'author:', '[^\\s"]*', $input);
          return $input;
      }

      private function parseNotAuthorSearch(string $input): string {
          [$this->not_author_regex, $this->not_author, $input] =
              self::parseTextOperator(self::NEGATIVE . 'author:', '[^\\s"]*', $input);
          return $input;
      }

      /**
       * Parse the search string to find inurl keyword and the search related to it.
       * The search is the first word following the keyword, a quoted value, or a /regex/.
       */
      private function parseInurlSearch(string $input): string {
          [$this->inurl_regex, $this->inurl, $input] =
              self::parseTextOperator(self::POSITIVE . 'inurl:', '[^\\s]*', $input);
          return $input;
      }

      private function parseNotInurlSearch(string $input): string {
          [$this->not_inurl_regex, $this->not_inurl, $input] =
              self::parseTextOperator(self::NEGATIVE . 'inurl:', '[^\\s]*', $input);
          return $input;
      }

      /**
       * Parse the search string to find date keyword and the search related to it.
       * The search is the first word following the keyword.
       */
      private function parseDateSearch(string $input): string {
          [$interval, $input] = self::parseDateOperator(self::POSITIVE . 'date:', $input);
          if ($interval !== null) {
              [$this->min_date, $this->max_date] = $interval;
          }
          return $input;
      }

      private function parseNotDateSearch(string $input): string {
          [$interval, $input] = self::parseDateOperator(self::NEGATIVE . 'date:', $input);
          if ($interval !== null) {
              [$this->not_min_date, $this->not_max_date] = $interval;
          }
          return $input;
      }

      /**
       * Parse the search string to find pubdate keyword and the search related to it.
       * The search is the first word following the keyword.
       */
      private function parsePubdateSearch(string $input): string {
          [$interval, $input] = self::parseDateOperator(self::POSITIVE . 'pubdate:', $input);
          if ($interval !== null) {
              [$this->min_pubdate, $this->max_pubdate] = $interval;
          }
          return $input;
      }

      private function parseNotPubdateSearch(string $input): string {
          [$interval, $input] = self::parseDateOperator(self::NEGATIVE . 'pubdate:', $input);
          if ($interval !== null) {
              [$this->not_min_pubdate, $this->not_max_pubdate] = $interval;
          }
          return $input;
      }

      /**
       * Parse the search string to find tags keyword (# followed by a word)
       * and the search related to it.
       * The search is the first word following the #, a quoted value, or a /regex/.
       * In plain values, `+` stands for a space.
       */
      private function parseTagsSearch(string $input): string {
          [$this->tags_regex, $tags, $input] = self::parseTextOperator('#', '[^\\s]+', $input);
          $this->tags = $tags === null ? null : self::decodeSpaces($tags);
          return $input;
      }

      private function parseNotTagsSearch(string $input): string {
          [$this->not_tags_regex, $tags, $input] = self::parseTextOperator(self::NEGATIVE . '#', '[^\\s]+', $input);
          $this->not_tags = $tags === null ? null : self::decodeSpaces($tags);
          return $input;
      }

      /**
       * Parse the search string to find search values.
       * Every word is a distinct search value using a delimiter.
       * Supported delimiters are single quote (') and double quotes (") and regex (/).
       */
      private function parseQuotedSearch(string $input): string {
          $input = self::cleanSearch($input);
          if ($input === '') {
              return '';
          }
          // NOTE(review): `[!-\\\\]` compiles to the character RANGE `!`..`\` rather than the three
          // characters `!`, `-`, `\`. The range contains `(`, so a value directly preceded by `(`
          // is rejected although the first lookbehind allows it. Kept byte-identical because
          // changing it alters how literal parentheses are tokenised — confirm the intent first.
          $start = '(?<=[\\s(]|^)(?<![!-\\\\])';

          [$found, $input] = self::extractMatches('~' . $start . self::REGEX_VALUE . '~', $input);
          if ($found !== null) {
              $this->search_regex = self::htmlspecialchars_decodes($found);
          }
          [$found, $input] = self::extractMatches('~' . $start . self::QUOTED_VALUE . '~U', $input);
          if ($found !== null) {
              $this->search = $found;
          }
          return $input;
      }

      /**
       * Parse the search string to find search values.
       * Every word is a distinct search value.
       */
      private function parseSearch(string $input): string {
          $input = self::cleanSearch($input);
          if ($input === '') {
              return '';
          }
          $words = explode(' ', $input);
          $this->search = is_array($this->search) ? array_merge($this->search, $words) : $words;
          return $input;
      }

      /**
       * Parse the search string to find negated search values: `!` or `-` followed by
       * a /regex/, a quoted value, or a bare word.
       */
      private function parseNotSearch(string $input): string {
          $input = self::cleanSearch($input);
          if ($input === '') {
              return '';
          }

          [$found, $input] = self::extractMatches('~' . self::NEGATIVE . self::REGEX_VALUE . '~', $input);
          if ($found !== null) {
              $this->not_search_regex = self::htmlspecialchars_decodes($found);
          }
          [$found, $input] = self::extractMatches('~' . self::NEGATIVE . self::QUOTED_VALUE . '~U', $input);
          if ($found !== null) {
              $this->not_search = $found;
          }

          $input = self::cleanSearch($input);
          if ($input === '') {
              return '';
          }

          [$found, $input] = self::extractMatches('~' . self::NEGATIVE . '(?P<search>[^\\s]+)~', $input);
          if ($found !== null) {
              $this->not_search = array_merge(is_array($this->not_search) ? $this->not_search : [], $found);
          }
          // Once free text remains, the property becomes an array, even if empty.
          $this->not_search = self::removeEmptyValues($this->not_search);
          return $input;
      }

      /**
       * Remove all unnecessary spaces in the search
       */
      private static function cleanSearch(string $input): string {
          $collapsed = preg_replace('/\\s+/', ' ', $input);
          // preg_replace() returns null on a PCRE error.
          return is_string($collapsed) ? trim($collapsed) : '';
      }

      /** Remove escaping backslashes for parenthesis logic */
      private static function unescape(string $input): string {
          return str_replace(['\\(', '\\)'], ['(', ')'], $input);
      }
  }