Search.php 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984
  1. <?php
  2. declare(strict_types=1);
  3. require_once LIB_PATH . '/lib_date.php';
  4. /**
  5. * Contains a search from the search form.
  6. *
  7. * It allows to extract meaningful bits of the search and store them in a
  8. * convenient object
  9. */
  10. class FreshRSS_Search implements \Stringable {
  /** Search string as stored by the constructor, after its clean-up and unescaping steps. */
  private string $raw_input = '';

  // Everything below starts as null and is filled in by the parse*() methods
  // while the constructor walks through the raw input.

  // --- Positive filters ---------------------------------------------------
  /** @var list<string>|null entry IDs given with `e:` */
  private ?array $entry_ids = null;
  /** @var list<int>|null feed IDs given with `f:` */
  private ?array $feed_ids = null;
  /** @var list<int>|null category IDs given with `c:` */
  private ?array $category_ids = null;
  /** @var list<list<int>|'*'>|null one ID list (or the `*` wildcard) per `l:` / `L:` token */
  private $label_ids = null;
  /** @var list<list<string>>|null one name list per `label:` / `labels:` token */
  private ?array $label_names = null;
  /** @var list<string>|null plain `intitle:` values */
  private ?array $intitle = null;
  /** @var list<string>|null `intitle:/regex/` values */
  private ?array $intitle_regex = null;
  /** @var list<string>|null plain `intext:` values */
  private ?array $intext = null;
  /** @var list<string>|null `intext:/regex/` values */
  private ?array $intext_regex = null;
  /** @var int|false|null lower bound taken from `date:` */
  private $min_date = null;
  /** @var int|false|null upper bound taken from `date:` */
  private $max_date = null;
  /** @var int|false|null lower bound taken from `pubdate:` */
  private $min_pubdate = null;
  /** @var int|false|null upper bound taken from `pubdate:` */
  private $max_pubdate = null;
  /** @var int|false|null */
  private $min_userdate = null;
  /** @var int|false|null */
  private $max_userdate = null;
  /** @var list<string>|null plain `inurl:` values */
  private ?array $inurl = null;
  /** @var list<string>|null `inurl:/regex/` values */
  private ?array $inurl_regex = null;
  /** @var list<string>|null plain `author:` values */
  private ?array $author = null;
  /** @var list<string>|null `author:/regex/` values */
  private ?array $author_regex = null;
  /** @var list<string>|null */
  private ?array $tags = null;
  /** @var list<string>|null */
  private ?array $tags_regex = null;
  /** @var list<string>|null */
  private ?array $search = null;
  /** @var list<string>|null */
  private ?array $search_regex = null;

  // --- Negated filters (operators prefixed with `!` or `-`) ----------------
  /** @var list<string>|null */
  private ?array $not_entry_ids = null;
  /** @var list<int>|null */
  private ?array $not_feed_ids = null;
  /** @var list<int>|null */
  private ?array $not_category_ids = null;
  /** @var list<list<int>|'*'>|null */
  private $not_label_ids = null;
  /** @var list<list<string>>|null */
  private ?array $not_label_names = null;
  /** @var list<string>|null */
  private ?array $not_intitle = null;
  /** @var list<string>|null */
  private ?array $not_intitle_regex = null;
  /** @var list<string>|null */
  private ?array $not_intext = null;
  /** @var list<string>|null */
  private ?array $not_intext_regex = null;
  /** @var int|false|null */
  private $not_min_date = null;
  /** @var int|false|null */
  private $not_max_date = null;
  /** @var int|false|null */
  private $not_min_pubdate = null;
  /** @var int|false|null */
  private $not_max_pubdate = null;
  /** @var int|false|null */
  private $not_min_userdate = null;
  /** @var int|false|null */
  private $not_max_userdate = null;
  /** @var list<string>|null */
  private ?array $not_inurl = null;
  /** @var list<string>|null */
  private ?array $not_inurl_regex = null;
  /** @var list<string>|null */
  private ?array $not_author = null;
  /** @var list<string>|null */
  private ?array $not_author_regex = null;
  /** @var list<string>|null */
  private ?array $not_tags = null;
  /** @var list<string>|null */
  private ?array $not_tags_regex = null;
  /** @var list<string>|null */
  private ?array $not_search = null;
  /** @var list<string>|null */
  private ?array $not_search_regex = null;
  /**
   * Builds the search object from a user-supplied search string.
   *
   * The input is normalised (cleanSearch, unescape, unescapeLiteralParentheses) and kept as the
   * raw input. It is then handed from parser to parser: each one records the operators it
   * recognises and returns the string without them, so the next parser only sees what is left.
   *
   * @param string $input the search string as typed by the user
   */
  public function __construct(string $input) {
      $input = FreshRSS_BooleanSearch::unescapeLiteralParentheses(self::unescape(self::cleanSearch($input)));
      $this->raw_input = $input;

      // Negated operators go first: the positive patterns (e.g. `\be:`) would otherwise
      // also match the tail of a negated token such as `!e:1`.
      $input = $this->parseNotEntryIds($input);
      $input = $this->parseNotFeedIds($input);
      $input = $this->parseNotCategoryIds($input);
      $input = $this->parseNotLabelIds($input);
      $input = $this->parseNotLabelNames($input);

      $input = $this->parseNotUserdateSearch($input);
      $input = $this->parseNotPubdateSearch($input);
      $input = $this->parseNotDateSearch($input);

      $input = $this->parseNotIntitleSearch($input);
      $input = $this->parseNotIntextSearch($input);
      $input = $this->parseNotAuthorSearch($input);
      $input = $this->parseNotInurlSearch($input);
      $input = $this->parseNotTagsSearch($input);

      // Positive operators, in the same relative order.
      $input = $this->parseEntryIds($input);
      $input = $this->parseFeedIds($input);
      $input = $this->parseCategoryIds($input);
      $input = $this->parseLabelIds($input);
      $input = $this->parseLabelNames($input);

      $input = $this->parseUserdateSearch($input);
      $input = $this->parsePubdateSearch($input);
      $input = $this->parseDateSearch($input);

      $input = $this->parseIntitleSearch($input);
      $input = $this->parseIntextSearch($input);
      $input = $this->parseAuthorSearch($input);
      $input = $this->parseInurlSearch($input);
      $input = $this->parseTagsSearch($input);

      // Whatever remains is handled by the quoted / negated / plain search parsers.
      $input = $this->parseQuotedSearch($input);
      $input = $this->parseNotSearch($input);
      $this->parseSearch($input);
  }
  /**
   * String form of the search, delegating to getRawInput().
   */
  #[\Override]
  public function __toString(): string {
      return $this->getRawInput();
  }

  /**
   * Returns the search string as stored by the constructor.
   */
  public function getRawInput(): string {
      return $this->raw_input;
  }
  /**
   * Entry IDs collected from `e:` tokens, or null when none was parsed.
   * @return list<string>|null
   */
  public function getEntryIds(): ?array {
      return $this->entry_ids;
  }

  /**
   * Entry IDs collected from negated `e:` tokens, or null when none was parsed.
   * @return list<string>|null
   */
  public function getNotEntryIds(): ?array {
      return $this->not_entry_ids;
  }

  /**
   * Feed IDs collected from `f:` tokens, or null when none was parsed.
   * @return list<int>|null
   */
  public function getFeedIds(): ?array {
      return $this->feed_ids;
  }

  /**
   * Feed IDs collected from negated `f:` tokens, or null when none was parsed.
   * @return list<int>|null
   */
  public function getNotFeedIds(): ?array {
      return $this->not_feed_ids;
  }

  /**
   * Category IDs collected from `c:` tokens, or null when none was parsed.
   * @return list<int>|null
   */
  public function getCategoryIds(): ?array {
      return $this->category_ids;
  }

  /**
   * Category IDs collected from negated `c:` tokens, or null when none was parsed.
   * @return list<int>|null
   */
  public function getNotCategoryIds(): ?array {
      return $this->not_category_ids;
  }

  /**
   * Label ID lists (or the `*` wildcard) collected from `l:` / `L:` tokens.
   * @return list<list<int>|'*'>|null
   */
  public function getLabelIds(): array|null {
      return $this->label_ids;
  }

  /**
   * Label ID lists (or the `*` wildcard) collected from negated `l:` / `L:` tokens.
   * @return list<list<int>|'*'>|null
   */
  public function getNotLabelIds(): array|null {
      return $this->not_label_ids;
  }

  /**
   * Label name lists collected from `label:` / `labels:` tokens.
   * @return list<list<string>>|null
   */
  public function getLabelNames(): ?array {
      return $this->label_names;
  }

  /**
   * Label name lists collected from negated `label:` / `labels:` tokens.
   * @return list<list<string>>|null
   */
  public function getNotLabelNames(): ?array {
      return $this->not_label_names;
  }
  /**
   * Plain (quoted or bare) `intitle:` values, or null when there are none.
   * @return list<string>|null
   */
  public function getIntitle(): ?array {
      return $this->intitle;
  }

  /**
   * Regex-form `intitle:` values, or null when there are none.
   * @return list<string>|null
   */
  public function getIntitleRegex(): ?array {
      return $this->intitle_regex;
  }

  /**
   * Plain values of negated `intitle:` tokens, or null when there are none.
   * @return list<string>|null
   */
  public function getNotIntitle(): ?array {
      return $this->not_intitle;
  }

  /**
   * Regex-form values of negated `intitle:` tokens, or null when there are none.
   * @return list<string>|null
   */
  public function getNotIntitleRegex(): ?array {
      return $this->not_intitle_regex;
  }

  /**
   * Plain (quoted or bare) `intext:` values, or null when there are none.
   * @return list<string>|null
   */
  public function getIntext(): ?array {
      return $this->intext;
  }

  /**
   * Regex-form `intext:` values, or null when there are none.
   * @return list<string>|null
   */
  public function getIntextRegex(): ?array {
      return $this->intext_regex;
  }

  /**
   * Plain values of negated `intext:` tokens, or null when there are none.
   * @return list<string>|null
   */
  public function getNotIntext(): ?array {
      return $this->not_intext;
  }

  /**
   * Regex-form values of negated `intext:` tokens, or null when there are none.
   * @return list<string>|null
   */
  public function getNotIntextRegex(): ?array {
      return $this->not_intext_regex;
  }
  // Date bounds. The stored values may be int, false or null; every getter below
  // collapses any falsy value (null, false, 0) to null.

  /** Lower bound from `date:`, or null when unset/falsy. */
  public function getMinDate(): ?int {
      if (!$this->min_date) {
          return null;
      }
      return $this->min_date;
  }

  /** Lower bound from a negated `date:`, or null when unset/falsy. */
  public function getNotMinDate(): ?int {
      if (!$this->not_min_date) {
          return null;
      }
      return $this->not_min_date;
  }

  /** Overrides the lower `date:` bound. */
  public function setMinDate(int $value): void {
      $this->min_date = $value;
  }

  /** Upper bound from `date:`, or null when unset/falsy. */
  public function getMaxDate(): ?int {
      if (!$this->max_date) {
          return null;
      }
      return $this->max_date;
  }

  /** Upper bound from a negated `date:`, or null when unset/falsy. */
  public function getNotMaxDate(): ?int {
      if (!$this->not_max_date) {
          return null;
      }
      return $this->not_max_date;
  }

  /** Overrides the upper `date:` bound. */
  public function setMaxDate(int $value): void {
      $this->max_date = $value;
  }

  /** Lower bound from `pubdate:`, or null when unset/falsy. */
  public function getMinPubdate(): ?int {
      if (!$this->min_pubdate) {
          return null;
      }
      return $this->min_pubdate;
  }

  /** Lower bound from a negated `pubdate:`, or null when unset/falsy. */
  public function getNotMinPubdate(): ?int {
      if (!$this->not_min_pubdate) {
          return null;
      }
      return $this->not_min_pubdate;
  }

  /** Upper bound from `pubdate:`, or null when unset/falsy. */
  public function getMaxPubdate(): ?int {
      if (!$this->max_pubdate) {
          return null;
      }
      return $this->max_pubdate;
  }

  /** Upper bound from a negated `pubdate:`, or null when unset/falsy. */
  public function getNotMaxPubdate(): ?int {
      if (!$this->not_max_pubdate) {
          return null;
      }
      return $this->not_max_pubdate;
  }

  /** Overrides the upper `pubdate:` bound. */
  public function setMaxPubdate(int $value): void {
      $this->max_pubdate = $value;
  }

  /** Lower user-date bound, or null when unset/falsy. */
  public function getMinUserdate(): ?int {
      if (!$this->min_userdate) {
          return null;
      }
      return $this->min_userdate;
  }

  /** Negated lower user-date bound, or null when unset/falsy. */
  public function getNotMinUserdate(): ?int {
      if (!$this->not_min_userdate) {
          return null;
      }
      return $this->not_min_userdate;
  }

  /** Upper user-date bound, or null when unset/falsy. */
  public function getMaxUserdate(): ?int {
      if (!$this->max_userdate) {
          return null;
      }
      return $this->max_userdate;
  }

  /** Negated upper user-date bound, or null when unset/falsy. */
  public function getNotMaxUserdate(): ?int {
      if (!$this->not_max_userdate) {
          return null;
      }
      return $this->not_max_userdate;
  }
  /**
   * Plain `inurl:` values, or null when there are none.
   * @return list<string>|null
   */
  public function getInurl(): ?array {
      return $this->inurl;
  }

  /**
   * Regex-form `inurl:` values, or null when there are none.
   * @return list<string>|null
   */
  public function getInurlRegex(): ?array {
      return $this->inurl_regex;
  }

  /**
   * Plain values of negated `inurl:` tokens, or null when there are none.
   * @return list<string>|null
   */
  public function getNotInurl(): ?array {
      return $this->not_inurl;
  }

  /**
   * Regex-form values of negated `inurl:` tokens, or null when there are none.
   * @return list<string>|null
   */
  public function getNotInurlRegex(): ?array {
      return $this->not_inurl_regex;
  }

  /**
   * Plain `author:` values, or null when there are none.
   * @return list<string>|null
   */
  public function getAuthor(): ?array {
      return $this->author;
  }

  /**
   * Regex-form `author:` values, or null when there are none.
   * @return list<string>|null
   */
  public function getAuthorRegex(): ?array {
      return $this->author_regex;
  }

  /**
   * Plain values of negated `author:` tokens, or null when there are none.
   * @return list<string>|null
   */
  public function getNotAuthor(): ?array {
      return $this->not_author;
  }

  /**
   * Regex-form values of negated `author:` tokens, or null when there are none.
   * @return list<string>|null
   */
  public function getNotAuthorRegex(): ?array {
      return $this->not_author_regex;
  }

  /**
   * Current value of the `tags` property.
   * @return list<string>|null
   */
  public function getTags(): ?array {
      return $this->tags;
  }

  /**
   * Current value of the `tags_regex` property.
   * @return list<string>|null
   */
  public function getTagsRegex(): ?array {
      return $this->tags_regex;
  }

  /**
   * Current value of the `not_tags` property.
   * @return list<string>|null
   */
  public function getNotTags(): ?array {
      return $this->not_tags;
  }

  /**
   * Current value of the `not_tags_regex` property.
   * @return list<string>|null
   */
  public function getNotTagsRegex(): ?array {
      return $this->not_tags_regex;
  }

  /**
   * Current value of the `search` property.
   * @return list<string>|null
   */
  public function getSearch(): ?array {
      return $this->search;
  }

  /**
   * Current value of the `search_regex` property.
   * @return list<string>|null
   */
  public function getSearchRegex(): ?array {
      return $this->search_regex;
  }

  /**
   * Current value of the `not_search` property.
   * @return list<string>|null
   */
  public function getNotSearch(): ?array {
      return $this->not_search;
  }

  /**
   * Current value of the `not_search_regex` property.
   * @return list<string>|null
   */
  public function getNotSearchRegex(): ?array {
      return $this->not_search_regex;
  }
  /**
   * Drops empty strings from a list and re-indexes what is left.
   *
   * @param list<string>|null $anArray list to filter; null is treated like an empty list
   * @return list<string> the non-empty strings, in their original order, keyed from 0
   */
  private static function removeEmptyValues(?array $anArray): array {
      if (empty($anArray)) {
          return [];
      }
      $nonEmpty = array_filter($anArray, static fn(string $value) => $value !== '');
      // array_filter() keeps the original keys; array_values() turns the result back into a list.
      return array_values($nonEmpty);
  }
  /**
   * Turns every `+` into a space and trims the result.
   *
   * A string is converted directly; an array is converted element by element
   * (recursively), keeping its keys.
   *
   * @param list<string>|string $value
   * @return ($value is string ? string : list<string>)
   */
  private static function decodeSpaces(array|string $value): array|string {
      if (!is_array($value)) {
          return trim(str_replace('+', ' ', $value));
      }
      // @phpstan-ignore return.type
      return array_map(static fn(array|string $item): array|string => self::decodeSpaces($item), $value);
  }
  /**
   * Applies htmlspecialchars_decode() with ENT_QUOTES to every string of a list.
   *
   * @param list<string> $strings HTML-escaped strings
   * @return list<string> the decoded strings, in the same order
   */
  private static function htmlspecialchars_decodes(array $strings): array {
      $decode = static fn(string $s) => htmlspecialchars_decode($s, ENT_QUOTES);
      return array_map($decode, $strings);
  }
  /**
   * Extracts entry (article) ID filters of the form `e:1,2,3` from the search string.
   *
   * Every `e:` token contributes its comma-separated IDs (kept as strings, empty items
   * dropped) to $this->entry_ids. As soon as one token is present, $this->entry_ids is an
   * array (possibly empty) instead of null.
   *
   * @param string $input search string still to be parsed
   * @return string the input with every matched `e:` token removed
   */
  private function parseEntryIds(string $input): string {
      $pattern = '/\\be:(?P<search>[0-9,]*)/';
      if (!preg_match_all($pattern, $input, $matches)) {
          return $input;
      }
      // Remove exactly the matched spans. str_replace() with the list of matches applies them
      // one after another as plain substrings, so `e:1` would also be cut out of `e:12`,
      // leaving a stray `2` behind as a free-text search term.
      $input = preg_replace($pattern, '', $input) ?? $input;

      $this->entry_ids = [];
      foreach ($matches['search'] as $ids_list) {
          $entry_ids = self::removeEmptyValues(explode(',', $ids_list));
          if (!empty($entry_ids)) {
              $this->entry_ids = array_merge($this->entry_ids, $entry_ids);
          }
      }
      return $input;
  }
  /**
   * Extracts negated entry ID filters (`!e:1,2` or `-e:1,2`) from the search string.
   *
   * A token is only recognised at the start of the input or right after whitespace or `(`.
   * The IDs (kept as strings, empty items dropped) are accumulated in $this->not_entry_ids.
   *
   * @param string $input search string still to be parsed
   * @return string the input with every matched negated `e:` token removed
   */
  private function parseNotEntryIds(string $input): string {
      $pattern = '/(?<=[\\s(]|^)[!-]e:(?P<search>[0-9,]*)/';
      if (!preg_match_all($pattern, $input, $matches)) {
          return $input;
      }
      // preg_replace() removes exactly the matched spans; str_replace() on the match list
      // would also cut `!e:1` out of `!e:12` and leave a stray `2` in the input.
      $input = preg_replace($pattern, '', $input) ?? $input;

      $this->not_entry_ids = [];
      foreach ($matches['search'] as $ids_list) {
          $entry_ids = self::removeEmptyValues(explode(',', $ids_list));
          if (!empty($entry_ids)) {
              $this->not_entry_ids = array_merge($this->not_entry_ids, $entry_ids);
          }
      }
      return $input;
  }
  /**
   * Extracts feed ID filters of the form `f:1,2,3` from the search string.
   *
   * The IDs of every `f:` token are converted with intval() and accumulated in
   * $this->feed_ids, which becomes an array (possibly empty) once a token is present.
   *
   * @param string $input search string still to be parsed
   * @return string the input with every matched `f:` token removed
   */
  private function parseFeedIds(string $input): string {
      $pattern = '/\\bf:(?P<search>[0-9,]*)/';
      if (!preg_match_all($pattern, $input, $matches)) {
          return $input;
      }
      // preg_replace() removes exactly the matched spans; str_replace() on the match list
      // would also cut `f:1` out of `f:12` and leave a stray `2` in the input.
      $input = preg_replace($pattern, '', $input) ?? $input;

      $this->feed_ids = [];
      foreach ($matches['search'] as $ids_list) {
          /** @var list<int> $feed_ids */
          $feed_ids = array_map('intval', self::removeEmptyValues(explode(',', $ids_list)));
          if (!empty($feed_ids)) {
              $this->feed_ids = array_merge($this->feed_ids, $feed_ids);
          }
      }
      return $input;
  }
  /**
   * Extracts negated feed ID filters (`!f:1,2` or `-f:1,2`) from the search string.
   *
   * A token is only recognised at the start of the input or right after whitespace or `(`.
   * The IDs are converted with intval() and accumulated in $this->not_feed_ids.
   *
   * @param string $input search string still to be parsed
   * @return string the input with every matched negated `f:` token removed
   */
  private function parseNotFeedIds(string $input): string {
      $pattern = '/(?<=[\\s(]|^)[!-]f:(?P<search>[0-9,]*)/';
      if (!preg_match_all($pattern, $input, $matches)) {
          return $input;
      }
      // preg_replace() removes exactly the matched spans; str_replace() on the match list
      // would also cut `!f:1` out of `!f:12` and leave a stray `2` in the input.
      $input = preg_replace($pattern, '', $input) ?? $input;

      $this->not_feed_ids = [];
      foreach ($matches['search'] as $ids_list) {
          /** @var list<int> $feed_ids */
          $feed_ids = array_map('intval', self::removeEmptyValues(explode(',', $ids_list)));
          if (!empty($feed_ids)) {
              $this->not_feed_ids = array_merge($this->not_feed_ids, $feed_ids);
          }
      }
      return $input;
  }
  /**
   * Extracts category ID filters of the form `c:1,2,3` from the search string.
   *
   * The IDs of every `c:` token are converted with intval() and accumulated in
   * $this->category_ids, which becomes an array (possibly empty) once a token is present.
   *
   * @param string $input search string still to be parsed
   * @return string the input with every matched `c:` token removed
   */
  private function parseCategoryIds(string $input): string {
      $pattern = '/\\bc:(?P<search>[0-9,]*)/';
      if (!preg_match_all($pattern, $input, $matches)) {
          return $input;
      }
      // preg_replace() removes exactly the matched spans; str_replace() on the match list
      // would also cut `c:1` out of `c:12` and leave a stray `2` in the input.
      $input = preg_replace($pattern, '', $input) ?? $input;

      $this->category_ids = [];
      foreach ($matches['search'] as $ids_list) {
          /** @var list<int> $category_ids */
          $category_ids = array_map('intval', self::removeEmptyValues(explode(',', $ids_list)));
          if (!empty($category_ids)) {
              $this->category_ids = array_merge($this->category_ids, $category_ids);
          }
      }
      return $input;
  }
  /**
   * Extracts negated category ID filters (`!c:1,2` or `-c:1,2`) from the search string.
   *
   * A token is only recognised at the start of the input or right after whitespace or `(`.
   * The IDs are converted with intval() and accumulated in $this->not_category_ids.
   *
   * @param string $input search string still to be parsed
   * @return string the input with every matched negated `c:` token removed
   */
  private function parseNotCategoryIds(string $input): string {
      $pattern = '/(?<=[\\s(]|^)[!-]c:(?P<search>[0-9,]*)/';
      if (!preg_match_all($pattern, $input, $matches)) {
          return $input;
      }
      // preg_replace() removes exactly the matched spans; str_replace() on the match list
      // would also cut `!c:1` out of `!c:12` and leave a stray `2` in the input.
      $input = preg_replace($pattern, '', $input) ?? $input;

      $this->not_category_ids = [];
      foreach ($matches['search'] as $ids_list) {
          /** @var list<int> $category_ids */
          $category_ids = array_map('intval', self::removeEmptyValues(explode(',', $ids_list)));
          if (!empty($category_ids)) {
              $this->not_category_ids = array_merge($this->not_category_ids, $category_ids);
          }
      }
      return $input;
  }
  /**
   * Extracts label (user tag) ID filters of the form `l:1,2`, `L:1,2` or `l:*`.
   *
   * Each token adds one entry to $this->label_ids: the list of its IDs (converted with
   * intval()), or the string `*` for the wildcard. Once a wildcard token is met it is
   * recorded and the ID lists of any later tokens are ignored, although those tokens
   * are still removed from the input.
   *
   * @param string $input search string still to be parsed
   * @return string the input with every matched label ID token removed
   */
  private function parseLabelIds(string $input): string {
      $pattern = '/\\b[lL]:(?P<search>[0-9,]+|[*])/';
      if (!preg_match_all($pattern, $input, $matches)) {
          return $input;
      }
      // preg_replace() removes exactly the matched spans; str_replace() on the match list
      // would also cut `l:1` out of `l:12` and leave a stray `2` in the input.
      $input = preg_replace($pattern, '', $input) ?? $input;

      $this->label_ids = [];
      foreach ($matches['search'] as $ids_list) {
          if ($ids_list === '*') {
              $this->label_ids[] = '*';
              break;
          }
          /** @var list<int> $label_ids */
          $label_ids = array_map('intval', self::removeEmptyValues(explode(',', $ids_list)));
          if (!empty($label_ids)) {
              $this->label_ids[] = $label_ids;
          }
      }
      return $input;
  }
  /**
   * Extracts negated label ID filters (`!l:1,2`, `-L:1,2`, `!l:*`, ...).
   *
   * A token is only recognised at the start of the input or right after whitespace or `(`.
   * Each token adds one entry to $this->not_label_ids: its list of IDs (converted with
   * intval()) or the string `*`. Once a wildcard token is met it is recorded and the ID
   * lists of any later tokens are ignored, although those tokens are still removed.
   *
   * @param string $input search string still to be parsed
   * @return string the input with every matched negated label ID token removed
   */
  private function parseNotLabelIds(string $input): string {
      $pattern = '/(?<=[\\s(]|^)[!-][lL]:(?P<search>[0-9,]+|[*])/';
      if (!preg_match_all($pattern, $input, $matches)) {
          return $input;
      }
      // preg_replace() removes exactly the matched spans; str_replace() on the match list
      // would also cut `!l:1` out of `!l:12` and leave a stray `2` in the input.
      $input = preg_replace($pattern, '', $input) ?? $input;

      $this->not_label_ids = [];
      foreach ($matches['search'] as $ids_list) {
          if ($ids_list === '*') {
              $this->not_label_ids[] = '*';
              break;
          }
          /** @var list<int> $label_ids */
          $label_ids = array_map('intval', self::removeEmptyValues(explode(',', $ids_list)));
          if (!empty($label_ids)) {
              $this->not_label_ids[] = $label_ids;
          }
      }
      return $input;
  }
  /**
   * Extracts label (user tag) name filters written with `label:` or `labels:`.
   *
   * Quoted values (`label:"a b,c"` or with single quotes) are read first, then bare values
   * (`label:a,b`, up to the next whitespace or double quote). Each token's value is split
   * on commas and, if anything non-empty remains, appended as one list to $this->label_names.
   *
   * @param string $input search string still to be parsed
   * @return string the input with every matched label name token removed
   */
  private function parseLabelNames(string $input): string {
      $names_lists = [];

      // preg_replace() is used for removal so that only the matched spans disappear;
      // str_replace() on the match list would also cut `label:a` out of `label:ab`.
      $quotedPattern = '/\\blabels?:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
      if (preg_match_all($quotedPattern, $input, $matches)) {
          $names_lists = $matches['search'];
          $input = preg_replace($quotedPattern, '', $input) ?? $input;
      }
      $barePattern = '/\\blabels?:(?P<search>[^\s"]*)/';
      if (preg_match_all($barePattern, $input, $matches)) {
          $names_lists = array_merge($names_lists, $matches['search']);
          $input = preg_replace($barePattern, '', $input) ?? $input;
      }

      if (!empty($names_lists)) {
          $this->label_names = [];
          foreach ($names_lists as $names_list) {
              $names_array = self::removeEmptyValues(explode(',', $names_list));
              if (!empty($names_array)) {
                  $this->label_names[] = $names_array;
              }
          }
      }
      return $input;
  }
  /**
   * Extracts label (user tag) names to exclude, written as `!label:`, `-label:`,
   * `!labels:` or `-labels:`.
   *
   * A token is only recognised at the start of the input or right after whitespace or `(`.
   * Quoted values are read first, then bare values. Each token's value is split on commas
   * and, if anything non-empty remains, appended as one list to $this->not_label_names.
   *
   * @param string $input search string still to be parsed
   * @return string the input with every matched negated label name token removed
   */
  private function parseNotLabelNames(string $input): string {
      $names_lists = [];

      // preg_replace() is used for removal so that only the matched spans disappear;
      // str_replace() on the match list would also cut `!label:a` out of `!label:ab`.
      $quotedPattern = '/(?<=[\\s(]|^)[!-]labels?:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
      if (preg_match_all($quotedPattern, $input, $matches)) {
          $names_lists = $matches['search'];
          $input = preg_replace($quotedPattern, '', $input) ?? $input;
      }
      $barePattern = '/(?<=[\\s(]|^)[!-]labels?:(?P<search>[^\\s"]*)/';
      if (preg_match_all($barePattern, $input, $matches)) {
          $names_lists = array_merge($names_lists, $matches['search']);
          $input = preg_replace($barePattern, '', $input) ?? $input;
      }

      if (!empty($names_lists)) {
          $this->not_label_names = [];
          foreach ($names_lists as $names_list) {
              $names_array = self::removeEmptyValues(explode(',', $names_list));
              if (!empty($names_array)) {
                  $this->not_label_names[] = $names_array;
              }
          }
      }
      return $input;
  }
  /**
   * Extracts `intitle:` filters from the search string.
   *
   * Three forms are recognised, in this order:
   *  - regex: `intitle:/pattern/` with optional `i` / `m` flags, stored entity-decoded
   *    in $this->intitle_regex;
   *  - quoted: `intitle:'some words'` or `intitle:"some words"`;
   *  - bare: `intitle:word`, up to the next whitespace or double quote.
   * Quoted and bare values end up together in $this->intitle, which is reset to null when
   * no non-empty value was found.
   *
   * @param string $input search string still to be parsed
   * @return string the input with the recognised `intitle:` tokens removed
   */
  private function parseIntitleSearch(string $input): string {
      // Each stage removes its matches with preg_replace(): str_replace() on the match list
      // would also cut `intitle:foo` out of `intitle:foobar`, leaving `bar` as a stray term.
      $regexPattern = '#\\bintitle:(?P<search>/.*?(?<!\\\\)/[im]*)#';
      if (preg_match_all($regexPattern, $input, $matches)) {
          $this->intitle_regex = self::htmlspecialchars_decodes($matches['search']);
          $input = preg_replace($regexPattern, '', $input) ?? $input;
      }
      $quotedPattern = '/\\bintitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
      if (preg_match_all($quotedPattern, $input, $matches)) {
          $this->intitle = $matches['search'];
          $input = preg_replace($quotedPattern, '', $input) ?? $input;
      }
      $barePattern = '/\\bintitle:(?P<search>[^\s"]*)/';
      if (preg_match_all($barePattern, $input, $matches)) {
          $this->intitle = array_merge($this->intitle ?? [], $matches['search']);
          $input = preg_replace($barePattern, '', $input) ?? $input;
      }

      $this->intitle = self::removeEmptyValues($this->intitle);
      if (empty($this->intitle)) {
          $this->intitle = null;
      }
      return $input;
  }
  /**
   * Extracts negated `intitle:` filters (`!intitle:` or `-intitle:`) from the search string.
   *
   * A token is only recognised at the start of the input or right after whitespace or `(`.
   * Regex values go (entity-decoded) to $this->not_intitle_regex; quoted and bare values
   * go to $this->not_intitle, which is reset to null when no non-empty value was found.
   *
   * @param string $input search string still to be parsed
   * @return string the input with the recognised negated `intitle:` tokens removed
   */
  private function parseNotIntitleSearch(string $input): string {
      // Each stage removes its matches with preg_replace(): str_replace() on the match list
      // would also cut `!intitle:foo` out of `!intitle:foobar`, leaving `bar` behind.
      $regexPattern = '#(?<=[\\s(]|^)[!-]intitle:(?P<search>/.*?(?<!\\\\)/[im]*)#';
      if (preg_match_all($regexPattern, $input, $matches)) {
          $this->not_intitle_regex = self::htmlspecialchars_decodes($matches['search']);
          $input = preg_replace($regexPattern, '', $input) ?? $input;
      }
      $quotedPattern = '/(?<=[\\s(]|^)[!-]intitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
      if (preg_match_all($quotedPattern, $input, $matches)) {
          $this->not_intitle = $matches['search'];
          $input = preg_replace($quotedPattern, '', $input) ?? $input;
      }
      $barePattern = '/(?<=[\\s(]|^)[!-]intitle:(?P<search>[^\s"]*)/';
      if (preg_match_all($barePattern, $input, $matches)) {
          $this->not_intitle = array_merge($this->not_intitle ?? [], $matches['search']);
          $input = preg_replace($barePattern, '', $input) ?? $input;
      }

      $this->not_intitle = self::removeEmptyValues($this->not_intitle);
      if (empty($this->not_intitle)) {
          $this->not_intitle = null;
      }
      return $input;
  }
  /**
   * Extracts `intext:` filters from the search string.
   *
   * Recognised forms, in order: regex (`intext:/pattern/` with optional `i` / `m` flags,
   * stored entity-decoded in $this->intext_regex), quoted (`intext:"some words"` or with
   * single quotes) and bare (`intext:word`, up to the next whitespace or double quote).
   * Quoted and bare values end up together in $this->intext, which is reset to null when
   * no non-empty value was found.
   *
   * @param string $input search string still to be parsed
   * @return string the input with the recognised `intext:` tokens removed
   */
  private function parseIntextSearch(string $input): string {
      // Each stage removes its matches with preg_replace(): str_replace() on the match list
      // would also cut `intext:foo` out of `intext:foobar`, leaving `bar` as a stray term.
      $regexPattern = '#\\bintext:(?P<search>/.*?(?<!\\\\)/[im]*)#';
      if (preg_match_all($regexPattern, $input, $matches)) {
          $this->intext_regex = self::htmlspecialchars_decodes($matches['search']);
          $input = preg_replace($regexPattern, '', $input) ?? $input;
      }
      $quotedPattern = '/\\bintext:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
      if (preg_match_all($quotedPattern, $input, $matches)) {
          $this->intext = $matches['search'];
          $input = preg_replace($quotedPattern, '', $input) ?? $input;
      }
      $barePattern = '/\\bintext:(?P<search>[^\s"]*)/';
      if (preg_match_all($barePattern, $input, $matches)) {
          $this->intext = array_merge($this->intext ?? [], $matches['search']);
          $input = preg_replace($barePattern, '', $input) ?? $input;
      }

      $this->intext = self::removeEmptyValues($this->intext);
      if (empty($this->intext)) {
          $this->intext = null;
      }
      return $input;
  }
  /**
   * Extracts negated `intext:` filters (`!intext:` or `-intext:`) from the search string.
   *
   * A token is only recognised at the start of the input or right after whitespace or `(`.
   * Regex values go (entity-decoded) to $this->not_intext_regex; quoted and bare values
   * go to $this->not_intext, which is reset to null when no non-empty value was found.
   *
   * @param string $input search string still to be parsed
   * @return string the input with the recognised negated `intext:` tokens removed
   */
  private function parseNotIntextSearch(string $input): string {
      // Each stage removes its matches with preg_replace(): str_replace() on the match list
      // would also cut `!intext:foo` out of `!intext:foobar`, leaving `bar` behind.
      $regexPattern = '#(?<=[\\s(]|^)[!-]intext:(?P<search>/.*?(?<!\\\\)/[im]*)#';
      if (preg_match_all($regexPattern, $input, $matches)) {
          $this->not_intext_regex = self::htmlspecialchars_decodes($matches['search']);
          $input = preg_replace($regexPattern, '', $input) ?? $input;
      }
      $quotedPattern = '/(?<=[\\s(]|^)[!-]intext:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
      if (preg_match_all($quotedPattern, $input, $matches)) {
          $this->not_intext = $matches['search'];
          $input = preg_replace($quotedPattern, '', $input) ?? $input;
      }
      $barePattern = '/(?<=[\\s(]|^)[!-]intext:(?P<search>[^\s"]*)/';
      if (preg_match_all($barePattern, $input, $matches)) {
          $this->not_intext = array_merge($this->not_intext ?? [], $matches['search']);
          $input = preg_replace($barePattern, '', $input) ?? $input;
      }

      $this->not_intext = self::removeEmptyValues($this->not_intext);
      if (empty($this->not_intext)) {
          $this->not_intext = null;
      }
      return $input;
  }
  /**
   * Extracts `author:` filters from the search string.
   *
   * The value is the first word following the keyword, unless a delimiter is used;
   * supported delimiters are the single quote (') and the double quote ("). A regex form
   * (`author:/pattern/` with optional `i` / `m` flags) is read first and stored
   * entity-decoded in $this->author_regex. Quoted and bare values end up together in
   * $this->author, which is reset to null when no non-empty value was found.
   *
   * @param string $input search string still to be parsed
   * @return string the input with the recognised `author:` tokens removed
   */
  private function parseAuthorSearch(string $input): string {
      // Each stage removes its matches with preg_replace(): str_replace() on the match list
      // would also cut `author:ann` out of `author:anna`, leaving `a` as a stray term.
      $regexPattern = '#\\bauthor:(?P<search>/.*?(?<!\\\\)/[im]*)#';
      if (preg_match_all($regexPattern, $input, $matches)) {
          $this->author_regex = self::htmlspecialchars_decodes($matches['search']);
          $input = preg_replace($regexPattern, '', $input) ?? $input;
      }
      $quotedPattern = '/\\bauthor:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
      if (preg_match_all($quotedPattern, $input, $matches)) {
          $this->author = $matches['search'];
          $input = preg_replace($quotedPattern, '', $input) ?? $input;
      }
      $barePattern = '/\\bauthor:(?P<search>[^\s"]*)/';
      if (preg_match_all($barePattern, $input, $matches)) {
          $this->author = array_merge($this->author ?? [], $matches['search']);
          $input = preg_replace($barePattern, '', $input) ?? $input;
      }

      $this->author = self::removeEmptyValues($this->author);
      if (empty($this->author)) {
          $this->author = null;
      }
      return $input;
  }
  /**
   * Extracts negated `author:` filters (`!author:` or `-author:`) from the search string.
   *
   * A token is only recognised at the start of the input or right after whitespace or `(`.
   * Regex values go (entity-decoded) to $this->not_author_regex; quoted and bare values
   * go to $this->not_author, which is reset to null when no non-empty value was found.
   *
   * @param string $input search string still to be parsed
   * @return string the input with the recognised negated `author:` tokens removed
   */
  private function parseNotAuthorSearch(string $input): string {
      // Each stage removes its matches with preg_replace(): str_replace() on the match list
      // would also cut `!author:ann` out of `!author:anna`, leaving `a` behind.
      $regexPattern = '#(?<=[\\s(]|^)[!-]author:(?P<search>/.*?(?<!\\\\)/[im]*)#';
      if (preg_match_all($regexPattern, $input, $matches)) {
          $this->not_author_regex = self::htmlspecialchars_decodes($matches['search']);
          $input = preg_replace($regexPattern, '', $input) ?? $input;
      }
      $quotedPattern = '/(?<=[\\s(]|^)[!-]author:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
      if (preg_match_all($quotedPattern, $input, $matches)) {
          $this->not_author = $matches['search'];
          $input = preg_replace($quotedPattern, '', $input) ?? $input;
      }
      $barePattern = '/(?<=[\\s(]|^)[!-]author:(?P<search>[^\s"]*)/';
      if (preg_match_all($barePattern, $input, $matches)) {
          $this->not_author = array_merge($this->not_author ?? [], $matches['search']);
          $input = preg_replace($barePattern, '', $input) ?? $input;
      }

      $this->not_author = self::removeEmptyValues($this->not_author);
      if (empty($this->not_author)) {
          $this->not_author = null;
      }
      return $input;
  }
  /**
   * Extracts `inurl:` filters from the search string.
   *
   * Recognised forms, in order: regex (`inurl:/pattern/` with optional `i` / `m` flags,
   * stored entity-decoded in $this->inurl_regex), quoted (`inurl:"some words"` or with
   * single quotes) and bare (`inurl:word`, up to the next whitespace). Quoted and bare
   * values end up together in $this->inurl, which is reset to null when no non-empty value
   * was found.
   *
   * @param string $input search string still to be parsed
   * @return string the input with the recognised `inurl:` tokens removed
   */
  private function parseInurlSearch(string $input): string {
      // Each stage removes its matches with preg_replace(): str_replace() on the match list
      // would also cut `inurl:foo` out of `inurl:foobar`, leaving `bar` as a stray term.
      $regexPattern = '#\\binurl:(?P<search>/.*?(?<!\\\\)/[im]*)#';
      if (preg_match_all($regexPattern, $input, $matches)) {
          $this->inurl_regex = self::htmlspecialchars_decodes($matches['search']);
          $input = preg_replace($regexPattern, '', $input) ?? $input;
      }
      $quotedPattern = '/\\binurl:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
      if (preg_match_all($quotedPattern, $input, $matches)) {
          $this->inurl = $matches['search'];
          $input = preg_replace($quotedPattern, '', $input) ?? $input;
      }
      $barePattern = '/\\binurl:(?P<search>[^\\s]*)/';
      if (preg_match_all($barePattern, $input, $matches)) {
          // Merge rather than assign, so quoted values found above are not discarded
          // (same behaviour as the intitle / intext / author parsers).
          $this->inurl = array_merge($this->inurl ?? [], $matches['search']);
          $input = preg_replace($barePattern, '', $input) ?? $input;
      }

      $this->inurl = self::removeEmptyValues($this->inurl);
      if (empty($this->inurl)) {
          $this->inurl = null;
      }
      return $input;
  }
  /**
   * Extracts negated `inurl:` filters (`!inurl:` or `-inurl:`) from the search string.
   *
   * A token is only recognised at the start of the input or right after whitespace or `(`.
   * Regex values go (entity-decoded) to $this->not_inurl_regex; quoted and bare values
   * go to $this->not_inurl, which is reset to null when no non-empty value was found.
   *
   * @param string $input search string still to be parsed
   * @return string the input with the recognised negated `inurl:` tokens removed
   */
  private function parseNotInurlSearch(string $input): string {
      // Each stage removes its matches with preg_replace(): str_replace() on the match list
      // would also cut `!inurl:foo` out of `!inurl:foobar`, leaving `bar` behind.
      $regexPattern = '#(?<=[\\s(]|^)[!-]inurl:(?P<search>/.*?(?<!\\\\)/[im]*)#';
      if (preg_match_all($regexPattern, $input, $matches)) {
          $this->not_inurl_regex = self::htmlspecialchars_decodes($matches['search']);
          $input = preg_replace($regexPattern, '', $input) ?? $input;
      }
      $quotedPattern = '/(?<=[\\s(]|^)[!-]inurl:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
      if (preg_match_all($quotedPattern, $input, $matches)) {
          $this->not_inurl = $matches['search'];
          $input = preg_replace($quotedPattern, '', $input) ?? $input;
      }
      $barePattern = '/(?<=[\\s(]|^)[!-]inurl:(?P<search>[^\\s]*)/';
      if (preg_match_all($barePattern, $input, $matches)) {
          // Merge rather than assign, so quoted values found above are not discarded
          // (same behaviour as the negated intitle / intext / author parsers).
          $this->not_inurl = array_merge($this->not_inurl ?? [], $matches['search']);
          $input = preg_replace($barePattern, '', $input) ?? $input;
      }

      $this->not_inurl = self::removeEmptyValues($this->not_inurl);
      if (empty($this->not_inurl)) {
          $this->not_inurl = null;
      }
      return $input;
  }
  /**
   * Extracts `date:` filters from the search string.
   *
   * The value is the first word following the keyword. All `date:` tokens are removed
   * from the input, but only the first non-empty value is used: it is passed to
   * parseDateInterval() (defined outside this class; presumably in lib_date.php) and the
   * two results are stored in $this->min_date and $this->max_date.
   *
   * @param string $input search string still to be parsed
   * @return string the input with every `date:` token removed
   */
  private function parseDateSearch(string $input): string {
      $pattern = '/\\bdate:(?P<search>[^\\s]*)/';
      if (!preg_match_all($pattern, $input, $matches)) {
          return $input;
      }
      // preg_replace() removes exactly the matched spans; str_replace() on the match list
      // would also cut `date:2020` out of `date:2020-01`, leaving `-01` in the input.
      $input = preg_replace($pattern, '', $input) ?? $input;

      $dates = self::removeEmptyValues($matches['search']);
      if (!empty($dates[0])) {
          [$this->min_date, $this->max_date] = parseDateInterval($dates[0]);
      }
      return $input;
  }
  /**
   * Extract the negated date keyword (`!date:` or `-date:`) from the search string.
   * The keyword must be at the start of the input or preceded by whitespace or `(`.
   * When the entry at index 0 of the values kept by removeEmptyValues() is non-empty,
   * it is handed to parseDateInterval() and the result is stored in not_min_date / not_max_date.
   *
   * @param string $input the search string still to be parsed
   * @return string the search string with every negated date expression removed
   */
  private function parseNotDateSearch(string $input): string {
      if (!preg_match_all('/(?<=[\\s(]|^)[!-]date:(?P<search>[^\\s]*)/', $input, $found)) {
          return $input;
      }
      $remaining = str_replace($found[0], '', $input);
      $intervals = self::removeEmptyValues($found['search']);
      if (!empty($intervals[0])) {
          [$this->not_min_date, $this->not_max_date] = parseDateInterval($intervals[0]);
      }
      return $remaining;
  }
  /**
   * Extract the `pubdate:` keyword from the search string.
   * The value is the run of non-whitespace characters directly following the keyword.
   * When the entry at index 0 of the values kept by removeEmptyValues() is non-empty,
   * it is handed to parseDateInterval() and the result is stored in min_pubdate / max_pubdate.
   *
   * @param string $input the search string still to be parsed
   * @return string the search string with every `pubdate:` expression removed
   */
  private function parsePubdateSearch(string $input): string {
      if (!preg_match_all('/\\bpubdate:(?P<search>[^\\s]*)/', $input, $found)) {
          return $input;
      }
      $remaining = str_replace($found[0], '', $input);
      $intervals = self::removeEmptyValues($found['search']);
      if (!empty($intervals[0])) {
          [$this->min_pubdate, $this->max_pubdate] = parseDateInterval($intervals[0]);
      }
      return $remaining;
  }
  /**
   * Extract the negated pubdate keyword (`!pubdate:` or `-pubdate:`) from the search string.
   * The keyword must be at the start of the input or preceded by whitespace or `(`.
   * When the entry at index 0 of the values kept by removeEmptyValues() is non-empty,
   * it is handed to parseDateInterval() and the result is stored in not_min_pubdate / not_max_pubdate.
   *
   * @param string $input the search string still to be parsed
   * @return string the search string with every negated pubdate expression removed
   */
  private function parseNotPubdateSearch(string $input): string {
      if (!preg_match_all('/(?<=[\\s(]|^)[!-]pubdate:(?P<search>[^\\s]*)/', $input, $found)) {
          return $input;
      }
      $remaining = str_replace($found[0], '', $input);
      $intervals = self::removeEmptyValues($found['search']);
      if (!empty($intervals[0])) {
          [$this->not_min_pubdate, $this->not_max_pubdate] = parseDateInterval($intervals[0]);
      }
      return $remaining;
  }
  /**
   * Extract the `userdate:` keyword from the search string.
   * The value is the run of non-whitespace characters directly following the keyword.
   * When the entry at index 0 of the values kept by removeEmptyValues() is non-empty,
   * it is handed to parseDateInterval() and the result is stored in min_userdate / max_userdate.
   *
   * @param string $input the search string still to be parsed
   * @return string the search string with every `userdate:` expression removed
   */
  private function parseUserdateSearch(string $input): string {
      if (!preg_match_all('/\\buserdate:(?P<search>[^\\s]*)/', $input, $found)) {
          return $input;
      }
      $remaining = str_replace($found[0], '', $input);
      $intervals = self::removeEmptyValues($found['search']);
      if (!empty($intervals[0])) {
          [$this->min_userdate, $this->max_userdate] = parseDateInterval($intervals[0]);
      }
      return $remaining;
  }
  /**
   * Extract the negated userdate keyword (`!userdate:` or `-userdate:`) from the search string.
   * The keyword must be at the start of the input or preceded by whitespace or `(`.
   * When the entry at index 0 of the values kept by removeEmptyValues() is non-empty,
   * it is handed to parseDateInterval() and the result is stored in not_min_userdate / not_max_userdate.
   *
   * @param string $input the search string still to be parsed
   * @return string the search string with every negated userdate expression removed
   */
  private function parseNotUserdateSearch(string $input): string {
      if (!preg_match_all('/(?<=[\\s(]|^)[!-]userdate:(?P<search>[^\\s]*)/', $input, $found)) {
          return $input;
      }
      $remaining = str_replace($found[0], '', $input);
      $intervals = self::removeEmptyValues($found['search']);
      if (!empty($intervals[0])) {
          [$this->not_min_userdate, $this->not_max_userdate] = parseDateInterval($intervals[0]);
      }
      return $remaining;
  }
  /**
   * Parse the search string to find tags (introduced by `#`) and the values related to them.
   * Three forms are recognised, in this order: a regex (`#/…/`), a quoted value
   * (`#"…"` or `#'…'`), and a bare word (`#word`, up to the next whitespace).
   * Regex values go to tags_regex; quoted and bare values are collected together in tags,
   * which ends up null when no non-empty value remains.
   *
   * @param string $input the search string still to be parsed
   * @return string the search string with every matched tag expression removed
   */
  private function parseTagsSearch(string $input): string {
      if (preg_match_all('%#(?P<search>/.*?(?<!\\\\)/[im]*)%', $input, $matches)) {
          $this->tags_regex = self::htmlspecialchars_decodes($matches['search']);
          $input = str_replace($matches[0], '', $input);
      }
      // Quoted values found by this call; kept so the bare-word branch can append to them.
      $quoted = [];
      // The U flag makes `.*` lazy, so each value stops at the nearest matching delimiter.
      if (preg_match_all('/#(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
          $quoted = $matches['search'];
          $this->tags = $quoted;
          $input = str_replace($matches[0], '', $input);
      }
      if (preg_match_all('/#(?P<search>[^\\s]+)/', $input, $matches)) {
          // Merge instead of overwrite: a query mixing `#"a b"` and `#c` must keep both tags.
          $this->tags = array_merge($quoted, $matches['search']);
          $input = str_replace($matches[0], '', $input);
      }
      $this->tags = self::removeEmptyValues($this->tags);
      if (empty($this->tags)) {
          $this->tags = null;
      } else {
          $this->tags = self::decodeSpaces($this->tags);
      }
      return $input;
  }
  /**
   * Parse the search string to find negated tags (`!#` or `-#`) and the values related to them.
   * Three forms are recognised, in this order: a regex (`-#/…/`), a quoted value
   * (`-#"…"` or `-#'…'`), and a bare word (`-#word`, up to the next whitespace).
   * The negation sign must be at the start of the input or preceded by whitespace or `(`.
   * Regex values go to not_tags_regex; quoted and bare values are collected together in not_tags,
   * which ends up null when no non-empty value remains.
   *
   * @param string $input the search string still to be parsed
   * @return string the search string with every matched negated tag expression removed
   */
  private function parseNotTagsSearch(string $input): string {
      if (preg_match_all('%(?<=[\\s(]|^)[!-]#(?P<search>/.*?(?<!\\\\)/[im]*)%', $input, $matches)) {
          $this->not_tags_regex = self::htmlspecialchars_decodes($matches['search']);
          $input = str_replace($matches[0], '', $input);
      }
      // Quoted values found by this call; kept so the bare-word branch can append to them.
      $quoted = [];
      // The U flag makes `.*` lazy, so each value stops at the nearest matching delimiter.
      if (preg_match_all('/(?<=[\\s(]|^)[!-]#(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
          $quoted = $matches['search'];
          $this->not_tags = $quoted;
          $input = str_replace($matches[0], '', $input);
      }
      if (preg_match_all('/(?<=[\\s(]|^)[!-]#(?P<search>[^\\s]+)/', $input, $matches)) {
          // Merge instead of overwrite: a query mixing `-#"a b"` and `-#c` must keep both tags.
          $this->not_tags = array_merge($quoted, $matches['search']);
          $input = str_replace($matches[0], '', $input);
      }
      $this->not_tags = self::removeEmptyValues($this->not_tags);
      if (empty($this->not_tags)) {
          $this->not_tags = null;
      } else {
          $this->not_tags = self::decodeSpaces($this->not_tags);
      }
      return $input;
  }
  /**
   * Parse the search string to find delimited search values.
   * Supported delimiters are single quote ('), double quote (") and regex (/),
   * the latter optionally followed by the flags `i` and/or `m`.
   * A delimited value is only recognised at the start of the input or right after
   * whitespace or `(`. Regex values go to search_regex, quoted values to search.
   *
   * @param string $input the search string still to be parsed
   * @return string the cleaned search string with every matched delimited value removed
   */
  private function parseQuotedSearch(string $input): string {
      $input = self::cleanSearch($input);
      if ($input === '') {
          return '';
      }
      // The negative lookbehind lists the three literals `!`, `\` and `-`. The hyphen is placed
      // last so PCRE does not read `!-\` as a range (0x21 to 0x5C), which contains `(` and
      // would cancel the `(` explicitly allowed by the positive lookbehind.
      if (preg_match_all('#(?<=[\\s(]|^)(?<![!\\\\-])(?P<search>/.*?(?<!\\\\)/[im]*)#', $input, $matches)) {
          $this->search_regex = self::htmlspecialchars_decodes($matches['search']);
          //TODO: Replace all those str_replace with PREG_OFFSET_CAPTURE
          $input = str_replace($matches[0], '', $input);
      }
      // The U flag makes `.*` lazy, so each value stops at the nearest matching delimiter.
      if (preg_match_all('/(?<=[\\s(]|^)(?<![!\\\\-])(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
          $this->search = $matches['search'];
          //TODO: Replace all those str_replace with PREG_OFFSET_CAPTURE
          $input = str_replace($matches[0], '', $input);
      }
      return $input;
  }
  /**
   * Turn what is left of the search string into plain search values.
   * The input is cleaned with cleanSearch() and split on single spaces; each piece becomes
   * one search value, appended to any values already stored in search.
   *
   * @param string $input the search string still to be parsed
   * @return string the cleaned search string, or an empty string when nothing is left
   */
  private function parseSearch(string $input): string {
      $cleaned = self::cleanSearch($input);
      if ($cleaned === '') {
          return '';
      }
      $words = explode(' ', $cleaned);
      $this->search = is_array($this->search) ? array_merge($this->search, $words) : $words;
      return $cleaned;
  }
  /**
   * Parse the search string to find negated search values (prefixed with `!` or `-`).
   * Three forms are recognised, in this order: a regex (`-/…/`), a quoted value
   * (`-"…"` or `-'…'`), and a bare word (`-word`, up to the next whitespace).
   * The negation sign must be at the start of the input or preceded by whitespace or `(`.
   * Regex values go to not_search_regex; quoted and bare values are collected in not_search.
   *
   * @param string $input the search string still to be parsed
   * @return string the search string with every matched negated value removed,
   *                or an empty string as soon as nothing is left to parse
   */
  private function parseNotSearch(string $input): string {
      $rest = self::cleanSearch($input);
      if ($rest === '') {
          return '';
      }

      if (preg_match_all('#(?<=[\\s(]|^)[!-](?P<search>(?<!\\\\)/.*?(?<!\\\\)/[im]*)#', $rest, $regexHits)) {
          $this->not_search_regex = self::htmlspecialchars_decodes($regexHits['search']);
          $rest = str_replace($regexHits[0], '', $rest);
      }

      // The U flag makes `.*` lazy, so each value stops at the nearest matching delimiter.
      if (preg_match_all('/(?<=[\\s(]|^)[!-](?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $rest, $quotedHits)) {
          $this->not_search = $quotedHits['search'];
          $rest = str_replace($quotedHits[0], '', $rest);
      }

      // Removing the delimited values may leave only whitespace behind.
      $rest = self::cleanSearch($rest);
      if ($rest === '') {
          return '';
      }

      if (preg_match_all('/(?<=[\\s(]|^)[!-](?P<search>[^\\s]+)/', $rest, $wordHits)) {
          $alreadyFound = [];
          if (is_array($this->not_search)) {
              $alreadyFound = $this->not_search;
          }
          $this->not_search = array_merge($alreadyFound, $wordHits['search']);
          $rest = str_replace($wordHits[0], '', $rest);
      }

      $this->not_search = self::removeEmptyValues($this->not_search);
      return $rest;
  }
  /**
   * Normalise the whitespace of a search string: every run of whitespace is collapsed
   * to a single space and the result is trimmed.
   *
   * @param string $input the raw search string
   * @return string the normalised string, or an empty string when preg_replace() does not return a string
   */
  private static function cleanSearch(string $input): string {
      $collapsed = preg_replace('/\\s+/', ' ', $input);
      return is_string($collapsed) ? trim($collapsed) : '';
  }
  /**
   * Remove the escaping backslashes used by the parenthesis logic:
   * `\(` becomes `(` and `\)` becomes `)`; everything else is left untouched.
   */
  private static function unescape(string $input): string {
      $literals = [
          '\\(' => '(',
          '\\)' => ')',
      ];
      return strtr($input, $literals);
  }
  918. }