Search.php 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930
  1. <?php
  2. declare(strict_types=1);
  3. require_once(LIB_PATH . '/lib_date.php');
  /**
   * Contains a search from the search form.
   *
   * It extracts the meaningful parts of the search and stores them in a
   * convenient object.
   */
  10. class FreshRSS_Search implements \Stringable {
  11. /**
  12. * This contains the user input string
  13. */
  14. private string $raw_input = '';
  15. // The following properties are extracted from the raw input
  16. /** @var list<string>|null */
  17. private ?array $entry_ids = null;
  18. /** @var list<int>|null */
  19. private ?array $feed_ids = null;
  20. /** @var list<int>|null */
  21. private ?array $category_ids = null;
  22. /** @var list<int>|'*'|null */
  23. private $label_ids = null;
  24. /** @var list<string>|null */
  25. private ?array $label_names = null;
  26. /** @var list<string>|null */
  27. private ?array $intitle = null;
  28. /** @var list<string>|null */
  29. private ?array $intitle_regex = null;
  30. /** @var list<string>|null */
  31. private ?array $intext = null;
  32. /** @var list<string>|null */
  33. private ?array $intext_regex = null;
  34. /** @var int|false|null */
  35. private $min_date = null;
  36. /** @var int|false|null */
  37. private $max_date = null;
  38. /** @var int|false|null */
  39. private $min_pubdate = null;
  40. /** @var int|false|null */
  41. private $max_pubdate = null;
  42. /** @var list<string>|null */
  43. private ?array $inurl = null;
  44. /** @var list<string>|null */
  45. private ?array $inurl_regex = null;
  46. /** @var list<string>|null */
  47. private ?array $author = null;
  48. /** @var list<string>|null */
  49. private ?array $author_regex = null;
  50. /** @var list<string>|null */
  51. private ?array $tags = null;
  52. /** @var list<string>|null */
  53. private ?array $tags_regex = null;
  54. /** @var list<string>|null */
  55. private ?array $search = null;
  56. /** @var list<string>|null */
  57. private ?array $search_regex = null;
  58. /** @var list<string>|null */
  59. private ?array $not_entry_ids = null;
  60. /** @var list<int>|null */
  61. private ?array $not_feed_ids = null;
  62. /** @var list<int>|null */
  63. private ?array $not_category_ids = null;
  64. /** @var list<int>|'*'|null */
  65. private $not_label_ids = null;
  66. /** @var list<string>|null */
  67. private ?array $not_label_names = null;
  68. /** @var list<string>|null */
  69. private ?array $not_intitle = null;
  70. /** @var list<string>|null */
  71. private ?array $not_intitle_regex = null;
  72. /** @var list<string>|null */
  73. private ?array $not_intext = null;
  74. /** @var list<string>|null */
  75. private ?array $not_intext_regex = null;
  76. /** @var int|false|null */
  77. private $not_min_date = null;
  78. /** @var int|false|null */
  79. private $not_max_date = null;
  80. /** @var int|false|null */
  81. private $not_min_pubdate = null;
  82. /** @var int|false|null */
  83. private $not_max_pubdate = null;
  84. /** @var list<string>|null */
  85. private ?array $not_inurl = null;
  86. /** @var list<string>|null */
  87. private ?array $not_inurl_regex = null;
  88. /** @var list<string>|null */
  89. private ?array $not_author = null;
  90. /** @var list<string>|null */
  91. private ?array $not_author_regex = null;
  92. /** @var list<string>|null */
  93. private ?array $not_tags = null;
  94. /** @var list<string>|null */
  95. private ?array $not_tags_regex = null;
  96. /** @var list<string>|null */
  97. private ?array $not_search = null;
  98. /** @var list<string>|null */
  99. private ?array $not_search_regex = null;
  /**
   * Builds the search object from the raw user input.
   *
   * The input is cleaned and unescaped, stored as the raw input, and then handed
   * to each parser in turn; every parser returns the string given to the next one.
   * Negated filters are parsed before the positive ones: the positive patterns
   * (e.g. `\bf:`) would otherwise also match inside a negated token such as `-f:`.
   *
   * @param string $input search string coming from the search form
   */
  public function __construct(string $input) {
    $input = FreshRSS_BooleanSearch::unescapeLiteralParentheses(self::unescape(self::cleanSearch($input)));
    $this->raw_input = $input;

    // Negated filters (`!` or `-` prefix)
    $input = $this->parseNotEntryIds($input);
    $input = $this->parseNotFeedIds($input);
    $input = $this->parseNotCategoryIds($input);
    $input = $this->parseNotLabelIds($input);
    $input = $this->parseNotLabelNames($input);

    $input = $this->parseNotPubdateSearch($input);
    $input = $this->parseNotDateSearch($input);

    $input = $this->parseNotIntitleSearch($input);
    $input = $this->parseNotIntextSearch($input);
    $input = $this->parseNotAuthorSearch($input);
    $input = $this->parseNotInurlSearch($input);
    $input = $this->parseNotTagsSearch($input);

    // Positive filters
    $input = $this->parseEntryIds($input);
    $input = $this->parseFeedIds($input);
    $input = $this->parseCategoryIds($input);
    $input = $this->parseLabelIds($input);
    $input = $this->parseLabelNames($input);

    $input = $this->parsePubdateSearch($input);
    $input = $this->parseDateSearch($input);

    $input = $this->parseIntitleSearch($input);
    $input = $this->parseIntextSearch($input);
    $input = $this->parseAuthorSearch($input);
    $input = $this->parseInurlSearch($input);
    $input = $this->parseTagsSearch($input);

    // Whatever is left is handled by the remaining search parsers
    $input = $this->parseQuotedSearch($input);
    $input = $this->parseNotSearch($input);
    $this->parseSearch($input);
  }
  /** String form of the search: the raw input as stored by the constructor. */
  #[\Override]
  public function __toString(): string {
    return $this->getRawInput();
  }
  /** Returns the search string as stored by the constructor (after cleaning and unescaping, before any filter is removed). */
  public function getRawInput(): string {
    return $this->raw_input;
  }
  /**
   * Entry IDs collected from `e:` filters; null when no such filter was found.
   * @return list<string>|null
   */
  public function getEntryIds(): ?array {
    return $this->entry_ids;
  }

  /**
   * Entry IDs collected from negated (`!e:` / `-e:`) filters; null when no such filter was found.
   * @return list<string>|null
   */
  public function getNotEntryIds(): ?array {
    return $this->not_entry_ids;
  }
  /**
   * Feed IDs collected from `f:` filters; null when no such filter was found.
   * @return list<int>|null
   */
  public function getFeedIds(): ?array {
    return $this->feed_ids;
  }

  /**
   * Feed IDs collected from negated (`!f:` / `-f:`) filters; null when no such filter was found.
   * @return list<int>|null
   */
  public function getNotFeedIds(): ?array {
    return $this->not_feed_ids;
  }
  /**
   * Category IDs collected from `c:` filters; null when no such filter was found.
   * @return list<int>|null
   */
  public function getCategoryIds(): ?array {
    return $this->category_ids;
  }

  /**
   * Category IDs collected from negated (`!c:` / `-c:`) filters; null when no such filter was found.
   * @return list<int>|null
   */
  public function getNotCategoryIds(): ?array {
    return $this->not_category_ids;
  }
  /**
   * Label IDs collected from `l:` / `L:` filters, `'*'` when the wildcard form `l:*` was used,
   * or null when no such filter was found.
   * @return list<int>|'*'|null
   */
  public function getLabelIds(): array|string|null {
    return $this->label_ids;
  }

  /**
   * Same as getLabelIds() but for the negated (`!l:` / `-l:`) filters.
   * @return list<int>|'*'|null
   */
  public function getNotLabelIds(): array|string|null {
    return $this->not_label_ids;
  }
  /**
   * Label names collected from `label:` / `labels:` filters; null when no such filter was found.
   * @return list<string>|null
   */
  public function getLabelNames(): ?array {
    return $this->label_names;
  }

  /**
   * Label names collected from negated `label:` / `labels:` filters; null when no such filter was found.
   * @return list<string>|null
   */
  public function getNotLabelNames(): ?array {
    return $this->not_label_names;
  }
  /**
   * Plain terms given to `intitle:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getIntitle(): ?array {
    return $this->intitle;
  }

  /**
   * Regular expressions (`/…/` form) given to `intitle:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getIntitleRegex(): ?array {
    return $this->intitle_regex;
  }

  /**
   * Plain terms given to negated `intitle:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getNotIntitle(): ?array {
    return $this->not_intitle;
  }

  /**
   * Regular expressions given to negated `intitle:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getNotIntitleRegex(): ?array {
    return $this->not_intitle_regex;
  }
  /**
   * Plain terms given to `intext:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getIntext(): ?array {
    return $this->intext;
  }

  /**
   * Regular expressions (`/…/` form) given to `intext:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getIntextRegex(): ?array {
    return $this->intext_regex;
  }

  /**
   * Plain terms given to negated `intext:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getNotIntext(): ?array {
    return $this->not_intext;
  }

  /**
   * Regular expressions given to negated `intext:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getNotIntextRegex(): ?array {
    return $this->not_intext_regex;
  }
  /** Minimum date of the `date:` filter; null when unset (a stored false or 0 is reported as null too). */
  public function getMinDate(): ?int {
    return empty($this->min_date) ? null : $this->min_date;
  }

  /** Minimum date of the negated `date:` filter; null when unset (a stored false or 0 is reported as null too). */
  public function getNotMinDate(): ?int {
    return empty($this->not_min_date) ? null : $this->not_min_date;
  }

  /** Overrides the minimum date of the `date:` filter. */
  public function setMinDate(int $value): void {
    $this->min_date = $value;
  }

  /** Maximum date of the `date:` filter; null when unset (a stored false or 0 is reported as null too). */
  public function getMaxDate(): ?int {
    return empty($this->max_date) ? null : $this->max_date;
  }

  /** Maximum date of the negated `date:` filter; null when unset (a stored false or 0 is reported as null too). */
  public function getNotMaxDate(): ?int {
    return empty($this->not_max_date) ? null : $this->not_max_date;
  }

  /** Overrides the maximum date of the `date:` filter. */
  public function setMaxDate(int $value): void {
    $this->max_date = $value;
  }
  /** Minimum date of the `pubdate:` filter; null when unset (a stored false or 0 is reported as null too). */
  public function getMinPubdate(): ?int {
    return empty($this->min_pubdate) ? null : $this->min_pubdate;
  }

  /** Minimum date of the negated `pubdate:` filter; null when unset (a stored false or 0 is reported as null too). */
  public function getNotMinPubdate(): ?int {
    return empty($this->not_min_pubdate) ? null : $this->not_min_pubdate;
  }

  /** Maximum date of the `pubdate:` filter; null when unset (a stored false or 0 is reported as null too). */
  public function getMaxPubdate(): ?int {
    return empty($this->max_pubdate) ? null : $this->max_pubdate;
  }

  /** Maximum date of the negated `pubdate:` filter; null when unset (a stored false or 0 is reported as null too). */
  public function getNotMaxPubdate(): ?int {
    return empty($this->not_max_pubdate) ? null : $this->not_max_pubdate;
  }
  /**
   * Plain terms given to `inurl:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getInurl(): ?array {
    return $this->inurl;
  }

  /**
   * Regular expressions (`/…/` form) given to `inurl:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getInurlRegex(): ?array {
    return $this->inurl_regex;
  }

  /**
   * Plain terms given to negated `inurl:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getNotInurl(): ?array {
    return $this->not_inurl;
  }

  /**
   * Regular expressions given to negated `inurl:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getNotInurlRegex(): ?array {
    return $this->not_inurl_regex;
  }
  /**
   * Plain terms given to `author:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getAuthor(): ?array {
    return $this->author;
  }

  /**
   * Regular expressions (`/…/` form) given to `author:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getAuthorRegex(): ?array {
    return $this->author_regex;
  }

  /**
   * Plain terms given to negated `author:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getNotAuthor(): ?array {
    return $this->not_author;
  }

  /**
   * Regular expressions given to negated `author:` filters; null when there are none.
   * @return list<string>|null
   */
  public function getNotAuthorRegex(): ?array {
    return $this->not_author_regex;
  }
  /**
   * Tags given with the `#tag` syntax; null when there are none.
   * @return list<string>|null
   */
  public function getTags(): ?array {
    return $this->tags;
  }

  /**
   * Regular expressions given with the `#/…/` syntax; null when there are none.
   * @return list<string>|null
   */
  public function getTagsRegex(): ?array {
    return $this->tags_regex;
  }

  /**
   * Tags of the negated tag filters; null when there are none.
   * @return list<string>|null
   */
  public function getNotTags(): ?array {
    return $this->not_tags;
  }

  /**
   * Regular expressions of the negated tag filters; null when there are none.
   * @return list<string>|null
   */
  public function getNotTagsRegex(): ?array {
    return $this->not_tags_regex;
  }
  /**
   * Value of the `search` property (filled by a parser outside this excerpt); may be null.
   * @return list<string>|null
   */
  public function getSearch(): ?array {
    return $this->search;
  }

  /**
   * Value of the `search_regex` property; may be null.
   * @return list<string>|null
   */
  public function getSearchRegex(): ?array {
    return $this->search_regex;
  }

  /**
   * Value of the `not_search` property; may be null.
   * @return list<string>|null
   */
  public function getNotSearch(): ?array {
    return $this->not_search;
  }

  /**
   * Value of the `not_search_regex` property; may be null.
   * @return list<string>|null
   */
  public function getNotSearchRegex(): ?array {
    return $this->not_search_regex;
  }
  /**
   * Drops the empty strings from a list and re-indexes what is left.
   *
   * @param list<string>|null $anArray values to filter; null is treated as an empty list
   * @return list<string> the values different from `''`, in their original order
   */
  private static function removeEmptyValues(?array $anArray): array {
    $kept = [];
    foreach ($anArray ?? [] as $value) {
      if ($value !== '') {
        $kept[] = $value;
      }
    }
    return $kept;
  }
  /**
   * Turns every `+` into a space and trims the result.
   * Arrays are processed element by element (recursively), keeping their keys.
   *
   * @param list<string>|string $value
   * @return ($value is string ? string : list<string>)
   */
  private static function decodeSpaces(array|string $value): array|string {
    if (!is_array($value)) {
      return trim(str_replace('+', ' ', $value));
    }
    return array_map(static fn(array|string $item): array|string => self::decodeSpaces($item), $value);
  }
  /**
   * Applies htmlspecialchars_decode() with ENT_QUOTES to every string of the list.
   *
   * @param list<string> $strings
   * @return list<string> the decoded strings, in the same order
   */
  private static function htmlspecialchars_decodes(array $strings): array {
    $decoded = [];
    foreach ($strings as $key => $encoded) {
      $decoded[$key] = htmlspecialchars_decode($encoded, ENT_QUOTES);
    }
    return $decoded;
  }
  /**
   * Parse the search string to find entry (article) IDs.
   *
   * Every `e:1,2,3` token is removed from the input and its IDs are appended to
   * the entry IDs. IDs are kept as strings; empty list items are dropped.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the `e:` tokens
   */
  private function parseEntryIds(string $input): string {
    $pattern = '/\\be:(?P<search>[0-9,]*)/';
    if (!preg_match_all($pattern, $input, $matches)) {
      return $input;
    }
    $this->entry_ids = [];
    foreach ($matches['search'] as $ids_list) {
      $this->entry_ids = array_merge($this->entry_ids, self::removeEmptyValues(explode(',', $ids_list)));
    }
    // Remove exactly the matched tokens: str_replace() with a list of needles works
    // sequentially, so `e:1` would also eat the start of `e:12` and leave a stray `2`.
    return preg_replace($pattern, '', $input) ?? $input;
  }
  /**
   * Parse the search string to find entry (article) IDs to exclude.
   *
   * Handles `!e:1,2` and `-e:1,2` tokens located at the start of the input or
   * after whitespace or an opening parenthesis. IDs are kept as strings.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the negated `e:` tokens
   */
  private function parseNotEntryIds(string $input): string {
    $pattern = '/(?<=[\\s(]|^)[!-]e:(?P<search>[0-9,]*)/';
    if (!preg_match_all($pattern, $input, $matches)) {
      return $input;
    }
    $this->not_entry_ids = [];
    foreach ($matches['search'] as $ids_list) {
      $this->not_entry_ids = array_merge($this->not_entry_ids, self::removeEmptyValues(explode(',', $ids_list)));
    }
    // Remove exactly the matched tokens: str_replace() with a list of needles works
    // sequentially, so `-e:1` would also eat the start of `-e:12` and leave a stray `2`.
    return preg_replace($pattern, '', $input) ?? $input;
  }
  /**
   * Parse the search string to find feed IDs.
   *
   * Every `f:1,2,3` token is removed from the input and its IDs, converted to
   * integers, are appended to the feed IDs.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the `f:` tokens
   */
  private function parseFeedIds(string $input): string {
    $pattern = '/\\bf:(?P<search>[0-9,]*)/';
    if (!preg_match_all($pattern, $input, $matches)) {
      return $input;
    }
    $this->feed_ids = [];
    foreach ($matches['search'] as $ids_list) {
      /** @var list<int> $feed_ids */
      $feed_ids = array_map('intval', self::removeEmptyValues(explode(',', $ids_list)));
      $this->feed_ids = array_merge($this->feed_ids, $feed_ids);
    }
    // Remove exactly the matched tokens: str_replace() with a list of needles works
    // sequentially, so `f:1` would also eat the start of `f:12` and leave a stray `2`.
    return preg_replace($pattern, '', $input) ?? $input;
  }
  /**
   * Parse the search string to find feed IDs to exclude.
   *
   * Handles `!f:1,2` and `-f:1,2` tokens located at the start of the input or
   * after whitespace or an opening parenthesis. IDs are converted to integers.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the negated `f:` tokens
   */
  private function parseNotFeedIds(string $input): string {
    $pattern = '/(?<=[\\s(]|^)[!-]f:(?P<search>[0-9,]*)/';
    if (!preg_match_all($pattern, $input, $matches)) {
      return $input;
    }
    $this->not_feed_ids = [];
    foreach ($matches['search'] as $ids_list) {
      /** @var list<int> $feed_ids */
      $feed_ids = array_map('intval', self::removeEmptyValues(explode(',', $ids_list)));
      $this->not_feed_ids = array_merge($this->not_feed_ids, $feed_ids);
    }
    // Remove exactly the matched tokens: str_replace() with a list of needles works
    // sequentially, so `-f:1` would also eat the start of `-f:12` and leave a stray `2`.
    return preg_replace($pattern, '', $input) ?? $input;
  }
  /**
   * Parse the search string to find category IDs.
   *
   * Every `c:1,2,3` token is removed from the input and its IDs, converted to
   * integers, are appended to the category IDs.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the `c:` tokens
   */
  private function parseCategoryIds(string $input): string {
    $pattern = '/\\bc:(?P<search>[0-9,]*)/';
    if (!preg_match_all($pattern, $input, $matches)) {
      return $input;
    }
    $this->category_ids = [];
    foreach ($matches['search'] as $ids_list) {
      /** @var list<int> $category_ids */
      $category_ids = array_map('intval', self::removeEmptyValues(explode(',', $ids_list)));
      $this->category_ids = array_merge($this->category_ids, $category_ids);
    }
    // Remove exactly the matched tokens: str_replace() with a list of needles works
    // sequentially, so `c:1` would also eat the start of `c:12` and leave a stray `2`.
    return preg_replace($pattern, '', $input) ?? $input;
  }
  /**
   * Parse the search string to find category IDs to exclude.
   *
   * Handles `!c:1,2` and `-c:1,2` tokens located at the start of the input or
   * after whitespace or an opening parenthesis. IDs are converted to integers.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the negated `c:` tokens
   */
  private function parseNotCategoryIds(string $input): string {
    $pattern = '/(?<=[\\s(]|^)[!-]c:(?P<search>[0-9,]*)/';
    if (!preg_match_all($pattern, $input, $matches)) {
      return $input;
    }
    $this->not_category_ids = [];
    foreach ($matches['search'] as $ids_list) {
      /** @var list<int> $category_ids */
      $category_ids = array_map('intval', self::removeEmptyValues(explode(',', $ids_list)));
      $this->not_category_ids = array_merge($this->not_category_ids, $category_ids);
    }
    // Remove exactly the matched tokens: str_replace() with a list of needles works
    // sequentially, so `-c:1` would also eat the start of `-c:12` and leave a stray `2`.
    return preg_replace($pattern, '', $input) ?? $input;
  }
  /**
   * Parse the search string to find tags (labels) IDs.
   *
   * Handles `l:1,2` / `L:1,2` tokens as well as the wildcard `l:*`. As soon as a
   * wildcard is met, the result is `'*'` and the remaining tokens are ignored.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the `l:` tokens
   */
  private function parseLabelIds(string $input): string {
    $pattern = '/\\b[lL]:(?P<search>[0-9,]+|[*])/';
    if (!preg_match_all($pattern, $input, $matches)) {
      return $input;
    }
    $collected = [];
    foreach ($matches['search'] as $ids_list) {
      if ($ids_list === '*') {
        $collected = '*';
        break;
      }
      /** @var list<int> $label_ids */
      $label_ids = array_map('intval', self::removeEmptyValues(explode(',', $ids_list)));
      $collected = array_merge($collected, $label_ids);
    }
    $this->label_ids = $collected;
    // Remove exactly the matched tokens: str_replace() with a list of needles works
    // sequentially, so `l:1` would also eat the start of `l:12` and leave a stray `2`.
    return preg_replace($pattern, '', $input) ?? $input;
  }
  /**
   * Parse the search string to find tags (labels) IDs to exclude.
   *
   * Handles `!l:1,2` / `-l:1,2` tokens (also with `L`) and the wildcard form,
   * located at the start of the input or after whitespace or an opening parenthesis.
   * As soon as a wildcard is met, the result is `'*'` and the remaining tokens are ignored.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the negated `l:` tokens
   */
  private function parseNotLabelIds(string $input): string {
    $pattern = '/(?<=[\\s(]|^)[!-][lL]:(?P<search>[0-9,]+|[*])/';
    if (!preg_match_all($pattern, $input, $matches)) {
      return $input;
    }
    $collected = [];
    foreach ($matches['search'] as $ids_list) {
      if ($ids_list === '*') {
        $collected = '*';
        break;
      }
      /** @var list<int> $label_ids */
      $label_ids = array_map('intval', self::removeEmptyValues(explode(',', $ids_list)));
      $collected = array_merge($collected, $label_ids);
    }
    $this->not_label_ids = $collected;
    // Remove exactly the matched tokens: str_replace() with a list of needles works
    // sequentially, so `-l:1` would also eat the start of `-l:12` and leave a stray `2`.
    return preg_replace($pattern, '', $input) ?? $input;
  }
  /**
   * Parse the search string to find tags (labels) names.
   *
   * Handles `label:` / `labels:` followed either by a quoted value (single or
   * double quotes) or by a single word. Each value is a comma-separated list of names.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the `label:` tokens
   */
  private function parseLabelNames(string $input): string {
    $names_lists = [];
    $patterns = [
      '/\\blabels?:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',	// quoted value
      '/\\blabels?:(?P<search>[^\s"]*)/',	// single word
    ];
    foreach ($patterns as $pattern) {
      if (preg_match_all($pattern, $input, $matches)) {
        $names_lists = array_merge($names_lists, $matches['search']);
        // Remove exactly the matched tokens: str_replace() with a list of needles works
        // sequentially, so `label:a` would also eat the start of `label:ab`.
        $input = preg_replace($pattern, '', $input) ?? $input;
      }
    }
    if (!empty($names_lists)) {
      $this->label_names = [];
      foreach ($names_lists as $names_list) {
        $this->label_names = array_merge($this->label_names, self::removeEmptyValues(explode(',', $names_list)));
      }
    }
    return $input;
  }
  /**
   * Parse the search string to find tags (labels) names to exclude.
   *
   * Handles `!label:` / `-label:` (also `labels:`) followed either by a quoted value
   * or by a single word, located at the start of the input or after whitespace or an
   * opening parenthesis. Each value is a comma-separated list of names.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the negated `label:` tokens
   */
  private function parseNotLabelNames(string $input): string {
    $names_lists = [];
    $patterns = [
      '/(?<=[\\s(]|^)[!-]labels?:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',	// quoted value
      '/(?<=[\\s(]|^)[!-]labels?:(?P<search>[^\\s"]*)/',	// single word
    ];
    foreach ($patterns as $pattern) {
      if (preg_match_all($pattern, $input, $matches)) {
        $names_lists = array_merge($names_lists, $matches['search']);
        // Remove exactly the matched tokens: str_replace() with a list of needles works
        // sequentially, so `-label:a` would also eat the start of `-label:ab`.
        $input = preg_replace($pattern, '', $input) ?? $input;
      }
    }
    if (!empty($names_lists)) {
      $this->not_label_names = [];
      foreach ($names_lists as $names_list) {
        $this->not_label_names = array_merge($this->not_label_names, self::removeEmptyValues(explode(',', $names_list)));
      }
    }
    return $input;
  }
  /**
   * Parse the search string to find intitle keyword and the search related to it.
   *
   * Three forms are recognised, in this order: a regular expression (`intitle:/…/`,
   * optional `i` / `m` flags), a quoted value, and a single word. Regular expressions
   * are HTML-decoded and stored separately from the plain terms.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the `intitle:` tokens
   */
  private function parseIntitleSearch(string $input): string {
    // Tokens are removed with preg_replace(): str_replace() with a list of needles works
    // sequentially, so `intitle:foo` would also eat the start of `intitle:foobar`.
    $regex_pattern = '#\\bintitle:(?P<search>/.*?(?<!\\\\)/[im]*)#';
    if (preg_match_all($regex_pattern, $input, $matches)) {
      $this->intitle_regex = self::htmlspecialchars_decodes($matches['search']);
      $input = preg_replace($regex_pattern, '', $input) ?? $input;
    }
    $quoted_pattern = '/\\bintitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
    if (preg_match_all($quoted_pattern, $input, $matches)) {
      $this->intitle = $matches['search'];
      $input = preg_replace($quoted_pattern, '', $input) ?? $input;
    }
    $word_pattern = '/\\bintitle:(?P<search>[^\s"]*)/';
    if (preg_match_all($word_pattern, $input, $matches)) {
      $this->intitle = array_merge($this->intitle ?? [], $matches['search']);
      $input = preg_replace($word_pattern, '', $input) ?? $input;
    }
    // An empty list is stored as null
    $this->intitle = self::removeEmptyValues($this->intitle) ?: null;
    return $input;
  }
  /**
   * Parse the search string to find negated intitle keywords (`!intitle:` / `-intitle:`).
   *
   * Same three forms as the positive filter (regular expression, quoted value, single
   * word); the token must be at the start of the input or follow whitespace or an
   * opening parenthesis.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the negated `intitle:` tokens
   */
  private function parseNotIntitleSearch(string $input): string {
    // Tokens are removed with preg_replace(): str_replace() with a list of needles works
    // sequentially, so `-intitle:foo` would also eat the start of `-intitle:foobar`.
    $regex_pattern = '#(?<=[\\s(]|^)[!-]intitle:(?P<search>/.*?(?<!\\\\)/[im]*)#';
    if (preg_match_all($regex_pattern, $input, $matches)) {
      $this->not_intitle_regex = self::htmlspecialchars_decodes($matches['search']);
      $input = preg_replace($regex_pattern, '', $input) ?? $input;
    }
    $quoted_pattern = '/(?<=[\\s(]|^)[!-]intitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
    if (preg_match_all($quoted_pattern, $input, $matches)) {
      $this->not_intitle = $matches['search'];
      $input = preg_replace($quoted_pattern, '', $input) ?? $input;
    }
    $word_pattern = '/(?<=[\\s(]|^)[!-]intitle:(?P<search>[^\s"]*)/';
    if (preg_match_all($word_pattern, $input, $matches)) {
      $this->not_intitle = array_merge($this->not_intitle ?? [], $matches['search']);
      $input = preg_replace($word_pattern, '', $input) ?? $input;
    }
    // An empty list is stored as null
    $this->not_intitle = self::removeEmptyValues($this->not_intitle) ?: null;
    return $input;
  }
  /**
   * Parse the search string to find intext keyword and the search related to it.
   *
   * Three forms are recognised, in this order: a regular expression (`intext:/…/`,
   * optional `i` / `m` flags), a quoted value, and a single word. Regular expressions
   * are HTML-decoded and stored separately from the plain terms.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the `intext:` tokens
   */
  private function parseIntextSearch(string $input): string {
    // Tokens are removed with preg_replace(): str_replace() with a list of needles works
    // sequentially, so `intext:foo` would also eat the start of `intext:foobar`.
    $regex_pattern = '#\\bintext:(?P<search>/.*?(?<!\\\\)/[im]*)#';
    if (preg_match_all($regex_pattern, $input, $matches)) {
      $this->intext_regex = self::htmlspecialchars_decodes($matches['search']);
      $input = preg_replace($regex_pattern, '', $input) ?? $input;
    }
    $quoted_pattern = '/\\bintext:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
    if (preg_match_all($quoted_pattern, $input, $matches)) {
      $this->intext = $matches['search'];
      $input = preg_replace($quoted_pattern, '', $input) ?? $input;
    }
    $word_pattern = '/\\bintext:(?P<search>[^\s"]*)/';
    if (preg_match_all($word_pattern, $input, $matches)) {
      $this->intext = array_merge($this->intext ?? [], $matches['search']);
      $input = preg_replace($word_pattern, '', $input) ?? $input;
    }
    // An empty list is stored as null
    $this->intext = self::removeEmptyValues($this->intext) ?: null;
    return $input;
  }
  /**
   * Parse the search string to find negated intext keywords (`!intext:` / `-intext:`).
   *
   * Same three forms as the positive filter (regular expression, quoted value, single
   * word); the token must be at the start of the input or follow whitespace or an
   * opening parenthesis.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the negated `intext:` tokens
   */
  private function parseNotIntextSearch(string $input): string {
    // Tokens are removed with preg_replace(): str_replace() with a list of needles works
    // sequentially, so `-intext:foo` would also eat the start of `-intext:foobar`.
    $regex_pattern = '#(?<=[\\s(]|^)[!-]intext:(?P<search>/.*?(?<!\\\\)/[im]*)#';
    if (preg_match_all($regex_pattern, $input, $matches)) {
      $this->not_intext_regex = self::htmlspecialchars_decodes($matches['search']);
      $input = preg_replace($regex_pattern, '', $input) ?? $input;
    }
    $quoted_pattern = '/(?<=[\\s(]|^)[!-]intext:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
    if (preg_match_all($quoted_pattern, $input, $matches)) {
      $this->not_intext = $matches['search'];
      $input = preg_replace($quoted_pattern, '', $input) ?? $input;
    }
    $word_pattern = '/(?<=[\\s(]|^)[!-]intext:(?P<search>[^\s"]*)/';
    if (preg_match_all($word_pattern, $input, $matches)) {
      $this->not_intext = array_merge($this->not_intext ?? [], $matches['search']);
      $input = preg_replace($word_pattern, '', $input) ?? $input;
    }
    // An empty list is stored as null
    $this->not_intext = self::removeEmptyValues($this->not_intext) ?: null;
    return $input;
  }
  /**
   * Parse the search string to find author keyword and the search related to it.
   * The search is the first word following the keyword except when using
   * a delimiter. Supported delimiters are single quote (') and double quotes (").
   * A regular expression (`author:/…/`, optional `i` / `m` flags) is also accepted;
   * it is HTML-decoded and stored separately from the plain terms.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the `author:` tokens
   */
  private function parseAuthorSearch(string $input): string {
    // Tokens are removed with preg_replace(): str_replace() with a list of needles works
    // sequentially, so `author:ann` would also eat the start of `author:anna`.
    $regex_pattern = '#\\bauthor:(?P<search>/.*?(?<!\\\\)/[im]*)#';
    if (preg_match_all($regex_pattern, $input, $matches)) {
      $this->author_regex = self::htmlspecialchars_decodes($matches['search']);
      $input = preg_replace($regex_pattern, '', $input) ?? $input;
    }
    $quoted_pattern = '/\\bauthor:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
    if (preg_match_all($quoted_pattern, $input, $matches)) {
      $this->author = $matches['search'];
      $input = preg_replace($quoted_pattern, '', $input) ?? $input;
    }
    $word_pattern = '/\\bauthor:(?P<search>[^\s"]*)/';
    if (preg_match_all($word_pattern, $input, $matches)) {
      $this->author = array_merge($this->author ?? [], $matches['search']);
      $input = preg_replace($word_pattern, '', $input) ?? $input;
    }
    // An empty list is stored as null
    $this->author = self::removeEmptyValues($this->author) ?: null;
    return $input;
  }
  /**
   * Parse the search string to find negated author keywords (`!author:` / `-author:`).
   *
   * Same three forms as the positive filter (regular expression, quoted value, single
   * word); the token must be at the start of the input or follow whitespace or an
   * opening parenthesis.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the negated `author:` tokens
   */
  private function parseNotAuthorSearch(string $input): string {
    // Tokens are removed with preg_replace(): str_replace() with a list of needles works
    // sequentially, so `-author:ann` would also eat the start of `-author:anna`.
    $regex_pattern = '#(?<=[\\s(]|^)[!-]author:(?P<search>/.*?(?<!\\\\)/[im]*)#';
    if (preg_match_all($regex_pattern, $input, $matches)) {
      $this->not_author_regex = self::htmlspecialchars_decodes($matches['search']);
      $input = preg_replace($regex_pattern, '', $input) ?? $input;
    }
    $quoted_pattern = '/(?<=[\\s(]|^)[!-]author:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
    if (preg_match_all($quoted_pattern, $input, $matches)) {
      $this->not_author = $matches['search'];
      $input = preg_replace($quoted_pattern, '', $input) ?? $input;
    }
    $word_pattern = '/(?<=[\\s(]|^)[!-]author:(?P<search>[^\s"]*)/';
    if (preg_match_all($word_pattern, $input, $matches)) {
      $this->not_author = array_merge($this->not_author ?? [], $matches['search']);
      $input = preg_replace($word_pattern, '', $input) ?? $input;
    }
    // An empty list is stored as null
    $this->not_author = self::removeEmptyValues($this->not_author) ?: null;
    return $input;
  }
  /**
   * Parse the search string to find inurl keyword and the search related to it.
   * The search is the first word following the keyword, a quoted value, or a
   * regular expression (`inurl:/…/`, optional `i` / `m` flags). Regular expressions
   * are HTML-decoded and stored separately from the plain terms.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the `inurl:` tokens
   */
  private function parseInurlSearch(string $input): string {
    // Tokens are removed with preg_replace(): str_replace() with a list of needles works
    // sequentially, so `inurl:foo` would also eat the start of `inurl:foobar`.
    $regex_pattern = '#\\binurl:(?P<search>/.*?(?<!\\\\)/[im]*)#';
    if (preg_match_all($regex_pattern, $input, $matches)) {
      $this->inurl_regex = self::htmlspecialchars_decodes($matches['search']);
      $input = preg_replace($regex_pattern, '', $input) ?? $input;
    }
    $quoted_pattern = '/\\binurl:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
    if (preg_match_all($quoted_pattern, $input, $matches)) {
      $this->inurl = $matches['search'];
      $input = preg_replace($quoted_pattern, '', $input) ?? $input;
    }
    $word_pattern = '/\\binurl:(?P<search>[^\\s]*)/';
    if (preg_match_all($word_pattern, $input, $matches)) {
      // Merge with the quoted values instead of overwriting them, as done for intitle / intext / author
      $this->inurl = array_merge($this->inurl ?? [], $matches['search']);
      $input = preg_replace($word_pattern, '', $input) ?? $input;
    }
    // An empty list is stored as null
    $this->inurl = self::removeEmptyValues($this->inurl) ?: null;
    return $input;
  }
  /**
   * Parse the search string to find negated inurl keywords (`!inurl:` / `-inurl:`).
   *
   * Same three forms as the positive filter (regular expression, quoted value, single
   * word); the token must be at the start of the input or follow whitespace or an
   * opening parenthesis.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the negated `inurl:` tokens
   */
  private function parseNotInurlSearch(string $input): string {
    // Tokens are removed with preg_replace(): str_replace() with a list of needles works
    // sequentially, so `-inurl:foo` would also eat the start of `-inurl:foobar`.
    $regex_pattern = '#(?<=[\\s(]|^)[!-]inurl:(?P<search>/.*?(?<!\\\\)/[im]*)#';
    if (preg_match_all($regex_pattern, $input, $matches)) {
      $this->not_inurl_regex = self::htmlspecialchars_decodes($matches['search']);
      $input = preg_replace($regex_pattern, '', $input) ?? $input;
    }
    $quoted_pattern = '/(?<=[\\s(]|^)[!-]inurl:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';
    if (preg_match_all($quoted_pattern, $input, $matches)) {
      $this->not_inurl = $matches['search'];
      $input = preg_replace($quoted_pattern, '', $input) ?? $input;
    }
    $word_pattern = '/(?<=[\\s(]|^)[!-]inurl:(?P<search>[^\\s]*)/';
    if (preg_match_all($word_pattern, $input, $matches)) {
      // Merge with the quoted values instead of overwriting them, as done for intitle / intext / author
      $this->not_inurl = array_merge($this->not_inurl ?? [], $matches['search']);
      $input = preg_replace($word_pattern, '', $input) ?? $input;
    }
    // An empty list is stored as null
    $this->not_inurl = self::removeEmptyValues($this->not_inurl) ?: null;
    return $input;
  }
  /**
   * Parse the search string to find date keyword and the search related to it.
   * The search is the first word following the keyword.
   *
   * All `date:` tokens are removed from the input, but only the first non-empty
   * value is passed to parseDateInterval() to set the minimum and maximum dates.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the `date:` tokens
   */
  private function parseDateSearch(string $input): string {
    $pattern = '/\\bdate:(?P<search>[^\\s]*)/';
    if (!preg_match_all($pattern, $input, $matches)) {
      return $input;
    }
    $dates = self::removeEmptyValues($matches['search']);
    if (!empty($dates[0])) {
      [$this->min_date, $this->max_date] = parseDateInterval($dates[0]);
    }
    // Remove exactly the matched tokens: str_replace() with a list of needles works
    // sequentially, so `date:2020` would also eat the start of `date:2020-01` and leave `-01`.
    return preg_replace($pattern, '', $input) ?? $input;
  }
  /**
   * Parse the search string to find negated date keywords (`!date:` / `-date:`).
   *
   * The token must be at the start of the input or follow whitespace or an opening
   * parenthesis. All matching tokens are removed, but only the first non-empty value
   * is passed to parseDateInterval() to set the excluded minimum and maximum dates.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the negated `date:` tokens
   */
  private function parseNotDateSearch(string $input): string {
    $pattern = '/(?<=[\\s(]|^)[!-]date:(?P<search>[^\\s]*)/';
    if (!preg_match_all($pattern, $input, $matches)) {
      return $input;
    }
    $dates = self::removeEmptyValues($matches['search']);
    if (!empty($dates[0])) {
      [$this->not_min_date, $this->not_max_date] = parseDateInterval($dates[0]);
    }
    // Remove exactly the matched tokens: str_replace() with a list of needles works
    // sequentially, so `-date:2020` would also eat the start of `-date:2020-01` and leave `-01`.
    return preg_replace($pattern, '', $input) ?? $input;
  }
  /**
   * Parse the search string to find pubdate keyword and the search related to it.
   * The search is the first word following the keyword.
   *
   * All `pubdate:` tokens are removed from the input, but only the first non-empty
   * value is passed to parseDateInterval() to set the minimum and maximum publication dates.
   *
   * @param string $input search string still to be parsed
   * @return string the search string without the `pubdate:` tokens
   */
  private function parsePubdateSearch(string $input): string {
    $pattern = '/\\bpubdate:(?P<search>[^\\s]*)/';
    if (!preg_match_all($pattern, $input, $matches)) {
      return $input;
    }
    $dates = self::removeEmptyValues($matches['search']);
    if (!empty($dates[0])) {
      [$this->min_pubdate, $this->max_pubdate] = parseDateInterval($dates[0]);
    }
    // Remove exactly the matched tokens: str_replace() with a list of needles works
    // sequentially, so `pubdate:2020` would also eat the start of `pubdate:2020-01` and leave `-01`.
    return preg_replace($pattern, '', $input) ?? $input;
  }
  /**
   * Extract negated `pubdate:` filters (`!pubdate:` or `-pubdate:`) from the search string.
   *
   * The negation sign must be at the start of the input or follow whitespace
   * or an opening parenthesis. All such tokens are stripped; the entry at
   * index 0 of the non-empty values is handed to parseDateInterval() and its
   * two results are stored in $this->not_min_pubdate and $this->not_max_pubdate.
   *
   * @param string $input the search string still to be parsed
   * @return string the input without the negated `pubdate:` tokens
   */
  private function parseNotPubdateSearch(string $input): string {
      // Nothing to do when the keyword is absent (0) or matching failed (false).
      if (!preg_match_all('/(?<=[\\s(]|^)[!-]pubdate:(?P<search>[^\\s]*)/', $input, $found)) {
          return $input;
      }

      $remaining = str_replace($found[0], '', $input);
      $candidates = self::removeEmptyValues($found['search']);
      if (empty($candidates[0])) {
          // Keyword present but without a usable value: only strip it.
          return $remaining;
      }

      [$this->not_min_pubdate, $this->not_max_pubdate] = parseDateInterval($candidates[0]);
      return $remaining;
  }
  /**
   * Extract tag filters (introduced by `#`) from the search string.
   *
   * Three forms are recognised, in this order, each being stripped from the
   * input once matched:
   *  - a regex between slashes with optional `i`/`m` flags, stored in $this->tags_regex;
   *  - a value between matching single or double quotes;
   *  - a bare word, i.e. the run of non-whitespace characters after the `#`.
   * Quoted and bare values are both kept in $this->tags (quoted ones first).
   * $this->tags ends up null when no non-empty tag remains.
   *
   * @param string $input the search string still to be parsed
   * @return string the input without the tag tokens
   */
  private function parseTagsSearch(string $input): string {
      if (preg_match_all('%#(?P<search>/.*?(?<!\\\\)/[im]*)%', $input, $matches)) {
          $this->tags_regex = self::htmlspecialchars_decodes($matches['search']);
          $input = str_replace($matches[0], '', $input);
      }

      // Remember the quoted tags so the bare-word pass below can append to
      // them instead of overwriting them.
      $quotedTags = [];
      if (preg_match_all('/#(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
          $quotedTags = $matches['search'];
          $this->tags = $quotedTags;
          $input = str_replace($matches[0], '', $input);
      }
      if (preg_match_all('/#(?P<search>[^\\s]+)/', $input, $matches)) {
          // Previously a plain assignment: a query mixing both forms lost its
          // quoted tags even though they had already been removed from $input.
          $this->tags = array_merge($quotedTags, $matches['search']);
          $input = str_replace($matches[0], '', $input);
      }

      $this->tags = self::removeEmptyValues($this->tags);
      if (empty($this->tags)) {
          $this->tags = null;
      } else {
          $this->tags = self::decodeSpaces($this->tags);
      }
      return $input;
  }
  /**
   * Extract negated tag filters (`!#…` or `-#…`) from the search string.
   *
   * The negation sign must be at the start of the input or follow whitespace
   * or an opening parenthesis. Three forms are recognised, in this order,
   * each being stripped from the input once matched:
   *  - a regex between slashes with optional `i`/`m` flags, stored in $this->not_tags_regex;
   *  - a value between matching single or double quotes;
   *  - a bare word, i.e. the run of non-whitespace characters after the `#`.
   * Quoted and bare values are both kept in $this->not_tags (quoted ones first).
   * $this->not_tags ends up null when no non-empty tag remains.
   *
   * @param string $input the search string still to be parsed
   * @return string the input without the negated tag tokens
   */
  private function parseNotTagsSearch(string $input): string {
      if (preg_match_all('%(?<=[\\s(]|^)[!-]#(?P<search>/.*?(?<!\\\\)/[im]*)%', $input, $matches)) {
          $this->not_tags_regex = self::htmlspecialchars_decodes($matches['search']);
          $input = str_replace($matches[0], '', $input);
      }

      // Remember the quoted tags so the bare-word pass below can append to
      // them instead of overwriting them.
      $quotedTags = [];
      if (preg_match_all('/(?<=[\\s(]|^)[!-]#(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
          $quotedTags = $matches['search'];
          $this->not_tags = $quotedTags;
          $input = str_replace($matches[0], '', $input);
      }
      if (preg_match_all('/(?<=[\\s(]|^)[!-]#(?P<search>[^\\s]+)/', $input, $matches)) {
          // Previously a plain assignment: a query mixing both forms lost its
          // quoted tags even though they had already been removed from $input.
          $this->not_tags = array_merge($quotedTags, $matches['search']);
          $input = str_replace($matches[0], '', $input);
      }

      $this->not_tags = self::removeEmptyValues($this->not_tags);
      if (empty($this->not_tags)) {
          $this->not_tags = null;
      } else {
          $this->not_tags = self::decodeSpaces($this->not_tags);
      }
      return $input;
  }
  /**
   * Extract delimited free-text search values from the search string.
   *
   * Two kinds of delimited values are recognised and stripped from the input:
   * regexes between slashes (with optional `i`/`m` flags), stored in
   * $this->search_regex, and values between matching single or double
   * quotes, stored in $this->search.
   *
   * @param string $input the search string still to be parsed
   * @return string the whitespace-normalised input without the delimited values
   */
  private function parseQuotedSearch(string $input): string {
      $input = self::cleanSearch($input);
      if ($input === '') {
          return '';
      }

      // NOTE(review): in both patterns the class [!-\\\\] is a range running from
      // "!" up to the backslash (so it also covers "(", digits, upper-case
      // letters, ...), not the three literals "!", "-" and "\" - confirm
      // that this is intended.
      $regexPattern = '#(?<=[\\s(]|^)(?<![!-\\\\])(?P<search>/.*?(?<!\\\\)/[im]*)#';
      $quotedPattern = '/(?<=[\\s(]|^)(?<![!-\\\\])(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U';

      //TODO: Replace all those str_replace with PREG_OFFSET_CAPTURE
      if (preg_match_all($regexPattern, $input, $regexHits)) {
          $this->search_regex = self::htmlspecialchars_decodes($regexHits['search']);
          $input = str_replace($regexHits[0], '', $input);
      }

      // Quoted values are looked up in what is left after the regex pass.
      if (preg_match_all($quotedPattern, $input, $quotedHits)) {
          $this->search = $quotedHits['search'];
          $input = str_replace($quotedHits[0], '', $input);
      }

      return $input;
  }
  /**
   * Turn what is left of the search string into plain search values.
   *
   * The input is whitespace-normalised and split on single spaces; the
   * resulting words are appended to $this->search when it already holds an
   * array, and replace it otherwise.
   *
   * @param string $input the search string still to be parsed
   * @return string the whitespace-normalised input ('' when nothing is left)
   */
  private function parseSearch(string $input): string {
      $input = self::cleanSearch($input);
      if ($input === '') {
          return '';
      }

      $words = explode(' ', $input);
      $this->search = is_array($this->search)
          ? array_merge($this->search, $words)
          : $words;

      return $input;
  }
  /**
   * Extract negated free-text search values (prefixed with `!` or `-`).
   *
   * The negation sign must be at the start of the input or follow whitespace
   * or an opening parenthesis. Three forms are recognised, in this order,
   * each being stripped from the input once matched:
   *  - a regex between slashes with optional `i`/`m` flags, stored in $this->not_search_regex;
   *  - a value between matching single or double quotes;
   *  - a bare word, i.e. the run of non-whitespace characters after the sign.
   * Quoted and bare values are accumulated in $this->not_search.
   *
   * @param string $input the search string still to be parsed
   * @return string the input without the negated values ('' when nothing is left)
   */
  private function parseNotSearch(string $input): string {
      $input = self::cleanSearch($input);
      if ($input === '') {
          return '';
      }

      if (preg_match_all('#(?<=[\\s(]|^)[!-](?P<search>(?<!\\\\)/.*?(?<!\\\\)/[im]*)#', $input, $matches)) {
          $this->not_search_regex = self::htmlspecialchars_decodes($matches['search']);
          $input = str_replace($matches[0], '', $input);
      }
      if (preg_match_all('/(?<=[\\s(]|^)[!-](?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
          $this->not_search = $matches['search'];
          $input = str_replace($matches[0], '', $input);
      }

      // No early return here any more: when the passes above consumed the
      // whole input, the final clean-up below used to be skipped, so an empty
      // quoted value (e.g. -"") stayed in $this->not_search.
      $input = self::cleanSearch($input);
      if ($input !== '' && preg_match_all('/(?<=[\\s(]|^)[!-](?P<search>[^\\s]+)/', $input, $matches)) {
          $this->not_search = array_merge(is_array($this->not_search) ? $this->not_search : [], $matches['search']);
          $input = str_replace($matches[0], '', $input);
      }

      // Presumably drops empty strings, as the helper's name suggests - it is
      // defined outside this excerpt.
      $this->not_search = self::removeEmptyValues($this->not_search);
      return $input;
  }
  /**
   * Normalise the whitespace of a search string.
   *
   * Each run of whitespace characters becomes a single space, then leading
   * and trailing whitespace is trimmed.
   *
   * @param string $input the raw search string
   * @return string the normalised string, or '' when preg_replace() does not return a string
   */
  private static function cleanSearch(string $input): string {
      $collapsed = preg_replace('/\\s+/', ' ', $input);

      // preg_replace() yields null on failure; treat that as an empty search.
      return is_string($collapsed) ? trim($collapsed) : '';
  }
  /**
   * Strip the escaping backslash in front of parentheses.
   *
   * @param string $input text possibly containing backslash-escaped parentheses
   * @return string the same text with each escaped parenthesis replaced by the bare one
   */
  private static function unescape(string $input): string {
      $escaped = ['\\(', '\\)'];
      $plain = ['(', ')'];

      return str_replace($escaped, $plain, $input);
  }
  868. }