Search.php 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879
  1. <?php
  2. declare(strict_types=1);
  3. require_once(LIB_PATH . '/lib_date.php');
  4. /**
  5. * Contains a search from the search form.
  6. *
  7. * It allows to extract meaningful bits of the search and store them in a
  8. * convenient object
  9. */
  10. class FreshRSS_Search implements \Stringable {
  11. /**
  12. * This contains the user input string
  13. */
  14. private string $raw_input = '';
  15. // The following properties are extracted from the raw input
  16. /** @var list<string>|null */
  17. private ?array $entry_ids = null;
  18. /** @var list<int>|null */
  19. private ?array $feed_ids = null;
  20. /** @var list<int>|'*'|null */
  21. private $label_ids = null;
  22. /** @var list<string>|null */
  23. private ?array $label_names = null;
  24. /** @var list<string>|null */
  25. private ?array $intitle = null;
  26. /** @var list<string>|null */
  27. private ?array $intitle_regex = null;
  28. /** @var list<string>|null */
  29. private ?array $intext = null;
  30. /** @var list<string>|null */
  31. private ?array $intext_regex = null;
  32. /** @var int|false|null */
  33. private $min_date = null;
  34. /** @var int|false|null */
  35. private $max_date = null;
  36. /** @var int|false|null */
  37. private $min_pubdate = null;
  38. /** @var int|false|null */
  39. private $max_pubdate = null;
  40. /** @var list<string>|null */
  41. private ?array $inurl = null;
  42. /** @var list<string>|null */
  43. private ?array $inurl_regex = null;
  44. /** @var list<string>|null */
  45. private ?array $author = null;
  46. /** @var list<string>|null */
  47. private ?array $author_regex = null;
  48. /** @var list<string>|null */
  49. private ?array $tags = null;
  50. /** @var list<string>|null */
  51. private ?array $tags_regex = null;
  52. /** @var list<string>|null */
  53. private ?array $search = null;
  54. /** @var list<string>|null */
  55. private ?array $search_regex = null;
  56. /** @var list<string>|null */
  57. private ?array $not_entry_ids = null;
  58. /** @var list<int>|null */
  59. private ?array $not_feed_ids = null;
  60. /** @var list<int>|'*'|null */
  61. private $not_label_ids = null;
  62. /** @var list<string>|null */
  63. private ?array $not_label_names = null;
  64. /** @var list<string>|null */
  65. private ?array $not_intitle = null;
  66. /** @var list<string>|null */
  67. private ?array $not_intitle_regex = null;
  68. /** @var list<string>|null */
  69. private ?array $not_intext = null;
  70. /** @var list<string>|null */
  71. private ?array $not_intext_regex = null;
  72. /** @var int|false|null */
  73. private $not_min_date = null;
  74. /** @var int|false|null */
  75. private $not_max_date = null;
  76. /** @var int|false|null */
  77. private $not_min_pubdate = null;
  78. /** @var int|false|null */
  79. private $not_max_pubdate = null;
  80. /** @var list<string>|null */
  81. private ?array $not_inurl = null;
  82. /** @var list<string>|null */
  83. private ?array $not_inurl_regex = null;
  84. /** @var list<string>|null */
  85. private ?array $not_author = null;
  86. /** @var list<string>|null */
  87. private ?array $not_author_regex = null;
  88. /** @var list<string>|null */
  89. private ?array $not_tags = null;
  90. /** @var list<string>|null */
  91. private ?array $not_tags_regex = null;
  92. /** @var list<string>|null */
  93. private ?array $not_search = null;
  94. /** @var list<string>|null */
  95. private ?array $not_search_regex = null;
  96. public function __construct(string $input) {
  97. $input = self::cleanSearch($input);
  98. $input = self::unescape($input);
  99. $input = FreshRSS_BooleanSearch::unescapeLiteralParentheses($input);
  100. $this->raw_input = $input;
  101. $input = $this->parseNotEntryIds($input);
  102. $input = $this->parseNotFeedIds($input);
  103. $input = $this->parseNotLabelIds($input);
  104. $input = $this->parseNotLabelNames($input);
  105. $input = $this->parseNotPubdateSearch($input);
  106. $input = $this->parseNotDateSearch($input);
  107. $input = $this->parseNotIntitleSearch($input);
  108. $input = $this->parseNotIntextSearch($input);
  109. $input = $this->parseNotAuthorSearch($input);
  110. $input = $this->parseNotInurlSearch($input);
  111. $input = $this->parseNotTagsSearch($input);
  112. $input = $this->parseEntryIds($input);
  113. $input = $this->parseFeedIds($input);
  114. $input = $this->parseLabelIds($input);
  115. $input = $this->parseLabelNames($input);
  116. $input = $this->parsePubdateSearch($input);
  117. $input = $this->parseDateSearch($input);
  118. $input = $this->parseIntitleSearch($input);
  119. $input = $this->parseIntextSearch($input);
  120. $input = $this->parseAuthorSearch($input);
  121. $input = $this->parseInurlSearch($input);
  122. $input = $this->parseTagsSearch($input);
  123. $input = $this->parseQuotedSearch($input);
  124. $input = $this->parseNotSearch($input);
  125. $this->parseSearch($input);
  126. }
  127. #[\Override]
  128. public function __toString(): string {
  129. return $this->getRawInput();
  130. }
  131. public function getRawInput(): string {
  132. return $this->raw_input;
  133. }
  134. /** @return list<string>|null */
  135. public function getEntryIds(): ?array {
  136. return $this->entry_ids;
  137. }
  138. /** @return list<string>|null */
  139. public function getNotEntryIds(): ?array {
  140. return $this->not_entry_ids;
  141. }
  142. /** @return list<int>|null */
  143. public function getFeedIds(): ?array {
  144. return $this->feed_ids;
  145. }
  146. /** @return list<int>|null */
  147. public function getNotFeedIds(): ?array {
  148. return $this->not_feed_ids;
  149. }
  150. /** @return list<int>|'*'|null */
  151. public function getLabelIds(): array|string|null {
  152. return $this->label_ids;
  153. }
  154. /** @return list<int>|'*'|null */
  155. public function getNotLabelIds(): array|string|null {
  156. return $this->not_label_ids;
  157. }
  158. /** @return list<string>|null */
  159. public function getLabelNames(): ?array {
  160. return $this->label_names;
  161. }
  162. /** @return list<string>|null */
  163. public function getNotLabelNames(): ?array {
  164. return $this->not_label_names;
  165. }
  166. /** @return list<string>|null */
  167. public function getIntitle(): ?array {
  168. return $this->intitle;
  169. }
  170. /** @return list<string>|null */
  171. public function getIntitleRegex(): ?array {
  172. return $this->intitle_regex;
  173. }
  174. /** @return list<string>|null */
  175. public function getNotIntitle(): ?array {
  176. return $this->not_intitle;
  177. }
  178. /** @return list<string>|null */
  179. public function getNotIntitleRegex(): ?array {
  180. return $this->not_intitle_regex;
  181. }
  182. /** @return list<string>|null */
  183. public function getIntext(): ?array {
  184. return $this->intext;
  185. }
  186. /** @return list<string>|null */
  187. public function getIntextRegex(): ?array {
  188. return $this->intext_regex;
  189. }
  190. /** @return list<string>|null */
  191. public function getNotIntext(): ?array {
  192. return $this->not_intext;
  193. }
  194. /** @return list<string>|null */
  195. public function getNotIntextRegex(): ?array {
  196. return $this->not_intext_regex;
  197. }
  198. public function getMinDate(): ?int {
  199. return $this->min_date ?: null;
  200. }
  201. public function getNotMinDate(): ?int {
  202. return $this->not_min_date ?: null;
  203. }
  204. public function setMinDate(int $value): void {
  205. $this->min_date = $value;
  206. }
  207. public function getMaxDate(): ?int {
  208. return $this->max_date ?: null;
  209. }
  210. public function getNotMaxDate(): ?int {
  211. return $this->not_max_date ?: null;
  212. }
  213. public function setMaxDate(int $value): void {
  214. $this->max_date = $value;
  215. }
  216. public function getMinPubdate(): ?int {
  217. return $this->min_pubdate ?: null;
  218. }
  219. public function getNotMinPubdate(): ?int {
  220. return $this->not_min_pubdate ?: null;
  221. }
  222. public function getMaxPubdate(): ?int {
  223. return $this->max_pubdate ?: null;
  224. }
  225. public function getNotMaxPubdate(): ?int {
  226. return $this->not_max_pubdate ?: null;
  227. }
  228. /** @return list<string>|null */
  229. public function getInurl(): ?array {
  230. return $this->inurl;
  231. }
  232. /** @return list<string>|null */
  233. public function getInurlRegex(): ?array {
  234. return $this->inurl_regex;
  235. }
  236. /** @return list<string>|null */
  237. public function getNotInurl(): ?array {
  238. return $this->not_inurl;
  239. }
  240. /** @return list<string>|null */
  241. public function getNotInurlRegex(): ?array {
  242. return $this->not_inurl_regex;
  243. }
  244. /** @return list<string>|null */
  245. public function getAuthor(): ?array {
  246. return $this->author;
  247. }
  248. /** @return list<string>|null */
  249. public function getAuthorRegex(): ?array {
  250. return $this->author_regex;
  251. }
  252. /** @return list<string>|null */
  253. public function getNotAuthor(): ?array {
  254. return $this->not_author;
  255. }
  256. /** @return list<string>|null */
  257. public function getNotAuthorRegex(): ?array {
  258. return $this->not_author_regex;
  259. }
  260. /** @return list<string>|null */
  261. public function getTags(): ?array {
  262. return $this->tags;
  263. }
  264. /** @return list<string>|null */
  265. public function getTagsRegex(): ?array {
  266. return $this->tags_regex;
  267. }
  268. /** @return list<string>|null */
  269. public function getNotTags(): ?array {
  270. return $this->not_tags;
  271. }
  272. /** @return list<string>|null */
  273. public function getNotTagsRegex(): ?array {
  274. return $this->not_tags_regex;
  275. }
  276. /** @return list<string>|null */
  277. public function getSearch(): ?array {
  278. return $this->search;
  279. }
  280. /** @return list<string>|null */
  281. public function getSearchRegex(): ?array {
  282. return $this->search_regex;
  283. }
  284. /** @return list<string>|null */
  285. public function getNotSearch(): ?array {
  286. return $this->not_search;
  287. }
  288. /** @return list<string>|null */
  289. public function getNotSearchRegex(): ?array {
  290. return $this->not_search_regex;
  291. }
  292. /**
  293. * @param list<string>|null $anArray
  294. * @return list<string>
  295. */
  296. private static function removeEmptyValues(?array $anArray): array {
  297. return empty($anArray) ? [] : array_values(array_filter($anArray, static fn(string $value) => $value !== ''));
  298. }
  299. /**
  300. * @param list<string>|string $value
  301. * @return ($value is string ? string : list<string>)
  302. */
  303. private static function decodeSpaces(array|string $value): array|string {
  304. if (is_array($value)) {
  305. foreach ($value as &$val) {
  306. $val = self::decodeSpaces($val);
  307. }
  308. } else {
  309. $value = trim(str_replace('+', ' ', $value));
  310. }
  311. return $value;
  312. }
  313. /**
  314. * @param list<string> $strings
  315. * @return list<string>
  316. */
  317. private static function htmlspecialchars_decodes(array $strings): array {
  318. return array_map(static fn(string $s) => htmlspecialchars_decode($s, ENT_QUOTES), $strings);
  319. }
  320. /**
  321. * Parse the search string to find entry (article) IDs.
  322. */
  323. private function parseEntryIds(string $input): string {
  324. if (preg_match_all('/\\be:(?P<search>[0-9,]*)/', $input, $matches)) {
  325. $input = str_replace($matches[0], '', $input);
  326. $ids_lists = $matches['search'];
  327. $this->entry_ids = [];
  328. foreach ($ids_lists as $ids_list) {
  329. $entry_ids = explode(',', $ids_list);
  330. $entry_ids = self::removeEmptyValues($entry_ids);
  331. if (!empty($entry_ids)) {
  332. $this->entry_ids = array_merge($this->entry_ids, $entry_ids);
  333. }
  334. }
  335. }
  336. return $input;
  337. }
  338. private function parseNotEntryIds(string $input): string {
  339. if (preg_match_all('/(?<=[\\s(]|^)[!-]e:(?P<search>[0-9,]*)/', $input, $matches)) {
  340. $input = str_replace($matches[0], '', $input);
  341. $ids_lists = $matches['search'];
  342. $this->not_entry_ids = [];
  343. foreach ($ids_lists as $ids_list) {
  344. $entry_ids = explode(',', $ids_list);
  345. $entry_ids = self::removeEmptyValues($entry_ids);
  346. if (!empty($entry_ids)) {
  347. $this->not_entry_ids = array_merge($this->not_entry_ids, $entry_ids);
  348. }
  349. }
  350. }
  351. return $input;
  352. }
  353. private function parseFeedIds(string $input): string {
  354. if (preg_match_all('/\\bf:(?P<search>[0-9,]*)/', $input, $matches)) {
  355. $input = str_replace($matches[0], '', $input);
  356. $ids_lists = $matches['search'];
  357. $this->feed_ids = [];
  358. foreach ($ids_lists as $ids_list) {
  359. $feed_ids = explode(',', $ids_list);
  360. $feed_ids = self::removeEmptyValues($feed_ids);
  361. /** @var list<int> $feed_ids */
  362. $feed_ids = array_map('intval', $feed_ids);
  363. if (!empty($feed_ids)) {
  364. $this->feed_ids = array_merge($this->feed_ids, $feed_ids);
  365. }
  366. }
  367. }
  368. return $input;
  369. }
  370. private function parseNotFeedIds(string $input): string {
  371. if (preg_match_all('/(?<=[\\s(]|^)[!-]f:(?P<search>[0-9,]*)/', $input, $matches)) {
  372. $input = str_replace($matches[0], '', $input);
  373. $ids_lists = $matches['search'];
  374. $this->not_feed_ids = [];
  375. foreach ($ids_lists as $ids_list) {
  376. $feed_ids = explode(',', $ids_list);
  377. $feed_ids = self::removeEmptyValues($feed_ids);
  378. /** @var list<int> $feed_ids */
  379. $feed_ids = array_map('intval', $feed_ids);
  380. if (!empty($feed_ids)) {
  381. $this->not_feed_ids = array_merge($this->not_feed_ids, $feed_ids);
  382. }
  383. }
  384. }
  385. return $input;
  386. }
  387. /**
  388. * Parse the search string to find tags (labels) IDs.
  389. */
  390. private function parseLabelIds(string $input): string {
  391. if (preg_match_all('/\\b[lL]:(?P<search>[0-9,]+|[*])/', $input, $matches)) {
  392. $input = str_replace($matches[0], '', $input);
  393. $ids_lists = $matches['search'];
  394. $this->label_ids = [];
  395. foreach ($ids_lists as $ids_list) {
  396. if ($ids_list === '*') {
  397. $this->label_ids = '*';
  398. break;
  399. }
  400. $label_ids = explode(',', $ids_list);
  401. $label_ids = self::removeEmptyValues($label_ids);
  402. /** @var list<int> $label_ids */
  403. $label_ids = array_map('intval', $label_ids);
  404. if (!empty($label_ids)) {
  405. $this->label_ids = array_merge($this->label_ids, $label_ids);
  406. }
  407. }
  408. }
  409. return $input;
  410. }
  411. private function parseNotLabelIds(string $input): string {
  412. if (preg_match_all('/(?<=[\\s(]|^)[!-][lL]:(?P<search>[0-9,]+|[*])/', $input, $matches)) {
  413. $input = str_replace($matches[0], '', $input);
  414. $ids_lists = $matches['search'];
  415. $this->not_label_ids = [];
  416. foreach ($ids_lists as $ids_list) {
  417. if ($ids_list === '*') {
  418. $this->not_label_ids = '*';
  419. break;
  420. }
  421. $label_ids = explode(',', $ids_list);
  422. $label_ids = self::removeEmptyValues($label_ids);
  423. /** @var list<int> $label_ids */
  424. $label_ids = array_map('intval', $label_ids);
  425. if (!empty($label_ids)) {
  426. $this->not_label_ids = array_merge($this->not_label_ids, $label_ids);
  427. }
  428. }
  429. }
  430. return $input;
  431. }
  432. /**
  433. * Parse the search string to find tags (labels) names.
  434. */
  435. private function parseLabelNames(string $input): string {
  436. $names_lists = [];
  437. if (preg_match_all('/\\blabels?:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  438. $names_lists = $matches['search'];
  439. $input = str_replace($matches[0], '', $input);
  440. }
  441. if (preg_match_all('/\\blabels?:(?P<search>[^\s"]*)/', $input, $matches)) {
  442. $names_lists = array_merge($names_lists, $matches['search']);
  443. $input = str_replace($matches[0], '', $input);
  444. }
  445. if (!empty($names_lists)) {
  446. $this->label_names = [];
  447. foreach ($names_lists as $names_list) {
  448. $names_array = explode(',', $names_list);
  449. $names_array = self::removeEmptyValues($names_array);
  450. if (!empty($names_array)) {
  451. $this->label_names = array_merge($this->label_names, $names_array);
  452. }
  453. }
  454. }
  455. return $input;
  456. }
  457. /**
  458. * Parse the search string to find tags (labels) names to exclude.
  459. */
  460. private function parseNotLabelNames(string $input): string {
  461. $names_lists = [];
  462. if (preg_match_all('/(?<=[\\s(]|^)[!-]labels?:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  463. $names_lists = $matches['search'];
  464. $input = str_replace($matches[0], '', $input);
  465. }
  466. if (preg_match_all('/(?<=[\\s(]|^)[!-]labels?:(?P<search>[^\\s"]*)/', $input, $matches)) {
  467. $names_lists = array_merge($names_lists, $matches['search']);
  468. $input = str_replace($matches[0], '', $input);
  469. }
  470. if (!empty($names_lists)) {
  471. $this->not_label_names = [];
  472. foreach ($names_lists as $names_list) {
  473. $names_array = explode(',', $names_list);
  474. $names_array = self::removeEmptyValues($names_array);
  475. if (!empty($names_array)) {
  476. $this->not_label_names = array_merge($this->not_label_names, $names_array);
  477. }
  478. }
  479. }
  480. return $input;
  481. }
  482. /**
  483. * Parse the search string to find intitle keyword and the search related to it.
  484. */
  485. private function parseIntitleSearch(string $input): string {
  486. if (preg_match_all('#\\bintitle:(?P<search>/.*?(?<!\\\\)/[im]*)#', $input, $matches)) {
  487. $this->intitle_regex = self::htmlspecialchars_decodes($matches['search']);
  488. $input = str_replace($matches[0], '', $input);
  489. }
  490. if (preg_match_all('/\\bintitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  491. $this->intitle = $matches['search'];
  492. $input = str_replace($matches[0], '', $input);
  493. }
  494. if (preg_match_all('/\\bintitle:(?P<search>[^\s"]*)/', $input, $matches)) {
  495. $this->intitle = array_merge($this->intitle ?? [], $matches['search']);
  496. $input = str_replace($matches[0], '', $input);
  497. }
  498. $this->intitle = self::removeEmptyValues($this->intitle);
  499. if (empty($this->intitle)) {
  500. $this->intitle = null;
  501. }
  502. return $input;
  503. }
  504. private function parseNotIntitleSearch(string $input): string {
  505. if (preg_match_all('#(?<=[\\s(]|^)[!-]intitle:(?P<search>/.*?(?<!\\\\)/[im]*)#', $input, $matches)) {
  506. $this->not_intitle_regex = self::htmlspecialchars_decodes($matches['search']);
  507. $input = str_replace($matches[0], '', $input);
  508. }
  509. if (preg_match_all('/(?<=[\\s(]|^)[!-]intitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  510. $this->not_intitle = $matches['search'];
  511. $input = str_replace($matches[0], '', $input);
  512. }
  513. if (preg_match_all('/(?<=[\\s(]|^)[!-]intitle:(?P<search>[^\s"]*)/', $input, $matches)) {
  514. $this->not_intitle = array_merge($this->not_intitle ?? [], $matches['search']);
  515. $input = str_replace($matches[0], '', $input);
  516. }
  517. $this->not_intitle = self::removeEmptyValues($this->not_intitle);
  518. if (empty($this->not_intitle)) {
  519. $this->not_intitle = null;
  520. }
  521. return $input;
  522. }
  523. /**
  524. * Parse the search string to find intext keyword and the search related to it.
  525. */
  526. private function parseIntextSearch(string $input): string {
  527. if (preg_match_all('#\\bintext:(?P<search>/.*?(?<!\\\\)/[im]*)#', $input, $matches)) {
  528. $this->intext_regex = self::htmlspecialchars_decodes($matches['search']);
  529. $input = str_replace($matches[0], '', $input);
  530. }
  531. if (preg_match_all('/\\bintext:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  532. $this->intext = $matches['search'];
  533. $input = str_replace($matches[0], '', $input);
  534. }
  535. if (preg_match_all('/\\bintext:(?P<search>[^\s"]*)/', $input, $matches)) {
  536. $this->intext = array_merge($this->intext ?? [], $matches['search']);
  537. $input = str_replace($matches[0], '', $input);
  538. }
  539. $this->intext = self::removeEmptyValues($this->intext);
  540. if (empty($this->intext)) {
  541. $this->intext = null;
  542. }
  543. return $input;
  544. }
  545. private function parseNotIntextSearch(string $input): string {
  546. if (preg_match_all('#(?<=[\\s(]|^)[!-]intext:(?P<search>/.*?(?<!\\\\)/[im]*)#', $input, $matches)) {
  547. $this->not_intext_regex = self::htmlspecialchars_decodes($matches['search']);
  548. $input = str_replace($matches[0], '', $input);
  549. }
  550. if (preg_match_all('/(?<=[\\s(]|^)[!-]intext:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  551. $this->not_intext = $matches['search'];
  552. $input = str_replace($matches[0], '', $input);
  553. }
  554. if (preg_match_all('/(?<=[\\s(]|^)[!-]intext:(?P<search>[^\s"]*)/', $input, $matches)) {
  555. $this->not_intext = array_merge($this->not_intext ?? [], $matches['search']);
  556. $input = str_replace($matches[0], '', $input);
  557. }
  558. $this->not_intext = self::removeEmptyValues($this->not_intext);
  559. if (empty($this->not_intext)) {
  560. $this->not_intext = null;
  561. }
  562. return $input;
  563. }
  564. /**
  565. * Parse the search string to find author keyword and the search related to it.
  566. * The search is the first word following the keyword except when using
  567. * a delimiter. Supported delimiters are single quote (') and double quotes (").
  568. */
  569. private function parseAuthorSearch(string $input): string {
  570. if (preg_match_all('#\\bauthor:(?P<search>/.*?(?<!\\\\)/[im]*)#', $input, $matches)) {
  571. $this->author_regex = self::htmlspecialchars_decodes($matches['search']);
  572. $input = str_replace($matches[0], '', $input);
  573. }
  574. if (preg_match_all('/\\bauthor:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  575. $this->author = $matches['search'];
  576. $input = str_replace($matches[0], '', $input);
  577. }
  578. if (preg_match_all('/\\bauthor:(?P<search>[^\s"]*)/', $input, $matches)) {
  579. $this->author = array_merge($this->author ?? [], $matches['search']);
  580. $input = str_replace($matches[0], '', $input);
  581. }
  582. $this->author = self::removeEmptyValues($this->author);
  583. if (empty($this->author)) {
  584. $this->author = null;
  585. }
  586. return $input;
  587. }
  588. private function parseNotAuthorSearch(string $input): string {
  589. if (preg_match_all('#(?<=[\\s(]|^)[!-]author:(?P<search>/.*?(?<!\\\\)/[im]*)#', $input, $matches)) {
  590. $this->not_author_regex = self::htmlspecialchars_decodes($matches['search']);
  591. $input = str_replace($matches[0], '', $input);
  592. }
  593. if (preg_match_all('/(?<=[\\s(]|^)[!-]author:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  594. $this->not_author = $matches['search'];
  595. $input = str_replace($matches[0], '', $input);
  596. }
  597. if (preg_match_all('/(?<=[\\s(]|^)[!-]author:(?P<search>[^\s"]*)/', $input, $matches)) {
  598. $this->not_author = array_merge($this->not_author ?? [], $matches['search']);
  599. $input = str_replace($matches[0], '', $input);
  600. }
  601. $this->not_author = self::removeEmptyValues($this->not_author);
  602. if (empty($this->not_author)) {
  603. $this->not_author = null;
  604. }
  605. return $input;
  606. }
  607. /**
  608. * Parse the search string to find inurl keyword and the search related to it.
  609. * The search is the first word following the keyword.
  610. */
  611. private function parseInurlSearch(string $input): string {
  612. if (preg_match_all('#\\binurl:(?P<search>/.*?(?<!\\\\)/[im]*)#', $input, $matches)) {
  613. $this->inurl_regex = self::htmlspecialchars_decodes($matches['search']);
  614. $input = str_replace($matches[0], '', $input);
  615. }
  616. if (preg_match_all('/\\binurl:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  617. $this->inurl = $matches['search'];
  618. $input = str_replace($matches[0], '', $input);
  619. }
  620. if (preg_match_all('/\\binurl:(?P<search>[^\\s]*)/', $input, $matches)) {
  621. $this->inurl = $matches['search'];
  622. $input = str_replace($matches[0], '', $input);
  623. }
  624. $this->inurl = self::removeEmptyValues($this->inurl);
  625. if (empty($this->inurl)) {
  626. $this->inurl = null;
  627. }
  628. return $input;
  629. }
  630. private function parseNotInurlSearch(string $input): string {
  631. if (preg_match_all('#(?<=[\\s(]|^)[!-]inurl:(?P<search>/.*?(?<!\\\\)/[im]*)#', $input, $matches)) {
  632. $this->not_inurl_regex = self::htmlspecialchars_decodes($matches['search']);
  633. $input = str_replace($matches[0], '', $input);
  634. }
  635. if (preg_match_all('/(?<=[\\s(]|^)[!-]inurl:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  636. $this->not_inurl = $matches['search'];
  637. $input = str_replace($matches[0], '', $input);
  638. }
  639. if (preg_match_all('/(?<=[\\s(]|^)[!-]inurl:(?P<search>[^\\s]*)/', $input, $matches)) {
  640. $this->not_inurl = $matches['search'];
  641. $input = str_replace($matches[0], '', $input);
  642. }
  643. $this->not_inurl = self::removeEmptyValues($this->not_inurl);
  644. if (empty($this->not_inurl)) {
  645. $this->not_inurl = null;
  646. }
  647. return $input;
  648. }
  649. /**
  650. * Parse the search string to find date keyword and the search related to it.
  651. * The search is the first word following the keyword.
  652. */
  653. private function parseDateSearch(string $input): string {
  654. if (preg_match_all('/\\bdate:(?P<search>[^\\s]*)/', $input, $matches)) {
  655. $input = str_replace($matches[0], '', $input);
  656. $dates = self::removeEmptyValues($matches['search']);
  657. if (!empty($dates[0])) {
  658. [$this->min_date, $this->max_date] = parseDateInterval($dates[0]);
  659. }
  660. }
  661. return $input;
  662. }
  663. private function parseNotDateSearch(string $input): string {
  664. if (preg_match_all('/(?<=[\\s(]|^)[!-]date:(?P<search>[^\\s]*)/', $input, $matches)) {
  665. $input = str_replace($matches[0], '', $input);
  666. $dates = self::removeEmptyValues($matches['search']);
  667. if (!empty($dates[0])) {
  668. [$this->not_min_date, $this->not_max_date] = parseDateInterval($dates[0]);
  669. }
  670. }
  671. return $input;
  672. }
  673. /**
  674. * Parse the search string to find pubdate keyword and the search related to it.
  675. * The search is the first word following the keyword.
  676. */
  677. private function parsePubdateSearch(string $input): string {
  678. if (preg_match_all('/\\bpubdate:(?P<search>[^\\s]*)/', $input, $matches)) {
  679. $input = str_replace($matches[0], '', $input);
  680. $dates = self::removeEmptyValues($matches['search']);
  681. if (!empty($dates[0])) {
  682. [$this->min_pubdate, $this->max_pubdate] = parseDateInterval($dates[0]);
  683. }
  684. }
  685. return $input;
  686. }
  687. private function parseNotPubdateSearch(string $input): string {
  688. if (preg_match_all('/(?<=[\\s(]|^)[!-]pubdate:(?P<search>[^\\s]*)/', $input, $matches)) {
  689. $input = str_replace($matches[0], '', $input);
  690. $dates = self::removeEmptyValues($matches['search']);
  691. if (!empty($dates[0])) {
  692. [$this->not_min_pubdate, $this->not_max_pubdate] = parseDateInterval($dates[0]);
  693. }
  694. }
  695. return $input;
  696. }
  697. /**
  698. * Parse the search string to find tags keyword (# followed by a word)
  699. * and the search related to it.
  700. * The search is the first word following the #.
  701. */
  702. private function parseTagsSearch(string $input): string {
  703. if (preg_match_all('%#(?P<search>/.*?(?<!\\\\)/[im]*)%', $input, $matches)) {
  704. $this->tags_regex = self::htmlspecialchars_decodes($matches['search']);
  705. $input = str_replace($matches[0], '', $input);
  706. }
  707. if (preg_match_all('/#(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  708. $this->tags = $matches['search'];
  709. $input = str_replace($matches[0], '', $input);
  710. }
  711. if (preg_match_all('/#(?P<search>[^\\s]+)/', $input, $matches)) {
  712. $this->tags = $matches['search'];
  713. $input = str_replace($matches[0], '', $input);
  714. }
  715. $this->tags = self::removeEmptyValues($this->tags);
  716. if (empty($this->tags)) {
  717. $this->tags = null;
  718. } else {
  719. $this->tags = self::decodeSpaces($this->tags);
  720. }
  721. return $input;
  722. }
  723. private function parseNotTagsSearch(string $input): string {
  724. if (preg_match_all('%(?<=[\\s(]|^)[!-]#(?P<search>/.*?(?<!\\\\)/[im]*)%', $input, $matches)) {
  725. $this->not_tags_regex = self::htmlspecialchars_decodes($matches['search']);
  726. $input = str_replace($matches[0], '', $input);
  727. }
  728. if (preg_match_all('/(?<=[\\s(]|^)[!-]#(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  729. $this->not_tags = $matches['search'];
  730. $input = str_replace($matches[0], '', $input);
  731. }
  732. if (preg_match_all('/(?<=[\\s(]|^)[!-]#(?P<search>[^\\s]+)/', $input, $matches)) {
  733. $this->not_tags = $matches['search'];
  734. $input = str_replace($matches[0], '', $input);
  735. }
  736. $this->not_tags = self::removeEmptyValues($this->not_tags);
  737. if (empty($this->not_tags)) {
  738. $this->not_tags = null;
  739. } else {
  740. $this->not_tags = self::decodeSpaces($this->not_tags);
  741. }
  742. return $input;
  743. }
  744. /**
  745. * Parse the search string to find search values.
  746. * Every word is a distinct search value using a delimiter.
  747. * Supported delimiters are single quote (') and double quotes (") and regex (/).
  748. */
  749. private function parseQuotedSearch(string $input): string {
  750. $input = self::cleanSearch($input);
  751. if ($input === '') {
  752. return '';
  753. }
  754. if (preg_match_all('#(?<=[\\s(]|^)(?<![!-\\\\])(?P<search>/.*?(?<!\\\\)/[im]*)#', $input, $matches)) {
  755. $this->search_regex = self::htmlspecialchars_decodes($matches['search']);
  756. //TODO: Replace all those str_replace with PREG_OFFSET_CAPTURE
  757. $input = str_replace($matches[0], '', $input);
  758. }
  759. if (preg_match_all('/(?<=[\\s(]|^)(?<![!-\\\\])(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  760. $this->search = $matches['search'];
  761. //TODO: Replace all those str_replace with PREG_OFFSET_CAPTURE
  762. $input = str_replace($matches[0], '', $input);
  763. }
  764. return $input;
  765. }
  766. /**
  767. * Parse the search string to find search values.
  768. * Every word is a distinct search value.
  769. */
  770. private function parseSearch(string $input): string {
  771. $input = self::cleanSearch($input);
  772. if ($input === '') {
  773. return '';
  774. }
  775. if (is_array($this->search)) {
  776. $this->search = array_merge($this->search, explode(' ', $input));
  777. } else {
  778. $this->search = explode(' ', $input);
  779. }
  780. return $input;
  781. }
  782. private function parseNotSearch(string $input): string {
  783. $input = self::cleanSearch($input);
  784. if ($input === '') {
  785. return '';
  786. }
  787. if (preg_match_all('#(?<=[\\s(]|^)[!-](?P<search>(?<!\\\\)/.*?(?<!\\\\)/[im]*)#', $input, $matches)) {
  788. $this->not_search_regex = self::htmlspecialchars_decodes($matches['search']);
  789. $input = str_replace($matches[0], '', $input);
  790. }
  791. if (preg_match_all('/(?<=[\\s(]|^)[!-](?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  792. $this->not_search = $matches['search'];
  793. $input = str_replace($matches[0], '', $input);
  794. }
  795. $input = self::cleanSearch($input);
  796. if ($input === '') {
  797. return '';
  798. }
  799. if (preg_match_all('/(?<=[\\s(]|^)[!-](?P<search>[^\\s]+)/', $input, $matches)) {
  800. $this->not_search = array_merge(is_array($this->not_search) ? $this->not_search : [], $matches['search']);
  801. $input = str_replace($matches[0], '', $input);
  802. }
  803. $this->not_search = self::removeEmptyValues($this->not_search);
  804. return $input;
  805. }
  806. /**
  807. * Remove all unnecessary spaces in the search
  808. */
  809. private static function cleanSearch(string $input): string {
  810. $input = preg_replace('/\\s+/', ' ', $input);
  811. if (!is_string($input)) {
  812. return '';
  813. }
  814. return trim($input);
  815. }
  816. /** Remove escaping backslashes for parenthesis logic */
  817. private static function unescape(string $input): string {
  818. return str_replace(['\\(', '\\)'], ['(', ')'], $input);
  819. }
  820. }