Search.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. <?php
  2. require_once(LIB_PATH . '/lib_date.php');
  /**
   * Contains a search from the search form.
   *
   * The raw query is split into keyword filters (e:, f:, l:, label:, intitle:,
   * author:, inurl:, date:, pubdate:, #tag), their excluded counterparts
   * (the same keywords prefixed by "!" or "-") and free-text terms.
   * Each extracted part is stored in the object and exposed through a getter.
   */
  class FreshRSS_Search {
      // This contains the user input string
      private $raw_input = '';

      // The following properties are extracted from the raw input.
      // ID and label-name filters stay null when the keyword is absent;
      // intitle, author, inurl and tags are always arrays once a non-empty
      // input has been parsed.
      private $entry_ids;
      private $feed_ids;
      private $label_ids;
      private $label_names;
      private $intitle;
      private $min_date;
      private $max_date;
      private $min_pubdate;
      private $max_pubdate;
      private $inurl;
      private $author;
      private $tags;
      private $search;

      // Same as above, for the excluded ("!" or "-" prefixed) forms
      private $not_entry_ids;
      private $not_feed_ids;
      private $not_label_ids;
      private $not_label_names;
      private $not_intitle;
      private $not_min_date;
      private $not_max_date;
      private $not_min_pubdate;
      private $not_max_pubdate;
      private $not_inurl;
      private $not_author;
      private $not_tags;
      private $not_search;

      /**
       * Parse the given query. A null or empty input leaves every property
       * at its default value.
       *
       * @param string|null $input
       */
      public function __construct($input) {
          if ($input === null || $input === '') {
              return;
          }
          $this->raw_input = $input;

          // Turn HTML-escaped double quotes following a colon back into real quotes
          $unescaped = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input);
          if ($unescaped !== null) {
              $input = $unescaped;
          }

          // The exclusion operator must start a token (start of input or after a
          // whitespace), so that a hyphen inside a word such as "e-mail" is not
          // read as an exclusion.
          $not = '(?<=\s|^)[!-]';
          $is = '\b';

          // Excluded filters are consumed first, so that "-intitle:x" is not
          // picked up afterwards as a positive "intitle:x".
          list($this->not_entry_ids, $input) = self::parseIdLists('/' . $not . 'e:(?P<search>[0-9,]*)/', $input, false);
          list($this->not_feed_ids, $input) = self::parseIdLists('/' . $not . 'f:(?P<search>[0-9,]*)/', $input, false);
          list($this->not_label_ids, $input) = self::parseIdLists('/' . $not . '[lL]:(?P<search>[0-9,]+|[*])/', $input, true);
          list($this->not_label_names, $input) = self::parseLabelNames($not, $input);
          list($this->not_min_pubdate, $this->not_max_pubdate, $input) =
              self::parseDateRange('/' . $not . 'pubdate:(?P<search>[^\s]*)/', $input);
          list($this->not_min_date, $this->not_max_date, $input) =
              self::parseDateRange('/' . $not . 'date:(?P<search>[^\s]*)/', $input);
          list($this->not_intitle, $input) = self::parseTexts($not, 'intitle', $input);
          list($this->not_author, $input) = self::parseTexts($not, 'author', $input);
          list($this->not_inurl, $input) = self::parseWords('/' . $not . 'inurl:(?P<search>[^\s]*)/', $input);
          list($this->not_tags, $input) = self::parseTags('/' . $not . '#(?P<search>[^\s]+)/', $input);

          list($this->entry_ids, $input) = self::parseIdLists('/' . $is . 'e:(?P<search>[0-9,]*)/', $input, false);
          list($this->feed_ids, $input) = self::parseIdLists('/' . $is . 'f:(?P<search>[0-9,]*)/', $input, false);
          list($this->label_ids, $input) = self::parseIdLists('/' . $is . '[lL]:(?P<search>[0-9,]+|[*])/', $input, true);
          list($this->label_names, $input) = self::parseLabelNames($is, $input);
          list($this->min_pubdate, $this->max_pubdate, $input) =
              self::parseDateRange('/' . $is . 'pubdate:(?P<search>[^\s]*)/', $input);
          list($this->min_date, $this->max_date, $input) =
              self::parseDateRange('/' . $is . 'date:(?P<search>[^\s]*)/', $input);
          list($this->intitle, $input) = self::parseTexts($is, 'intitle', $input);
          list($this->author, $input) = self::parseTexts($is, 'author', $input);
          list($this->inurl, $input) = self::parseWords('/' . $is . 'inurl:(?P<search>[^\s]*)/', $input);
          list($this->tags, $input) = self::parseTags('/#(?P<search>[^\s]+)/', $input);

          // Whatever is left is free text: excluded terms first, then plain terms
          list($this->not_search, $input) = self::parseNotSearch($input);
          $this->search = self::parseSearch($input);
      }

      /** @return string the raw user input */
      public function __toString() {
          return $this->getRawInput();
      }

      /** @return string the query exactly as given to the constructor ('' when empty) */
      public function getRawInput() {
          return $this->raw_input;
      }

      /** @return array|null one list of IDs per "e:" keyword, null when absent */
      public function getEntryIds() {
          return $this->entry_ids;
      }

      /** @return array|null one list of IDs per excluded "e:" keyword, null when absent */
      public function getNotEntryIds() {
          return $this->not_entry_ids;
      }

      /** @return array|null one list of IDs per "f:" keyword, null when absent */
      public function getFeedIds() {
          return $this->feed_ids;
      }

      /** @return array|null one list of IDs per excluded "f:" keyword, null when absent */
      public function getNotFeedIds() {
          return $this->not_feed_ids;
      }

      /** @return array|null lists of IDs per "l:" keyword, possibly ending with the string '*' */
      public function getLabelIds() {
          return $this->label_ids;
      }

      /** @return array|null same as getLabelIds() for excluded "l:" keywords */
      public function getNotlabelIds() {
          return $this->not_label_ids;
      }

      /** @return array|null one list of names per "label:" / "labels:" keyword, null when absent */
      public function getLabelNames() {
          return $this->label_names;
      }

      /** @return array|null same as getLabelNames() for excluded keywords */
      public function getNotlabelNames() {
          return $this->not_label_names;
      }

      /** @return array|null values of the "intitle:" keywords */
      public function getIntitle() {
          return $this->intitle;
      }

      /** @return array|null values of the excluded "intitle:" keywords */
      public function getNotIntitle() {
          return $this->not_intitle;
      }

      /** @return mixed lower bound produced by parseDateInterval() for "date:", or null */
      public function getMinDate() {
          return $this->min_date;
      }

      /** @return mixed lower bound produced by parseDateInterval() for excluded "date:", or null */
      public function getNotMinDate() {
          return $this->not_min_date;
      }

      /**
       * @param mixed $value new lower bound for the date filter
       * @return mixed the value just stored
       */
      public function setMinDate($value) {
          return $this->min_date = $value;
      }

      /** @return mixed upper bound produced by parseDateInterval() for "date:", or null */
      public function getMaxDate() {
          return $this->max_date;
      }

      /** @return mixed upper bound produced by parseDateInterval() for excluded "date:", or null */
      public function getNotMaxDate() {
          return $this->not_max_date;
      }

      /**
       * @param mixed $value new upper bound for the date filter
       * @return mixed the value just stored
       */
      public function setMaxDate($value) {
          return $this->max_date = $value;
      }

      /** @return mixed lower bound produced by parseDateInterval() for "pubdate:", or null */
      public function getMinPubdate() {
          return $this->min_pubdate;
      }

      /** @return mixed lower bound produced by parseDateInterval() for excluded "pubdate:", or null */
      public function getNotMinPubdate() {
          return $this->not_min_pubdate;
      }

      /** @return mixed upper bound produced by parseDateInterval() for "pubdate:", or null */
      public function getMaxPubdate() {
          return $this->max_pubdate;
      }

      /** @return mixed upper bound produced by parseDateInterval() for excluded "pubdate:", or null */
      public function getNotMaxPubdate() {
          return $this->not_max_pubdate;
      }

      /** @return array|null values of the "inurl:" keywords */
      public function getInurl() {
          return $this->inurl;
      }

      /** @return array|null values of the excluded "inurl:" keywords */
      public function getNotInurl() {
          return $this->not_inurl;
      }

      /** @return array|null values of the "author:" keywords */
      public function getAuthor() {
          return $this->author;
      }

      /** @return array|null values of the excluded "author:" keywords */
      public function getNotAuthor() {
          return $this->not_author;
      }

      /** @return array|null tags given as "#tag", with "+" decoded to a space */
      public function getTags() {
          return $this->tags;
      }

      /** @return array|null tags given as "-#tag" or "!#tag", with "+" decoded to a space */
      public function getNotTags() {
          return $this->not_tags;
      }

      /** @return array|null free-text terms, null when there are none */
      public function getSearch() {
          return $this->search;
      }

      /** @return array|null excluded free-text terms */
      public function getNotSearch() {
          return $this->not_search;
      }

      /**
       * Drop empty strings from an array. Keys are preserved.
       *
       * @param mixed $anArray
       * @return array the filtered array, or an empty array when the argument is not an array
       */
      private static function removeEmptyValues($anArray) {
          if (!is_array($anArray)) {
              return array();
          }
          return array_filter($anArray, static function ($value) {
              return $value !== '';
          });
      }

      /**
       * Replace "+" by a space and trim, recursively for arrays.
       * array_map() keeps the keys, so arrays with gaps in their keys are safe.
       *
       * @param string|array $value
       * @return string|array
       */
      private static function decodeSpaces($value) {
          if (is_array($value)) {
              return array_map(static function ($item) {
                  return self::decodeSpaces($item);
              }, $value);
          }
          return trim(str_replace('+', ' ', $value));
      }

      /**
       * Remove all unnecessary spaces in the search
       */
      private static function cleanSearch(string $input): string {
          $collapsed = preg_replace('/\s+/', ' ', $input);
          return trim($collapsed === null ? $input : $collapsed);
      }

      /**
       * Collect the "search" group of every match of $pattern and remove the
       * matched spans from the input.
       *
       * @return array [list of captured values or null when nothing matched, remaining input]
       */
      private static function extractMatches(string $pattern, string $input): array {
          if (!preg_match_all($pattern, $input, $matches)) {
              return [null, $input];
          }
          // Remove exactly the matched spans. Replacing the matched text with
          // str_replace() would also remove identical text inside longer tokens
          // (e.g. "intitle:foo" inside "intitle:foobar").
          $remaining = preg_replace($pattern, '', $input);
          return [$matches['search'], $remaining === null ? $input : $remaining];
      }

      /**
       * Collect the values of a keyword accepting both a quoted value
       * (single or double quotes) and a bare word. Quoted values come first.
       *
       * @param string $prefix regex fragment placed before the keyword
       * @param string $keyword regex fragment matching the keyword, without the colon
       * @return array [list of values or null when the keyword is absent, remaining input]
       */
      private static function extractQuotedAndBare(string $prefix, string $keyword, string $input): array {
          list($quoted, $input) = self::extractMatches(
              '/' . $prefix . $keyword . ':(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              $input
          );
          list($bare, $input) = self::extractMatches(
              '/' . $prefix . $keyword . ':(?P<search>[^\s"]*)/',
              $input
          );
          if ($quoted === null && $bare === null) {
              return [null, $input];
          }
          return [
              array_merge($quoted === null ? [] : $quoted, $bare === null ? [] : $bare),
              $input,
          ];
      }

      /**
       * Split comma-separated lists and drop the empty ones.
       *
       * @param array $lists raw comma-separated strings
       * @param bool $wildcard when true, a list equal to "*" is appended as the
       *                       string '*' and the remaining lists are ignored
       * @return array
       */
      private static function splitLists(array $lists, bool $wildcard): array {
          $result = [];
          foreach ($lists as $list) {
              if ($wildcard && $list === '*') {
                  $result[] = '*';
                  break;
              }
              $items = self::removeEmptyValues(explode(',', $list));
              if (count($items) > 0) {
                  $result[] = $items;
              }
          }
          return $result;
      }

      /**
       * Parse the search string to find ID filters (entries, feeds, labels).
       *
       * @return array [lists of IDs or null when the keyword is absent, remaining input]
       */
      private static function parseIdLists(string $pattern, string $input, bool $wildcard): array {
          list($lists, $input) = self::extractMatches($pattern, $input);
          return [$lists === null ? null : self::splitLists($lists, $wildcard), $input];
      }

      /**
       * Parse the search string to find tags (labels) names, given with the
       * "label:" or "labels:" keyword.
       *
       * @return array [lists of names or null when the keyword is absent, remaining input]
       */
      private static function parseLabelNames(string $prefix, string $input): array {
          list($lists, $input) = self::extractQuotedAndBare($prefix, 'labels?', $input);
          return [$lists === null ? null : self::splitLists($lists, false), $input];
      }

      /**
       * Parse the search string to find a text keyword (intitle, author) and
       * the search related to it. The search is the first word following the
       * keyword except when using a delimiter. Supported delimiters are
       * single quote (') and double quotes (").
       *
       * @return array [non-empty values (always an array), remaining input]
       */
      private static function parseTexts(string $prefix, string $keyword, string $input): array {
          list($values, $input) = self::extractQuotedAndBare($prefix, $keyword, $input);
          return [self::removeEmptyValues($values), $input];
      }

      /**
       * Parse the search string to find a single-word keyword (inurl).
       * The search is the first word following the keyword.
       *
       * @return array [non-empty values (always an array), remaining input]
       */
      private static function parseWords(string $pattern, string $input): array {
          list($values, $input) = self::extractMatches($pattern, $input);
          return [self::removeEmptyValues($values), $input];
      }

      /**
       * Parse the search string to find a date keyword (date, pubdate) and
       * the search related to it. Only the first non-empty value is used.
       *
       * parseDateInterval() is defined outside this class (presumably in the
       * lib_date.php file required at the top of this file).
       *
       * @return array [min, max, remaining input]; min and max are null when no usable value was found
       */
      private static function parseDateRange(string $pattern, string $input): array {
          $min = null;
          $max = null;
          list($dates, $input) = self::extractMatches($pattern, $input);
          if ($dates !== null) {
              $dates = self::removeEmptyValues($dates);
              // The filter keeps the original keys, so the first usable value is
              // not necessarily at index 0.
              $first = reset($dates);
              if (!empty($first)) {
                  list($min, $max) = parseDateInterval($first);
              }
          }
          return [$min, $max, $input];
      }

      /**
       * Parse the search string to find tags keyword (# followed by a word)
       * and the search related to it.
       * The search is the first word following the #.
       *
       * @return array [decoded tags (always an array), remaining input]
       */
      private static function parseTags(string $pattern, string $input): array {
          list($values, $input) = self::extractMatches($pattern, $input);
          return [self::decodeSpaces(self::removeEmptyValues($values)), $input];
      }

      /**
       * Parse the search string to find excluded search values: words or
       * quoted strings prefixed by "!" or "-" at the start of a token.
       *
       * @return array [excluded terms or null when the input is empty, remaining input]
       */
      private static function parseNotSearch(string $input): array {
          $input = self::cleanSearch($input);
          if ($input === '') {
              return [null, ''];
          }
          list($quoted, $input) = self::extractMatches(
              '/(?<=\s|^)[!-](?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              $input
          );
          list($bare, $input) = self::extractMatches('/(?<=\s|^)[!-](?P<search>[^\s]+)/', $input);
          $terms = array_merge($quoted === null ? [] : $quoted, $bare === null ? [] : $bare);
          return [self::removeEmptyValues($terms), $input];
      }

      /**
       * Parse the search string to find search values.
       * Every word is a distinct search value, except when using a delimiter.
       * Supported delimiters are single quote (') and double quotes (").
       *
       * @return array|null the search terms (quoted ones first), null when there are none
       */
      private static function parseSearch(string $input) {
          $input = self::cleanSearch($input);
          if ($input === '') {
              return null;
          }
          list($terms, $input) = self::extractMatches('/(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input);
          $input = self::cleanSearch($input);
          if ($input === '') {
              return $terms;
          }
          $words = explode(' ', $input);
          return $terms === null ? $words : array_merge($terms, $words);
      }
  }