Search.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. <?php
  2. require_once(LIB_PATH . '/lib_date.php');
  /**
   * Contains a search from the search form.
   *
   * The raw query string is split into typed operators (entry/feed/label IDs,
   * label names, intitle:, author:, inurl:, date:, pubdate:, #tags) and the
   * remaining free-text terms. Every operator also has an excluding variant,
   * written with a leading `!` or `-`, stored in the matching `not_*` property.
   *
   * When the input is empty, nothing is parsed and every extracted property
   * stays null.
   */
  class FreshRSS_Search {

      // This contains the user input string
      private $raw_input = '';

      // The following properties are extracted from the raw input.
      // ID and label-name properties hold a list of lists: one inner list per
      // operator occurrence, each inner list being the comma-separated values.
      private $entry_ids;
      private $feed_ids;
      private $label_ids;
      private $label_names;
      private $intitle;
      // Date bounds are whatever parseDateInterval() (lib_date.php) returns.
      private $min_date;
      private $max_date;
      private $min_pubdate;
      private $max_pubdate;
      private $inurl;
      private $author;
      private $tags;
      private $search;

      // Excluding counterparts (operators prefixed with `!` or `-`)
      private $not_entry_ids;
      private $not_feed_ids;
      private $not_label_ids;
      private $not_label_names;
      private $not_intitle;
      private $not_min_date;
      private $not_max_date;
      private $not_min_pubdate;
      private $not_max_pubdate;
      private $not_inurl;
      private $not_author;
      private $not_tags;
      private $not_search;

      /**
       * Parse the given search string.
       *
       * Excluding operators are extracted first so that, for instance, `-f:1`
       * is not consumed as the including `f:1`. Free text is handled last,
       * once every operator has been removed from the input.
       *
       * @param string $input the search string as typed by the user
       */
      public function __construct($input) {
          if ($input === null || $input === false || $input === '') {
              return;
          }
          $this->raw_input = $input;

          // Quotes may arrive HTML-encoded; restore them after an operator colon.
          $input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input);

          $input = $this->parseNotEntryIds($input);
          $input = $this->parseNotFeedIds($input);
          $input = $this->parseNotLabelIds($input);
          $input = $this->parseNotLabelNames($input);
          $input = $this->parseNotPubdateSearch($input);
          $input = $this->parseNotDateSearch($input);
          $input = $this->parseNotIntitleSearch($input);
          $input = $this->parseNotAuthorSearch($input);
          $input = $this->parseNotInurlSearch($input);
          $input = $this->parseNotTagsSearch($input);

          $input = $this->parseEntryIds($input);
          $input = $this->parseFeedIds($input);
          $input = $this->parseLabelIds($input);
          $input = $this->parseLabelNames($input);
          $input = $this->parsePubdateSearch($input);
          $input = $this->parseDateSearch($input);
          $input = $this->parseIntitleSearch($input);
          $input = $this->parseAuthorSearch($input);
          $input = $this->parseInurlSearch($input);
          $input = $this->parseTagsSearch($input);

          $input = $this->parseNotSearch($input);
          $this->parseSearch($input);
      }

      /**
       * @return string the raw user input
       */
      public function __toString() {
          return $this->getRawInput();
      }

      /** @return string the unmodified search string ('' when nothing was given) */
      public function getRawInput() {
          return $this->raw_input;
      }

      /** @return array|null lists of entry IDs from `e:`, null when absent */
      public function getEntryIds() {
          return $this->entry_ids;
      }

      /** @return array|null lists of entry IDs from `!e:` / `-e:`, null when absent */
      public function getNotEntryIds() {
          return $this->not_entry_ids;
      }

      /** @return array|null lists of feed IDs from `f:`, null when absent */
      public function getFeedIds() {
          return $this->feed_ids;
      }

      /** @return array|null lists of feed IDs from `!f:` / `-f:`, null when absent */
      public function getNotFeedIds() {
          return $this->not_feed_ids;
      }

      /** @return array|null lists of label IDs (or the string '*') from `l:` / `L:` */
      public function getLabelIds() {
          return $this->label_ids;
      }

      /** @return array|null lists of label IDs (or the string '*') from `!l:` / `-l:` */
      public function getNotlabelIds() {
          return $this->not_label_ids;
      }

      /** @return array|null lists of label names from `label:` / `labels:` */
      public function getLabelNames() {
          return $this->label_names;
      }

      /** @return array|null lists of label names from `!label:` / `-label:` */
      public function getNotlabelNames() {
          return $this->not_label_names;
      }

      /** @return array|null values of `intitle:` */
      public function getIntitle() {
          return $this->intitle;
      }

      /** @return array|null values of `!intitle:` / `-intitle:` */
      public function getNotIntitle() {
          return $this->not_intitle;
      }

      /** @return mixed lower bound produced by parseDateInterval() for `date:` */
      public function getMinDate() {
          return $this->min_date;
      }

      /** @return mixed lower bound produced by parseDateInterval() for `!date:` / `-date:` */
      public function getNotMinDate() {
          return $this->not_min_date;
      }

      /**
       * @param mixed $value new lower date bound
       * @return mixed the value that was assigned
       */
      public function setMinDate($value) {
          return $this->min_date = $value;
      }

      /** @return mixed upper bound produced by parseDateInterval() for `date:` */
      public function getMaxDate() {
          return $this->max_date;
      }

      /** @return mixed upper bound produced by parseDateInterval() for `!date:` / `-date:` */
      public function getNotMaxDate() {
          return $this->not_max_date;
      }

      /**
       * @param mixed $value new upper date bound
       * @return mixed the value that was assigned
       */
      public function setMaxDate($value) {
          return $this->max_date = $value;
      }

      /** @return mixed lower bound produced by parseDateInterval() for `pubdate:` */
      public function getMinPubdate() {
          return $this->min_pubdate;
      }

      /** @return mixed lower bound produced by parseDateInterval() for `!pubdate:` / `-pubdate:` */
      public function getNotMinPubdate() {
          return $this->not_min_pubdate;
      }

      /** @return mixed upper bound produced by parseDateInterval() for `pubdate:` */
      public function getMaxPubdate() {
          return $this->max_pubdate;
      }

      /** @return mixed upper bound produced by parseDateInterval() for `!pubdate:` / `-pubdate:` */
      public function getNotMaxPubdate() {
          return $this->not_max_pubdate;
      }

      /** @return array|null values of `inurl:` */
      public function getInurl() {
          return $this->inurl;
      }

      /** @return array|null values of `!inurl:` / `-inurl:` */
      public function getNotInurl() {
          return $this->not_inurl;
      }

      /** @return array|null values of `author:` */
      public function getAuthor() {
          return $this->author;
      }

      /** @return array|null values of `!author:` / `-author:` */
      public function getNotAuthor() {
          return $this->not_author;
      }

      /** @return array|null values of `#tag`, with `+` decoded to a space */
      public function getTags() {
          return $this->tags;
      }

      /** @return array|null values of `!#tag` / `-#tag`, with `+` decoded to a space */
      public function getNotTags() {
          return $this->not_tags;
      }

      /** @return array|null free-text terms (quoted phrases first, then single words) */
      public function getSearch() {
          return $this->search;
      }

      /** @return array|null excluded free-text terms */
      public function getNotSearch() {
          return $this->not_search;
      }

      /**
       * Drop empty strings from a list and reindex it from 0.
       *
       * Reindexing matters: callers read `[0]` or rely on consecutive keys.
       *
       * @param array|null $anArray
       * @return array an empty array when the argument is not an array
       */
      private static function removeEmptyValues($anArray) {
          if (!is_array($anArray)) {
              return [];
          }
          $kept = array_filter($anArray, function ($value) {
              return $value !== '';
          });
          return array_values($kept);
      }

      /**
       * Replace `+` by a space and trim, recursively for arrays.
       *
       * @param array|string $value
       * @return array|string
       */
      private static function decodeSpaces($value) {
          if (!is_array($value)) {
              return trim(str_replace('+', ' ', $value));
          }
          // Iterate by key so that non-consecutive keys are handled too.
          foreach ($value as $key => $item) {
              $value[$key] = self::decodeSpaces($item);
          }
          return $value;
      }

      /**
       * Find every occurrence of $pattern and remove it from $input.
       *
       * Removal is done with the pattern itself rather than by replacing the
       * matched strings one after another: the latter corrupts the input when
       * one match is a prefix of another (`e:1 e:12` would leave a stray `2`).
       *
       * @param string $pattern PCRE pattern exposing a named group `search`
       * @param string $input
       * @return array [remaining input, list of `search` captures or null when nothing matched]
       */
      private static function extractMatches($pattern, $input) {
          if (!preg_match_all($pattern, $input, $matches)) {
              return [$input, null];
          }
          $remaining = preg_replace($pattern, '', $input);
          return [is_string($remaining) ? $remaining : $input, $matches['search']];
      }

      /**
       * Extract an operator whose value is either quoted (' or ") or a bare word.
       *
       * Quoted occurrences are extracted first, then bare ones; the returned
       * values keep that order.
       *
       * @param string $prefix PCRE fragment matching the operator, e.g. `\bauthor:`
       * @param string $input
       * @return array [remaining input, list of values or null when nothing matched]
       */
      private static function extractQuotedOrBare($prefix, $input) {
          list($input, $quoted) = self::extractMatches(
              '/' . $prefix . '(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              $input
          );
          list($input, $bare) = self::extractMatches(
              '/' . $prefix . '(?P<search>[^\s"]*)/',
              $input
          );
          if ($quoted === null && $bare === null) {
              return [$input, null];
          }
          return [$input, array_merge($quoted === null ? [] : $quoted, $bare === null ? [] : $bare)];
      }

      /**
       * Split each comma-separated list into its non-empty values.
       *
       * @param array $lists raw comma-separated strings, one per operator occurrence
       * @param bool $allowWildcard when true, a list equal to `*` is stored as the
       *                            string '*' and the remaining lists are ignored
       * @return array list of lists; occurrences without any value are skipped
       */
      private static function splitLists($lists, $allowWildcard = false) {
          $result = [];
          foreach ($lists as $list) {
              if ($allowWildcard && $list === '*') {
                  $result[] = '*';
                  break;
              }
              $values = self::removeEmptyValues(explode(',', $list));
              if (!empty($values)) {
                  $result[] = $values;
              }
          }
          return $result;
      }

      /**
       * Extract a date operator and convert its first non-empty value with
       * parseDateInterval() (declared in lib_date.php).
       *
       * @param string $pattern PCRE pattern exposing a named group `search`
       * @param string $input
       * @return array [remaining input, result of parseDateInterval() or null]
       */
      private static function extractDateInterval($pattern, $input) {
          list($input, $raw) = self::extractMatches($pattern, $input);
          $dates = self::removeEmptyValues($raw);
          if (empty($dates[0])) {
              return [$input, null];
          }
          return [$input, parseDateInterval($dates[0])];
      }

      /**
       * Parse the search string to find entry (article) IDs.
       *
       * @param string $input
       * @return string the input without the `e:` operators
       */
      private function parseEntryIds($input) {
          list($input, $lists) = self::extractMatches('/\be:(?P<search>[0-9,]*)/', $input);
          if ($lists !== null) {
              $this->entry_ids = self::splitLists($lists);
          }
          return $input;
      }

      /**
       * Parse the search string to find entry (article) IDs to exclude.
       *
       * @param string $input
       * @return string the input without the `!e:` / `-e:` operators
       */
      private function parseNotEntryIds($input) {
          list($input, $lists) = self::extractMatches('/[!-]e:(?P<search>[0-9,]*)/', $input);
          if ($lists !== null) {
              $this->not_entry_ids = self::splitLists($lists);
          }
          return $input;
      }

      /**
       * Parse the search string to find feed IDs.
       *
       * @param string $input
       * @return string the input without the `f:` operators
       */
      private function parseFeedIds($input) {
          list($input, $lists) = self::extractMatches('/\bf:(?P<search>[0-9,]*)/', $input);
          if ($lists !== null) {
              $this->feed_ids = self::splitLists($lists);
          }
          return $input;
      }

      /**
       * Parse the search string to find feed IDs to exclude.
       *
       * @param string $input
       * @return string the input without the `!f:` / `-f:` operators
       */
      private function parseNotFeedIds($input) {
          list($input, $lists) = self::extractMatches('/[!-]f:(?P<search>[0-9,]*)/', $input);
          if ($lists !== null) {
              $this->not_feed_ids = self::splitLists($lists);
          }
          return $input;
      }

      /**
       * Parse the search string to find tags (labels) IDs.
       * The value `*` is kept as the string '*'.
       *
       * @param string $input
       * @return string the input without the `l:` / `L:` operators
       */
      private function parseLabelIds($input) {
          list($input, $lists) = self::extractMatches('/\b[lL]:(?P<search>[0-9,]+|[*])/', $input);
          if ($lists !== null) {
              $this->label_ids = self::splitLists($lists, true);
          }
          return $input;
      }

      /**
       * Parse the search string to find tags (labels) IDs to exclude.
       * The value `*` is kept as the string '*'.
       *
       * @param string $input
       * @return string the input without the `!l:` / `-l:` operators
       */
      private function parseNotLabelIds($input) {
          list($input, $lists) = self::extractMatches('/[!-][lL]:(?P<search>[0-9,]+|[*])/', $input);
          if ($lists !== null) {
              $this->not_label_ids = self::splitLists($lists, true);
          }
          return $input;
      }

      /**
       * Parse the search string to find tags (labels) names.
       *
       * @param string $input
       * @return string the input without the `label:` / `labels:` operators
       */
      private function parseLabelNames($input) {
          list($input, $lists) = self::extractQuotedOrBare('\blabels?:', $input);
          if (!empty($lists)) {
              $this->label_names = self::splitLists($lists);
          }
          return $input;
      }

      /**
       * Parse the search string to find tags (labels) names to exclude.
       *
       * @param string $input
       * @return string the input without the `!label:` / `-label:` operators
       */
      private function parseNotLabelNames($input) {
          list($input, $lists) = self::extractQuotedOrBare('[!-]labels?:', $input);
          if (!empty($lists)) {
              $this->not_label_names = self::splitLists($lists);
          }
          return $input;
      }

      /**
       * Parse the search string to find intitle keyword and the search related
       * to it.
       * The search is the first word following the keyword, or a phrase
       * delimited by single or double quotes.
       *
       * @param string $input
       * @return string the input without the `intitle:` operators
       */
      private function parseIntitleSearch($input) {
          list($input, $values) = self::extractQuotedOrBare('\bintitle:', $input);
          $this->intitle = self::removeEmptyValues($values);
          return $input;
      }

      /**
       * Same as parseIntitleSearch() for `!intitle:` / `-intitle:`.
       *
       * @param string $input
       * @return string
       */
      private function parseNotIntitleSearch($input) {
          list($input, $values) = self::extractQuotedOrBare('[!-]intitle:', $input);
          $this->not_intitle = self::removeEmptyValues($values);
          return $input;
      }

      /**
       * Parse the search string to find author keyword and the search related
       * to it.
       * The search is the first word following the keyword except when using
       * a delimiter. Supported delimiters are single quote (') and double
       * quotes (").
       *
       * @param string $input
       * @return string the input without the `author:` operators
       */
      private function parseAuthorSearch($input) {
          list($input, $values) = self::extractQuotedOrBare('\bauthor:', $input);
          $this->author = self::removeEmptyValues($values);
          return $input;
      }

      /**
       * Same as parseAuthorSearch() for `!author:` / `-author:`.
       *
       * @param string $input
       * @return string
       */
      private function parseNotAuthorSearch($input) {
          list($input, $values) = self::extractQuotedOrBare('[!-]author:', $input);
          $this->not_author = self::removeEmptyValues($values);
          return $input;
      }

      /**
       * Parse the search string to find inurl keyword and the search related
       * to it.
       * The search is the first word following the keyword.
       *
       * @param string $input
       * @return string the input without the `inurl:` operators
       */
      private function parseInurlSearch($input) {
          list($input, $values) = self::extractMatches('/\binurl:(?P<search>[^\s]*)/', $input);
          $this->inurl = self::removeEmptyValues($values);
          return $input;
      }

      /**
       * Same as parseInurlSearch() for `!inurl:` / `-inurl:`.
       *
       * @param string $input
       * @return string
       */
      private function parseNotInurlSearch($input) {
          list($input, $values) = self::extractMatches('/[!-]inurl:(?P<search>[^\s]*)/', $input);
          $this->not_inurl = self::removeEmptyValues($values);
          return $input;
      }

      /**
       * Parse the search string to find date keyword and the search related
       * to it.
       * The search is the first word following the keyword; only the first
       * non-empty occurrence is used.
       *
       * @param string $input
       * @return string the input without the `date:` operators
       */
      private function parseDateSearch($input) {
          list($input, $interval) = self::extractDateInterval('/\bdate:(?P<search>[^\s]*)/', $input);
          if ($interval !== null) {
              list($this->min_date, $this->max_date) = $interval;
          }
          return $input;
      }

      /**
       * Same as parseDateSearch() for `!date:` / `-date:`.
       *
       * @param string $input
       * @return string
       */
      private function parseNotDateSearch($input) {
          list($input, $interval) = self::extractDateInterval('/[!-]date:(?P<search>[^\s]*)/', $input);
          if ($interval !== null) {
              list($this->not_min_date, $this->not_max_date) = $interval;
          }
          return $input;
      }

      /**
       * Parse the search string to find pubdate keyword and the search related
       * to it.
       * The search is the first word following the keyword; only the first
       * non-empty occurrence is used.
       *
       * @param string $input
       * @return string the input without the `pubdate:` operators
       */
      private function parsePubdateSearch($input) {
          list($input, $interval) = self::extractDateInterval('/\bpubdate:(?P<search>[^\s]*)/', $input);
          if ($interval !== null) {
              list($this->min_pubdate, $this->max_pubdate) = $interval;
          }
          return $input;
      }

      /**
       * Same as parsePubdateSearch() for `!pubdate:` / `-pubdate:`.
       *
       * @param string $input
       * @return string
       */
      private function parseNotPubdateSearch($input) {
          list($input, $interval) = self::extractDateInterval('/[!-]pubdate:(?P<search>[^\s]*)/', $input);
          if ($interval !== null) {
              list($this->not_min_pubdate, $this->not_max_pubdate) = $interval;
          }
          return $input;
      }

      /**
       * Parse the search string to find tags keyword (# followed by a word)
       * and the search related to it.
       * The search is the first word following the #; a `+` inside it stands
       * for a space.
       *
       * @param string $input
       * @return string the input without the `#tag` operators
       */
      private function parseTagsSearch($input) {
          list($input, $values) = self::extractMatches('/#(?P<search>[^\s]+)/', $input);
          $this->tags = self::decodeSpaces(self::removeEmptyValues($values));
          return $input;
      }

      /**
       * Same as parseTagsSearch() for `!#tag` / `-#tag`.
       *
       * @param string $input
       * @return string
       */
      private function parseNotTagsSearch($input) {
          list($input, $values) = self::extractMatches('/[!-]#(?P<search>[^\s]+)/', $input);
          $this->not_tags = self::decodeSpaces(self::removeEmptyValues($values));
          return $input;
      }

      /**
       * Parse the search string to find search values.
       * Every word is a distinct search value, except when using a delimiter.
       * Supported delimiters are single quote (') and double quotes (").
       *
       * @param string $input
       * @return string always '', since the whole remaining input is consumed
       */
      private function parseSearch($input) {
          $input = self::cleanSearch($input);
          if ($input === '') {
              return '';
          }

          list($input, $phrases) = self::extractMatches(
              '/(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              $input
          );
          if ($phrases !== null) {
              $this->search = $phrases;
          }

          $input = self::cleanSearch($input);
          if ($input === '') {
              return '';
          }

          $words = explode(' ', $input);
          $this->search = is_array($this->search) ? array_merge($this->search, $words) : $words;
          return '';
      }

      /**
       * Parse the search string to find search values to exclude.
       *
       * A term is excluded when it is prefixed by `!` or `-`. The prefix only
       * counts at the start of the input or after whitespace, so a hyphenated
       * word such as `e-mail` stays a regular search term.
       *
       * @param string $input
       * @return string the input without the excluded terms (never null)
       */
      private function parseNotSearch($input) {
          $input = self::cleanSearch($input);
          if ($input === '') {
              return '';
          }

          list($input, $phrases) = self::extractMatches(
              '/(?<!\S)[!-](?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              $input
          );
          list($input, $words) = self::extractMatches(
              '/(?<!\S)[!-](?P<search>[^\s]+)/',
              $input
          );

          $this->not_search = self::removeEmptyValues(
              array_merge($phrases === null ? [] : $phrases, $words === null ? [] : $words)
          );
          return $input;
      }

      /**
       * Remove all unnecessary spaces in the search
       *
       * @param string $input
       * @return string the input with whitespace runs collapsed and both ends trimmed
       */
      private static function cleanSearch($input) {
          return trim((string)preg_replace('/\s+/', ' ', (string)$input));
      }
  }