Search.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576
  1. <?php
  2. require_once(LIB_PATH . '/lib_date.php');
  3. /**
  4. * Contains a search from the search form.
  5. *
  6. * It allows to extract meaningful bits of the search and store them in a
  7. * convenient object
  8. */
  9. class FreshRSS_Search {
  10. // This contains the user input string
  11. private $raw_input = '';
  12. // The following properties are extracted from the raw input
  13. private $feed_ids;
  14. private $label_ids;
  15. private $label_names;
  16. private $intitle;
  17. private $min_date;
  18. private $max_date;
  19. private $min_pubdate;
  20. private $max_pubdate;
  21. private $inurl;
  22. private $author;
  23. private $tags;
  24. private $search;
  25. private $not_feed_ids;
  26. private $not_label_ids;
  27. private $not_label_names;
  28. private $not_intitle;
  29. private $not_min_date;
  30. private $not_max_date;
  31. private $not_min_pubdate;
  32. private $not_max_pubdate;
  33. private $not_inurl;
  34. private $not_author;
  35. private $not_tags;
  36. private $not_search;
  37. public function __construct($input) {
  38. if ($input == '') {
  39. return;
  40. }
  41. $this->raw_input = $input;
  42. $input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input);
  43. $input = $this->parseNotFeedIds($input);
  44. $input = $this->parseNotLabelIds($input);
  45. $input = $this->parseNotLabelNames($input);
  46. $input = $this->parseNotPubdateSearch($input);
  47. $input = $this->parseNotDateSearch($input);
  48. $input = $this->parseNotIntitleSearch($input);
  49. $input = $this->parseNotAuthorSearch($input);
  50. $input = $this->parseNotInurlSearch($input);
  51. $input = $this->parseNotTagsSearch($input);
  52. $input = $this->parseFeedIds($input);
  53. $input = $this->parseLabelIds($input);
  54. $input = $this->parseLabelNames($input);
  55. $input = $this->parsePubdateSearch($input);
  56. $input = $this->parseDateSearch($input);
  57. $input = $this->parseIntitleSearch($input);
  58. $input = $this->parseAuthorSearch($input);
  59. $input = $this->parseInurlSearch($input);
  60. $input = $this->parseTagsSearch($input);
  61. $input = $this->parseNotSearch($input);
  62. $input = $this->parseSearch($input);
  63. }
  64. public function __toString() {
  65. return $this->getRawInput();
  66. }
  67. public function getRawInput() {
  68. return $this->raw_input;
  69. }
  70. public function getFeedIds() {
  71. return $this->feed_ids;
  72. }
  73. public function getNotFeedIds() {
  74. return $this->not_feed_ids;
  75. }
  76. public function getLabelIds() {
  77. return $this->label_ids;
  78. }
  79. public function getNotlabelIds() {
  80. return $this->not_label_ids;
  81. }
  82. public function getLabelNames() {
  83. return $this->label_names;
  84. }
  85. public function getNotlabelNames() {
  86. return $this->not_label_names;
  87. }
  88. public function getIntitle() {
  89. return $this->intitle;
  90. }
  91. public function getNotIntitle() {
  92. return $this->not_intitle;
  93. }
  94. public function getMinDate() {
  95. return $this->min_date;
  96. }
  97. public function getNotMinDate() {
  98. return $this->not_min_date;
  99. }
  100. public function setMinDate($value) {
  101. return $this->min_date = $value;
  102. }
  103. public function getMaxDate() {
  104. return $this->max_date;
  105. }
  106. public function getNotMaxDate() {
  107. return $this->not_max_date;
  108. }
  109. public function setMaxDate($value) {
  110. return $this->max_date = $value;
  111. }
  112. public function getMinPubdate() {
  113. return $this->min_pubdate;
  114. }
  115. public function getNotMinPubdate() {
  116. return $this->not_min_pubdate;
  117. }
  118. public function getMaxPubdate() {
  119. return $this->max_pubdate;
  120. }
  121. public function getNotMaxPubdate() {
  122. return $this->not_max_pubdate;
  123. }
  124. public function getInurl() {
  125. return $this->inurl;
  126. }
  127. public function getNotInurl() {
  128. return $this->not_inurl;
  129. }
  130. public function getAuthor() {
  131. return $this->author;
  132. }
  133. public function getNotAuthor() {
  134. return $this->not_author;
  135. }
  136. public function getTags() {
  137. return $this->tags;
  138. }
  139. public function getNotTags() {
  140. return $this->not_tags;
  141. }
  142. public function getSearch() {
  143. return $this->search;
  144. }
  145. public function getNotSearch() {
  146. return $this->not_search;
  147. }
  148. private static function removeEmptyValues($anArray) {
  149. return is_array($anArray) ? array_filter($anArray, function($value) { return $value !== ''; }) : array();
  150. }
  151. private static function decodeSpaces($value) {
  152. if (is_array($value)) {
  153. for ($i = count($value) - 1; $i >= 0; $i--) {
  154. $value[$i] = self::decodeSpaces($value[$i]);
  155. }
  156. } else {
  157. $value = trim(str_replace('+', ' ', $value));
  158. }
  159. return $value;
  160. }
  161. /**
  162. * Parse the search string to find feed IDs.
  163. *
  164. * @param string $input
  165. * @return string
  166. */
  167. private function parseFeedIds($input) {
  168. if (preg_match_all('/\bf:(?P<search>[0-9,]*)/', $input, $matches)) {
  169. $input = str_replace($matches[0], '', $input);
  170. $ids_lists = $matches['search'];
  171. $this->feed_ids = [];
  172. foreach ($ids_lists as $ids_list) {
  173. $feed_ids = explode(',', $ids_list);
  174. $feed_ids = self::removeEmptyValues($feed_ids);
  175. if (!empty($feed_ids)) {
  176. $this->feed_ids[] = $feed_ids;
  177. }
  178. }
  179. }
  180. return $input;
  181. }
  182. private function parseNotFeedIds($input) {
  183. if (preg_match_all('/[!-]f:(?P<search>[0-9,]*)/', $input, $matches)) {
  184. $input = str_replace($matches[0], '', $input);
  185. $ids_lists = $matches['search'];
  186. $this->not_feed_ids = [];
  187. foreach ($ids_lists as $ids_list) {
  188. $feed_ids = explode(',', $ids_list);
  189. $feed_ids = self::removeEmptyValues($feed_ids);
  190. if (!empty($feed_ids)) {
  191. $this->not_feed_ids[] = $feed_ids;
  192. }
  193. }
  194. }
  195. return $input;
  196. }
  197. /**
  198. * Parse the search string to find tags (labels) IDs.
  199. *
  200. * @param string $input
  201. * @return string
  202. */
  203. private function parseLabelIds($input) {
  204. if (preg_match_all('/\b[lL]:(?P<search>[0-9,]+|[*])/', $input, $matches)) {
  205. $input = str_replace($matches[0], '', $input);
  206. $ids_lists = $matches['search'];
  207. $this->label_ids = [];
  208. foreach ($ids_lists as $ids_list) {
  209. if ($ids_list === '*') {
  210. $this->label_ids[] = '*';
  211. break;
  212. }
  213. $label_ids = explode(',', $ids_list);
  214. $label_ids = self::removeEmptyValues($label_ids);
  215. if (!empty($label_ids)) {
  216. $this->label_ids[] = $label_ids;
  217. }
  218. }
  219. }
  220. return $input;
  221. }
  222. private function parseNotLabelIds($input) {
  223. if (preg_match_all('/[!-][lL]:(?P<search>[0-9,]+|[*])/', $input, $matches)) {
  224. $input = str_replace($matches[0], '', $input);
  225. $ids_lists = $matches['search'];
  226. $this->not_label_ids = [];
  227. foreach ($ids_lists as $ids_list) {
  228. if ($ids_list === '*') {
  229. $this->not_label_ids[] = '*';
  230. break;
  231. }
  232. $label_ids = explode(',', $ids_list);
  233. $label_ids = self::removeEmptyValues($label_ids);
  234. if (!empty($label_ids)) {
  235. $this->not_label_ids[] = $label_ids;
  236. }
  237. }
  238. }
  239. return $input;
  240. }
  241. /**
  242. * Parse the search string to find tags (labels) names.
  243. *
  244. * @param string $input
  245. * @return string
  246. */
  247. private function parseLabelNames($input) {
  248. $names_lists = [];
  249. if (preg_match_all('/\blabels?:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  250. $names_lists = $matches['search'];
  251. $input = str_replace($matches[0], '', $input);
  252. }
  253. if (preg_match_all('/\blabels?:(?P<search>[^\s"]*)/', $input, $matches)) {
  254. $names_lists = array_merge($names_lists, $matches['search']);
  255. $input = str_replace($matches[0], '', $input);
  256. }
  257. if (!empty($names_lists)) {
  258. $this->label_names = [];
  259. foreach ($names_lists as $names_list) {
  260. $names_array = explode(',', $names_list);
  261. $names_array = self::removeEmptyValues($names_array);
  262. if (!empty($names_array)) {
  263. $this->label_names[] = $names_array;
  264. }
  265. }
  266. }
  267. return $input;
  268. }
  269. /**
  270. * Parse the search string to find tags (labels) names to exclude.
  271. *
  272. * @param string $input
  273. * @return string
  274. */
  275. private function parseNotLabelNames($input) {
  276. $names_lists = [];
  277. if (preg_match_all('/[!-]labels?:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  278. $names_lists = $matches['search'];
  279. $input = str_replace($matches[0], '', $input);
  280. }
  281. if (preg_match_all('/[!-]labels?:(?P<search>[^\s"]*)/', $input, $matches)) {
  282. $names_lists = array_merge($names_lists, $matches['search']);
  283. $input = str_replace($matches[0], '', $input);
  284. }
  285. if (!empty($names_lists)) {
  286. $this->not_label_names = [];
  287. foreach ($names_lists as $names_list) {
  288. $names_array = explode(',', $names_list);
  289. $names_array = self::removeEmptyValues($names_array);
  290. if (!empty($names_array)) {
  291. $this->not_label_names[] = $names_array;
  292. }
  293. }
  294. }
  295. return $input;
  296. }
  297. /**
  298. * Parse the search string to find intitle keyword and the search related
  299. * to it.
  300. * The search is the first word following the keyword.
  301. *
  302. * @param string $input
  303. * @return string
  304. */
  305. private function parseIntitleSearch($input) {
  306. if (preg_match_all('/\bintitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  307. $this->intitle = $matches['search'];
  308. $input = str_replace($matches[0], '', $input);
  309. }
  310. if (preg_match_all('/\bintitle:(?P<search>[^\s"]*)/', $input, $matches)) {
  311. $this->intitle = array_merge($this->intitle ? $this->intitle : array(), $matches['search']);
  312. $input = str_replace($matches[0], '', $input);
  313. }
  314. $this->intitle = self::removeEmptyValues($this->intitle);
  315. return $input;
  316. }
  317. private function parseNotIntitleSearch($input) {
  318. if (preg_match_all('/[!-]intitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  319. $this->not_intitle = $matches['search'];
  320. $input = str_replace($matches[0], '', $input);
  321. }
  322. if (preg_match_all('/[!-]intitle:(?P<search>[^\s"]*)/', $input, $matches)) {
  323. $this->not_intitle = array_merge($this->not_intitle ? $this->not_intitle : array(), $matches['search']);
  324. $input = str_replace($matches[0], '', $input);
  325. }
  326. $this->not_intitle = self::removeEmptyValues($this->not_intitle);
  327. return $input;
  328. }
  329. /**
  330. * Parse the search string to find author keyword and the search related
  331. * to it.
  332. * The search is the first word following the keyword except when using
  333. * a delimiter. Supported delimiters are single quote (') and double
  334. * quotes (").
  335. *
  336. * @param string $input
  337. * @return string
  338. */
  339. private function parseAuthorSearch($input) {
  340. if (preg_match_all('/\bauthor:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  341. $this->author = $matches['search'];
  342. $input = str_replace($matches[0], '', $input);
  343. }
  344. if (preg_match_all('/\bauthor:(?P<search>[^\s"]*)/', $input, $matches)) {
  345. $this->author = array_merge($this->author ? $this->author : array(), $matches['search']);
  346. $input = str_replace($matches[0], '', $input);
  347. }
  348. $this->author = self::removeEmptyValues($this->author);
  349. return $input;
  350. }
  351. private function parseNotAuthorSearch($input) {
  352. if (preg_match_all('/[!-]author:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  353. $this->not_author = $matches['search'];
  354. $input = str_replace($matches[0], '', $input);
  355. }
  356. if (preg_match_all('/[!-]author:(?P<search>[^\s"]*)/', $input, $matches)) {
  357. $this->not_author = array_merge($this->not_author ? $this->not_author : array(), $matches['search']);
  358. $input = str_replace($matches[0], '', $input);
  359. }
  360. $this->not_author = self::removeEmptyValues($this->not_author);
  361. return $input;
  362. }
  363. /**
  364. * Parse the search string to find inurl keyword and the search related
  365. * to it.
  366. * The search is the first word following the keyword.
  367. *
  368. * @param string $input
  369. * @return string
  370. */
  371. private function parseInurlSearch($input) {
  372. if (preg_match_all('/\binurl:(?P<search>[^\s]*)/', $input, $matches)) {
  373. $this->inurl = $matches['search'];
  374. $input = str_replace($matches[0], '', $input);
  375. }
  376. $this->inurl = self::removeEmptyValues($this->inurl);
  377. return $input;
  378. }
  379. private function parseNotInurlSearch($input) {
  380. if (preg_match_all('/[!-]inurl:(?P<search>[^\s]*)/', $input, $matches)) {
  381. $this->not_inurl = $matches['search'];
  382. $input = str_replace($matches[0], '', $input);
  383. }
  384. $this->not_inurl = self::removeEmptyValues($this->not_inurl);
  385. return $input;
  386. }
  387. /**
  388. * Parse the search string to find date keyword and the search related
  389. * to it.
  390. * The search is the first word following the keyword.
  391. *
  392. * @param string $input
  393. * @return string
  394. */
  395. private function parseDateSearch($input) {
  396. if (preg_match_all('/\bdate:(?P<search>[^\s]*)/', $input, $matches)) {
  397. $input = str_replace($matches[0], '', $input);
  398. $dates = self::removeEmptyValues($matches['search']);
  399. if (!empty($dates[0])) {
  400. list($this->min_date, $this->max_date) = parseDateInterval($dates[0]);
  401. }
  402. }
  403. return $input;
  404. }
  405. private function parseNotDateSearch($input) {
  406. if (preg_match_all('/[!-]date:(?P<search>[^\s]*)/', $input, $matches)) {
  407. $input = str_replace($matches[0], '', $input);
  408. $dates = self::removeEmptyValues($matches['search']);
  409. if (!empty($dates[0])) {
  410. list($this->not_min_date, $this->not_max_date) = parseDateInterval($dates[0]);
  411. }
  412. }
  413. return $input;
  414. }
  415. /**
  416. * Parse the search string to find pubdate keyword and the search related
  417. * to it.
  418. * The search is the first word following the keyword.
  419. *
  420. * @param string $input
  421. * @return string
  422. */
  423. private function parsePubdateSearch($input) {
  424. if (preg_match_all('/\bpubdate:(?P<search>[^\s]*)/', $input, $matches)) {
  425. $input = str_replace($matches[0], '', $input);
  426. $dates = self::removeEmptyValues($matches['search']);
  427. if (!empty($dates[0])) {
  428. list($this->min_pubdate, $this->max_pubdate) = parseDateInterval($dates[0]);
  429. }
  430. }
  431. return $input;
  432. }
  433. private function parseNotPubdateSearch($input) {
  434. if (preg_match_all('/[!-]pubdate:(?P<search>[^\s]*)/', $input, $matches)) {
  435. $input = str_replace($matches[0], '', $input);
  436. $dates = self::removeEmptyValues($matches['search']);
  437. if (!empty($dates[0])) {
  438. list($this->not_min_pubdate, $this->not_max_pubdate) = parseDateInterval($dates[0]);
  439. }
  440. }
  441. return $input;
  442. }
  443. /**
  444. * Parse the search string to find tags keyword (# followed by a word)
  445. * and the search related to it.
  446. * The search is the first word following the #.
  447. *
  448. * @param string $input
  449. * @return string
  450. */
  451. private function parseTagsSearch($input) {
  452. if (preg_match_all('/#(?P<search>[^\s]+)/', $input, $matches)) {
  453. $this->tags = $matches['search'];
  454. $input = str_replace($matches[0], '', $input);
  455. }
  456. $this->tags = self::removeEmptyValues($this->tags);
  457. $this->tags = self::decodeSpaces($this->tags);
  458. return $input;
  459. }
  460. private function parseNotTagsSearch($input) {
  461. if (preg_match_all('/[!-]#(?P<search>[^\s]+)/', $input, $matches)) {
  462. $this->not_tags = $matches['search'];
  463. $input = str_replace($matches[0], '', $input);
  464. }
  465. $this->not_tags = self::removeEmptyValues($this->not_tags);
  466. $this->not_tags = self::decodeSpaces($this->not_tags);
  467. return $input;
  468. }
  469. /**
  470. * Parse the search string to find search values.
  471. * Every word is a distinct search value, except when using a delimiter.
  472. * Supported delimiters are single quote (') and double quotes (").
  473. *
  474. * @param string $input
  475. * @return string
  476. */
  477. private function parseSearch($input) {
  478. $input = self::cleanSearch($input);
  479. if ($input == '') {
  480. return;
  481. }
  482. if (preg_match_all('/(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  483. $this->search = $matches['search'];
  484. $input = str_replace($matches[0], '', $input);
  485. }
  486. $input = self::cleanSearch($input);
  487. if ($input == '') {
  488. return;
  489. }
  490. if (is_array($this->search)) {
  491. $this->search = array_merge($this->search, explode(' ', $input));
  492. } else {
  493. $this->search = explode(' ', $input);
  494. }
  495. }
  496. private function parseNotSearch($input) {
  497. $input = self::cleanSearch($input);
  498. if ($input == '') {
  499. return;
  500. }
  501. if (preg_match_all('/[!-](?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  502. $this->not_search = $matches['search'];
  503. $input = str_replace($matches[0], '', $input);
  504. }
  505. if ($input == '') {
  506. return;
  507. }
  508. if (preg_match_all('/[!-](?P<search>[^\s]+)/', $input, $matches)) {
  509. $this->not_search = array_merge(is_array($this->not_search) ? $this->not_search : array(), $matches['search']);
  510. $input = str_replace($matches[0], '', $input);
  511. }
  512. $this->not_search = self::removeEmptyValues($this->not_search);
  513. return $input;
  514. }
  515. /**
  516. * Remove all unnecessary spaces in the search
  517. *
  518. * @param string $input
  519. * @return string
  520. */
  521. private static function cleanSearch($input) {
  522. $input = preg_replace('/\s+/', ' ', $input);
  523. return trim($input);
  524. }
  525. }