Search.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. <?php
  2. require_once(LIB_PATH . '/lib_date.php');
  /**
   * Parsed representation of a query typed into the search form.
   *
   * The raw string is split into keyword filters (intitle:, author:, inurl:,
   * date:, pubdate:, #tag), their negated forms (prefixed with "!" or "-")
   * and the remaining free-text terms. Every part is exposed through a getter.
   */
  class FreshRSS_Search {

      // The user input string, exactly as received
      private $raw_input = '';

      // The following properties are extracted from the raw input.
      // They stay null when the object is built from an empty input.
      private $intitle;
      private $min_date;
      private $max_date;
      private $min_pubdate;
      private $max_pubdate;
      private $inurl;
      private $author;
      private $tags;
      private $search;

      // Negated counterparts (filter prefixed with "!" or "-")
      private $not_intitle;
      private $not_min_date;
      private $not_max_date;
      private $not_min_pubdate;
      private $not_max_pubdate;
      private $not_inurl;
      private $not_author;
      private $not_tags;
      private $not_search;

      /**
       * Parse the given search string.
       *
       * @param string $input the raw search string; an empty input leaves
       *                      every extracted property untouched
       */
      public function __construct($input) {
          // Loose comparison on purpose: null is handled like the empty string.
          if ($input == '') {
              return;
          }
          $this->raw_input = $input;

          // Turn HTML-encoded double quotes following a keyword colon back into real quotes
          $input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input);

          // Negated filters are extracted first: the positive patterns would
          // otherwise also match the keyword part of e.g. "!intitle:foo".
          $input = $this->parseNotPubdateSearch($input);
          $input = $this->parseNotDateSearch($input);
          $input = $this->parseNotIntitleSearch($input);
          $input = $this->parseNotAuthorSearch($input);
          $input = $this->parseNotInurlSearch($input);
          $input = $this->parseNotTagsSearch($input);

          $input = $this->parsePubdateSearch($input);
          $input = $this->parseDateSearch($input);
          $input = $this->parseIntitleSearch($input);
          $input = $this->parseAuthorSearch($input);
          $input = $this->parseInurlSearch($input);
          $input = $this->parseTagsSearch($input);

          // Whatever is left is free text: negated terms first, then plain terms
          $input = $this->parseNotSearch($input);
          $this->parseSearch($input);
      }

      /**
       * @return string the raw input
       */
      public function __toString() {
          return $this->getRawInput();
      }

      /**
       * @return string the search string as given to the constructor ('' when it was empty)
       */
      public function getRawInput() {
          return $this->raw_input;
      }

      /**
       * @return array|null values of the intitle: filters
       */
      public function getIntitle() {
          return $this->intitle;
      }

      /**
       * @return array|null values of the negated intitle: filters
       */
      public function getNotIntitle() {
          return $this->not_intitle;
      }

      /**
       * @return mixed lower bound obtained from parseDateInterval() for date:, or null
       */
      public function getMinDate() {
          return $this->min_date;
      }

      /**
       * @return mixed lower bound obtained from parseDateInterval() for the negated date:, or null
       */
      public function getNotMinDate() {
          return $this->not_min_date;
      }

      /**
       * @param mixed $value new lower bound for date:
       * @return mixed the value that was assigned
       */
      public function setMinDate($value) {
          return $this->min_date = $value;
      }

      /**
       * @return mixed upper bound obtained from parseDateInterval() for date:, or null
       */
      public function getMaxDate() {
          return $this->max_date;
      }

      /**
       * @return mixed upper bound obtained from parseDateInterval() for the negated date:, or null
       */
      public function getNotMaxDate() {
          return $this->not_max_date;
      }

      /**
       * @param mixed $value new upper bound for date:
       * @return mixed the value that was assigned
       */
      public function setMaxDate($value) {
          return $this->max_date = $value;
      }

      /**
       * @return mixed lower bound obtained from parseDateInterval() for pubdate:, or null
       */
      public function getMinPubdate() {
          return $this->min_pubdate;
      }

      /**
       * @return mixed lower bound obtained from parseDateInterval() for the negated pubdate:, or null
       */
      public function getNotMinPubdate() {
          return $this->not_min_pubdate;
      }

      /**
       * @return mixed upper bound obtained from parseDateInterval() for pubdate:, or null
       */
      public function getMaxPubdate() {
          return $this->max_pubdate;
      }

      /**
       * @return mixed upper bound obtained from parseDateInterval() for the negated pubdate:, or null
       */
      public function getNotMaxPubdate() {
          return $this->not_max_pubdate;
      }

      /**
       * @return array|null values of the inurl: filters
       */
      public function getInurl() {
          return $this->inurl;
      }

      /**
       * @return array|null values of the negated inurl: filters
       */
      public function getNotInurl() {
          return $this->not_inurl;
      }

      /**
       * @return array|null values of the author: filters
       */
      public function getAuthor() {
          return $this->author;
      }

      /**
       * @return array|null values of the negated author: filters
       */
      public function getNotAuthor() {
          return $this->not_author;
      }

      /**
       * @return array|null tags given as #tag
       */
      public function getTags() {
          return $this->tags;
      }

      /**
       * @return array|null tags given as !#tag or -#tag
       */
      public function getNotTags() {
          return $this->not_tags;
      }

      /**
       * @return array|null free-text terms, null when there is none
       */
      public function getSearch() {
          return $this->search;
      }

      /**
       * @return array|null negated free-text terms
       */
      public function getNotSearch() {
          return $this->not_search;
      }

      /**
       * Drop the empty strings of a list.
       *
       * The result is re-indexed from 0: array_filter() keeps the original
       * keys, and the callers rely on a gap-free list (they read index 0 and
       * iterate over the values).
       *
       * @param array|null $anArray
       * @return array the non-empty values; an empty array when $anArray is not an array
       */
      private static function removeEmptyValues($anArray) {
          if (!is_array($anArray)) {
              return array();
          }
          $nonEmpty = array_filter($anArray, function ($value) {
              return $value !== '';
          });
          return array_values($nonEmpty);
      }

      /**
       * Replace every "+" by a space and trim the result.
       * Arrays are processed recursively, whatever their keys are.
       *
       * @param array|string $value
       * @return array|string
       */
      private static function decodeSpaces($value) {
          if (!is_array($value)) {
              return trim(str_replace('+', ' ', $value));
          }
          foreach ($value as $key => $item) {
              $value[$key] = self::decodeSpaces($item);
          }
          return $value;
      }

      /**
       * Collect the "search" capture of every match of $pattern and strip
       * the matches from $input.
       *
       * The stripping is done with the pattern itself, so only the text that
       * really matched is removed. A plain str_replace() of the matched texts
       * would also remove them wherever else they occur, e.g. the
       * "intitle:a" prefix of "intitle:ab".
       *
       * @param string $pattern regular expression with a named group "search"
       * @param string $input
       * @return array array(list of captured values, remaining input)
       */
      private static function extractMatches($pattern, $input) {
          if (!preg_match_all($pattern, $input, $matches)) {
              return array(array(), $input);
          }
          $remaining = preg_replace($pattern, '', $input);
          if ($remaining === null) {
              // Regex engine failure: fall back to a literal removal
              $remaining = str_replace($matches[0], '', $input);
          }
          return array($matches['search'], $remaining);
      }

      /**
       * Apply several patterns in order and return the cleaned captured
       * values (empty ones removed, "+" decoded as space).
       *
       * @param array $patterns regular expressions with a named group "search"
       * @param string $input
       * @return array array(list of values, remaining input)
       */
      private static function extractValues(array $patterns, $input) {
          $values = array();
          foreach ($patterns as $pattern) {
              list($found, $input) = self::extractMatches($pattern, $input);
              $values = array_merge($values, $found);
          }
          $values = self::decodeSpaces(self::removeEmptyValues($values));
          return array($values, $input);
      }

      /**
       * Strip every match of a date-like pattern and return the first
       * non-empty captured value. The value is returned untouched ("+" is
       * not decoded).
       *
       * @param string $pattern regular expression with a named group "search"
       * @param string $input
       * @return array array(first non-empty value or null, remaining input)
       */
      private static function extractFirstInterval($pattern, $input) {
          list($found, $input) = self::extractMatches($pattern, $input);
          $dates = self::removeEmptyValues($found);
          return array(empty($dates[0]) ? null : $dates[0], $input);
      }

      /**
       * Parse the search string to find the intitle keyword and its value.
       * The value is the first word following the keyword, or a whole phrase
       * when it is delimited by single quotes (') or double quotes (").
       *
       * @param string $input
       * @return string the input without the intitle filters
       */
      private function parseIntitleSearch($input) {
          list($this->intitle, $input) = self::extractValues(array(
              '/\bintitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              '/\bintitle:(?P<search>[^\s"]*)/',
          ), $input);
          return $input;
      }

      /**
       * Same as parseIntitleSearch() for the negated form (!intitle: or
       * -intitle:). The sign must start a token, so that a hyphen inside a
       * word is not taken for a negation.
       *
       * @param string $input
       * @return string the input without the negated intitle filters
       */
      private function parseNotIntitleSearch($input) {
          list($this->not_intitle, $input) = self::extractValues(array(
              '/(?<=\s|^)[!-]intitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              '/(?<=\s|^)[!-]intitle:(?P<search>[^\s"]*)/',
          ), $input);
          return $input;
      }

      /**
       * Parse the search string to find the author keyword and its value.
       * The value is the first word following the keyword, or a whole phrase
       * when it is delimited by single quotes (') or double quotes (").
       *
       * @param string $input
       * @return string the input without the author filters
       */
      private function parseAuthorSearch($input) {
          list($this->author, $input) = self::extractValues(array(
              '/\bauthor:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              '/\bauthor:(?P<search>[^\s"]*)/',
          ), $input);
          return $input;
      }

      /**
       * Same as parseAuthorSearch() for the negated form (!author: or
       * -author:). The sign must start a token.
       *
       * @param string $input
       * @return string the input without the negated author filters
       */
      private function parseNotAuthorSearch($input) {
          list($this->not_author, $input) = self::extractValues(array(
              '/(?<=\s|^)[!-]author:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              '/(?<=\s|^)[!-]author:(?P<search>[^\s"]*)/',
          ), $input);
          return $input;
      }

      /**
       * Parse the search string to find the inurl keyword and its value.
       * The value is the first word following the keyword.
       *
       * @param string $input
       * @return string the input without the inurl filters
       */
      private function parseInurlSearch($input) {
          list($this->inurl, $input) = self::extractValues(array(
              '/\binurl:(?P<search>[^\s]*)/',
          ), $input);
          return $input;
      }

      /**
       * Same as parseInurlSearch() for the negated form (!inurl: or
       * -inurl:). The sign must start a token.
       *
       * @param string $input
       * @return string the input without the negated inurl filters
       */
      private function parseNotInurlSearch($input) {
          list($this->not_inurl, $input) = self::extractValues(array(
              '/(?<=\s|^)[!-]inurl:(?P<search>[^\s]*)/',
          ), $input);
          return $input;
      }

      /**
       * Parse the search string to find the date keyword and its value.
       * The value is the first word following the keyword. Every date:
       * filter is removed from the input but only the first non-empty one is
       * handed to parseDateInterval().
       *
       * @param string $input
       * @return string the input without the date filters
       */
      private function parseDateSearch($input) {
          list($interval, $input) = self::extractFirstInterval('/\bdate:(?P<search>[^\s]*)/', $input);
          if ($interval !== null) {
              list($this->min_date, $this->max_date) = parseDateInterval($interval);
          }
          return $input;
      }

      /**
       * Same as parseDateSearch() for the negated form (!date: or -date:).
       * The sign must start a token.
       *
       * @param string $input
       * @return string the input without the negated date filters
       */
      private function parseNotDateSearch($input) {
          list($interval, $input) = self::extractFirstInterval('/(?<=\s|^)[!-]date:(?P<search>[^\s]*)/', $input);
          if ($interval !== null) {
              list($this->not_min_date, $this->not_max_date) = parseDateInterval($interval);
          }
          return $input;
      }

      /**
       * Parse the search string to find the pubdate keyword and its value.
       * The value is the first word following the keyword. Every pubdate:
       * filter is removed from the input but only the first non-empty one is
       * handed to parseDateInterval().
       *
       * @param string $input
       * @return string the input without the pubdate filters
       */
      private function parsePubdateSearch($input) {
          list($interval, $input) = self::extractFirstInterval('/\bpubdate:(?P<search>[^\s]*)/', $input);
          if ($interval !== null) {
              list($this->min_pubdate, $this->max_pubdate) = parseDateInterval($interval);
          }
          return $input;
      }

      /**
       * Same as parsePubdateSearch() for the negated form (!pubdate: or
       * -pubdate:). The sign must start a token.
       *
       * @param string $input
       * @return string the input without the negated pubdate filters
       */
      private function parseNotPubdateSearch($input) {
          list($interval, $input) = self::extractFirstInterval('/(?<=\s|^)[!-]pubdate:(?P<search>[^\s]*)/', $input);
          if ($interval !== null) {
              list($this->not_min_pubdate, $this->not_max_pubdate) = parseDateInterval($interval);
          }
          return $input;
      }

      /**
       * Parse the search string to find the tags (# followed by a word).
       * The tag is the word immediately following the #.
       *
       * @param string $input
       * @return string the input without the tags
       */
      private function parseTagsSearch($input) {
          list($this->tags, $input) = self::extractValues(array(
              '/#(?P<search>[^\s]+)/',
          ), $input);
          return $input;
      }

      /**
       * Same as parseTagsSearch() for the negated form (!#tag or -#tag).
       * The sign must start a token.
       *
       * @param string $input
       * @return string the input without the negated tags
       */
      private function parseNotTagsSearch($input) {
          list($this->not_tags, $input) = self::extractValues(array(
              '/(?<=\s|^)[!-]#(?P<search>[^\s]+)/',
          ), $input);
          return $input;
      }

      /**
       * Parse what is left of the search string into free-text terms.
       * Every word is a distinct term, except when using a delimiter.
       * Supported delimiters are single quote (') and double quotes (").
       *
       * The result is stored in $this->search, which is left untouched when
       * there is no term at all.
       *
       * @param string $input
       */
      private function parseSearch($input) {
          $input = self::cleanSearch($input);
          if ($input === '') {
              return;
          }

          list($phrases, $input) = self::extractMatches('/(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input);
          if (!empty($phrases)) {
              $this->search = $phrases;
          }

          $input = self::cleanSearch($input);
          if ($input === '') {
              // NOTE(review): with quoted phrases only, the terms are kept
              // as typed ("+" is not decoded), unlike the mixed case below.
              return;
          }

          $words = explode(' ', $input);
          $terms = is_array($this->search) ? array_merge($this->search, $words) : $words;
          $this->search = self::decodeSpaces($terms);
      }

      /**
       * Parse what is left of the search string to find the negated
       * free-text terms: a word or a quoted phrase prefixed with "!" or "-".
       * The sign must start a token, so "e-mail" is a plain term and not
       * "e" minus "mail".
       *
       * @param string $input
       * @return string the input without the negated terms; always a string,
       *                so that it can be passed on to parseSearch()
       */
      private function parseNotSearch($input) {
          $input = self::cleanSearch($input);
          if ($input === '') {
              return '';
          }

          list($phrases, $input) = self::extractMatches(
              '/(?<=\s|^)[!-](?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              $input
          );
          if (!empty($phrases)) {
              $this->not_search = $phrases;
          }
          if ($input === '') {
              // NOTE(review): with negated phrases only, the terms are kept
              // as typed ("+" is not decoded), unlike the general case below.
              return '';
          }

          list($words, $input) = self::extractMatches('/(?<=\s|^)[!-](?P<search>[^\s]+)/', $input);
          $excluded = array_merge(is_array($this->not_search) ? $this->not_search : array(), $words);
          $this->not_search = self::decodeSpaces(self::removeEmptyValues($excluded));

          return $input;
      }

      /**
       * Collapse every run of whitespace into a single space and trim the
       * result.
       *
       * @param string $input
       * @return string
       */
      private static function cleanSearch($input) {
          return trim(preg_replace('/\s+/', ' ', $input));
      }
  }