Search.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. <?php
  2. require_once(LIB_PATH . '/lib_date.php');
  /**
   * Structured representation of a query typed in the search form.
   *
   * The raw string is split into keyword filters (intitle:, author:, inurl:,
   * date:, pubdate:, #tag), their negated forms (prefixed with "!" or "-"),
   * and the remaining free-text terms. Each part is exposed through a getter.
   *
   * Every filter property stays null when the input is empty. Once an input
   * has been parsed, the keyword/tag/negation properties are lists of strings
   * (possibly empty), while the free-text list and the date bounds stay null
   * when nothing matched.
   */
  class FreshRSS_Search {
      // This contains the user input string
      private $raw_input = '';

      // The following properties are extracted from the raw input
      private $intitle;
      private $min_date;
      private $max_date;
      private $min_pubdate;
      private $max_pubdate;
      private $inurl;
      private $author;
      private $tags;
      private $search;
      private $not_intitle;
      private $not_inurl;
      private $not_author;
      private $not_tags;
      private $not_search;

      /**
       * Parse a search string into its components.
       *
       * Negated filters are extracted before their positive counterparts so
       * that "-intitle:x" is not picked up by the "intitle:" pattern, and the
       * free-text parsers run last on whatever is left.
       *
       * @param string $input raw search string; empty or non-scalar input
       *                      leaves the object empty
       */
      public function __construct($input) {
          $input = is_scalar($input) ? (string) $input : '';
          if ($input === '') {
              return;
          }
          $this->raw_input = $input;

          // The form may deliver quotes as HTML entities; restore them so the
          // keyword:"quoted value" patterns below can match.
          $input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input);

          $input = $this->parseNotIntitleSearch($input);
          $input = $this->parseNotAuthorSearch($input);
          $input = $this->parseNotInurlSearch($input);
          $input = $this->parseNotTagsSearch($input);

          $input = $this->parsePubdateSearch($input);
          $input = $this->parseDateSearch($input);

          $input = $this->parseIntitleSearch($input);
          $input = $this->parseAuthorSearch($input);
          $input = $this->parseInurlSearch($input);
          $input = $this->parseTagsSearch($input);

          $input = $this->parseNotSearch($input);
          $this->parseSearch($input);
      }

      /**
       * @return string the raw user input
       */
      public function __toString() {
          return $this->getRawInput();
      }

      /**
       * @return string the unmodified string given to the constructor ('' when empty)
       */
      public function getRawInput() {
          return $this->raw_input;
      }

      /**
       * @return array|null values of the intitle: filters
       */
      public function getIntitle() {
          return $this->intitle;
      }

      /**
       * @return array|null values of the negated intitle: filters
       */
      public function getNotIntitle() {
          return $this->not_intitle;
      }

      /**
       * @return mixed lower bound produced by parseDateInterval() for date:, or null
       */
      public function getMinDate() {
          return $this->min_date;
      }

      /**
       * @return mixed upper bound produced by parseDateInterval() for date:, or null
       */
      public function getMaxDate() {
          return $this->max_date;
      }

      /**
       * @return mixed lower bound produced by parseDateInterval() for pubdate:, or null
       */
      public function getMinPubdate() {
          return $this->min_pubdate;
      }

      /**
       * @return mixed upper bound produced by parseDateInterval() for pubdate:, or null
       */
      public function getMaxPubdate() {
          return $this->max_pubdate;
      }

      /**
       * @return array|null values of the inurl: filters
       */
      public function getInurl() {
          return $this->inurl;
      }

      /**
       * @return array|null values of the negated inurl: filters
       */
      public function getNotInurl() {
          return $this->not_inurl;
      }

      /**
       * @return array|null values of the author: filters
       */
      public function getAuthor() {
          return $this->author;
      }

      /**
       * @return array|null values of the negated author: filters
       */
      public function getNotAuthor() {
          return $this->not_author;
      }

      /**
       * @return array|null values of the #tag filters
       */
      public function getTags() {
          return $this->tags;
      }

      /**
       * @return array|null values of the negated #tag filters
       */
      public function getNotTags() {
          return $this->not_tags;
      }

      /**
       * @return array|null free-text search terms
       */
      public function getSearch() {
          return $this->search;
      }

      /**
       * @return array|null negated free-text search terms
       */
      public function getNotSearch() {
          return $this->not_search;
      }

      /**
       * Drop empty strings from a list of captured values.
       *
       * The result is reindexed from 0: array_filter() keeps the original
       * keys, and callers rely on a gap-free list (e.g. reading index 0).
       *
       * @param mixed $anArray
       * @return array reindexed list without '' entries; array() for non-array input
       */
      private static function removeEmptyValues($anArray) {
          if (!is_array($anArray)) {
              return array();
          }
          $kept = array_filter($anArray, function ($value) {
              return $value !== '';
          });
          return array_values($kept);
      }

      /**
       * Replace every "+" by a space and trim the result.
       *
       * Arrays are processed element by element (recursively), whatever
       * their keys are.
       *
       * @param array|string $value
       * @return array|string same shape as the argument
       */
      private static function decodeSpaces($value) {
          if (is_array($value)) {
              foreach ($value as $key => $item) {
                  $value[$key] = self::decodeSpaces($item);
              }
              return $value;
          }
          return trim(str_replace('+', ' ', $value));
      }

      /**
       * Capture the "search" named group of every match of $pattern and
       * remove the matched text from $input.
       *
       * Removal reuses the pattern (preg_replace) rather than the matched
       * substrings, so only the actual matches are stripped: an empty
       * "keyword:" match can no longer erase the prefix of later terms, and
       * "intitle:a" can no longer eat the beginning of "intitle:ab".
       *
       * @param string $pattern PCRE pattern exposing a "search" named group
       * @param string $input
       * @return array array(list of captured values, remaining input)
       */
      private static function extractMatches($pattern, $input) {
          if (!preg_match_all($pattern, $input, $matches)) {
              return array(array(), $input);
          }
          $remaining = preg_replace($pattern, '', $input);
          // preg_replace() yields null on a PCRE error; keep the input then.
          return array($matches['search'], $remaining === null ? $input : $remaining);
      }

      /**
       * Apply several patterns in order and gather their cleaned values.
       *
       * Each pattern runs on the input left over by the previous one. The
       * values are stripped of empty strings, then "+"-decoded.
       *
       * @param string $input
       * @param array $patterns PCRE patterns exposing a "search" named group
       * @return array array(list of values, remaining input)
       */
      private static function collect($input, array $patterns) {
          $values = array();
          foreach ($patterns as $pattern) {
              list($found, $input) = self::extractMatches($pattern, $input);
              $values = array_merge($values, $found);
          }
          $values = self::decodeSpaces(self::removeEmptyValues($values));
          return array($values, $input);
      }

      /**
       * Parse the search string to find intitle keyword and the search related
       * to it.
       * The search is the first word following the keyword except when using
       * a delimiter. Supported delimiters are single quote (') and double
       * quotes (").
       *
       * @param string $input
       * @return string the input without the intitle: terms
       */
      private function parseIntitleSearch($input) {
          list($this->intitle, $input) = self::collect($input, array(
              '/\bintitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              '/\bintitle:(?P<search>[^\s"]*)/',
          ));
          return $input;
      }

      /**
       * Same as parseIntitleSearch() for negated terms ("!intitle:" or
       * "-intitle:"). The operator must start the input or follow a
       * whitespace so that a hyphen inside a word is not taken as a negation.
       *
       * @param string $input
       * @return string the input without the negated intitle: terms
       */
      private function parseNotIntitleSearch($input) {
          list($this->not_intitle, $input) = self::collect($input, array(
              '/(?<=\s|^)[!-]intitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              '/(?<=\s|^)[!-]intitle:(?P<search>[^\s"]*)/',
          ));
          return $input;
      }

      /**
       * Parse the search string to find author keyword and the search related
       * to it.
       * The search is the first word following the keyword except when using
       * a delimiter. Supported delimiters are single quote (') and double
       * quotes (").
       *
       * @param string $input
       * @return string the input without the author: terms
       */
      private function parseAuthorSearch($input) {
          list($this->author, $input) = self::collect($input, array(
              '/\bauthor:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              '/\bauthor:(?P<search>[^\s"]*)/',
          ));
          return $input;
      }

      /**
       * Same as parseAuthorSearch() for negated terms ("!author:" or
       * "-author:"), with the operator at the start of the input or after a
       * whitespace.
       *
       * @param string $input
       * @return string the input without the negated author: terms
       */
      private function parseNotAuthorSearch($input) {
          list($this->not_author, $input) = self::collect($input, array(
              '/(?<=\s|^)[!-]author:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              '/(?<=\s|^)[!-]author:(?P<search>[^\s"]*)/',
          ));
          return $input;
      }

      /**
       * Parse the search string to find inurl keyword and the search related
       * to it.
       * The search is the first word following the keyword.
       *
       * @param string $input
       * @return string the input without the inurl: terms
       */
      private function parseInurlSearch($input) {
          list($this->inurl, $input) = self::collect($input, array(
              '/\binurl:(?P<search>[^\s]*)/',
          ));
          return $input;
      }

      /**
       * Same as parseInurlSearch() for negated terms ("!inurl:" or
       * "-inurl:"), with the operator at the start of the input or after a
       * whitespace.
       *
       * @param string $input
       * @return string the input without the negated inurl: terms
       */
      private function parseNotInurlSearch($input) {
          list($this->not_inurl, $input) = self::collect($input, array(
              '/(?<=\s|^)[!-]inurl:(?P<search>[^\s]*)/',
          ));
          return $input;
      }

      /**
       * Parse the search string to find date keyword and the search related
       * to it.
       * The search is the first word following the keyword. All date: terms
       * are removed from the input, but only the first non-empty one is
       * handed to parseDateInterval().
       *
       * @param string $input
       * @return string the input without the date: terms
       */
      private function parseDateSearch($input) {
          list($found, $input) = self::extractMatches('/\bdate:(?P<search>[^\s]*)/', $input);
          $dates = self::removeEmptyValues($found);
          if (!empty($dates[0])) {
              list($this->min_date, $this->max_date) = parseDateInterval($dates[0]);
          }
          return $input;
      }

      /**
       * Parse the search string to find pubdate keyword and the search related
       * to it.
       * The search is the first word following the keyword. All pubdate:
       * terms are removed from the input, but only the first non-empty one is
       * handed to parseDateInterval().
       *
       * @param string $input
       * @return string the input without the pubdate: terms
       */
      private function parsePubdateSearch($input) {
          list($found, $input) = self::extractMatches('/\bpubdate:(?P<search>[^\s]*)/', $input);
          $dates = self::removeEmptyValues($found);
          if (!empty($dates[0])) {
              list($this->min_pubdate, $this->max_pubdate) = parseDateInterval($dates[0]);
          }
          return $input;
      }

      /**
       * Parse the search string to find tags keyword (# followed by a word)
       * and the search related to it.
       * The search is the first word following the #.
       *
       * @param string $input
       * @return string the input without the #tag terms
       */
      private function parseTagsSearch($input) {
          list($this->tags, $input) = self::collect($input, array(
              '/#(?P<search>[^\s]+)/',
          ));
          return $input;
      }

      /**
       * Same as parseTagsSearch() for negated tags ("!#tag" or "-#tag"),
       * with the operator at the start of the input or after a whitespace.
       *
       * @param string $input
       * @return string the input without the negated #tag terms
       */
      private function parseNotTagsSearch($input) {
          list($this->not_tags, $input) = self::collect($input, array(
              '/(?<=\s|^)[!-]#(?P<search>[^\s]+)/',
          ));
          return $input;
      }

      /**
       * Parse the search string to find search values.
       * Every word is a distinct search value, except when using a delimiter.
       * Supported delimiters are single quote (') and double quotes (").
       *
       * @param string $input
       * @return string always '': every remaining character is consumed
       */
      private function parseSearch($input) {
          $input = self::cleanSearch($input);
          if ($input === '') {
              return '';
          }

          list($quoted, $input) = self::extractMatches(
              '/(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              $input
          );
          if (count($quoted) > 0) {
              $this->search = $quoted;
          }

          $input = self::cleanSearch($input);
          if ($input === '') {
              // Only quoted terms were given: they are kept verbatim, without
              // "+" decoding, so that a query such as "c++" stays searchable.
              return '';
          }

          $words = explode(' ', $input);
          if (is_array($this->search)) {
              $words = array_merge($this->search, $words);
          }
          $this->search = self::decodeSpaces($words);
          return '';
      }

      /**
       * Parse the search string to find negated search values: a word or a
       * quoted phrase prefixed with "!" or "-".
       *
       * The operator must start the input or follow a whitespace; a hyphen
       * inside a word ("e-mail") is part of the word, not a negation.
       *
       * @param string $input
       * @return string the cleaned input without the negated terms
       */
      private function parseNotSearch($input) {
          $input = self::cleanSearch($input);
          if ($input === '') {
              return '';
          }

          list($quoted, $input) = self::extractMatches(
              '/(?<=\s|^)[!-](?P<delim>[\'"])(?P<search>.*)(?P=delim)/U',
              $input
          );
          if (count($quoted) > 0) {
              $this->not_search = $quoted;
          }
          if ($input === '') {
              // The whole input was made of quoted negations: they are kept
              // verbatim, as for quoted terms in parseSearch().
              return '';
          }

          list($words, $input) = self::extractMatches('/(?<=\s|^)[!-](?P<search>[^\s]+)/', $input);
          $terms = array_merge(is_array($this->not_search) ? $this->not_search : array(), $words);
          $this->not_search = self::decodeSpaces(self::removeEmptyValues($terms));
          return $input;
      }

      /**
       * Remove all unnecessary spaces in the search: runs of whitespace are
       * collapsed into a single space and both ends are trimmed.
       *
       * @param string $input
       * @return string
       */
      private static function cleanSearch($input) {
          // The cast guards against null (no input, or a PCRE error).
          $collapsed = preg_replace('/\s+/', ' ', (string) $input);
          return trim((string) $collapsed);
      }
  }