Search.php 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. <?php
  2. require_once(LIB_PATH . '/lib_date.php');
  3. /**
  4. * Contains a search from the search form.
  5. *
  6. * It allows to extract meaningful bits of the search and store them in a
  7. * convenient object
  8. */
  9. class FreshRSS_Search {
  10. // This contains the user input string
  11. private $raw_input = '';
  12. // The following properties are extracted from the raw input
  13. private $intitle;
  14. private $min_date;
  15. private $max_date;
  16. private $min_pubdate;
  17. private $max_pubdate;
  18. private $inurl;
  19. private $author;
  20. private $tags;
  21. private $search;
  22. private $not_intitle;
  23. private $not_inurl;
  24. private $not_author;
  25. private $not_tags;
  26. private $not_search;
  27. public function __construct($input) {
  28. if ($input == '') {
  29. return;
  30. }
  31. $this->raw_input = $input;
  32. $input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input);
  33. $input = $this->parseNotIntitleSearch($input);
  34. $input = $this->parseNotAuthorSearch($input);
  35. $input = $this->parseNotInurlSearch($input);
  36. $input = $this->parseNotTagsSeach($input);
  37. $input = $this->parsePubdateSearch($input);
  38. $input = $this->parseDateSearch($input);
  39. $input = $this->parseIntitleSearch($input);
  40. $input = $this->parseAuthorSearch($input);
  41. $input = $this->parseInurlSearch($input);
  42. $input = $this->parseTagsSeach($input);
  43. $input = $this->parseNotSearch($input);
  44. $input = $this->parseSearch($input);
  45. }
  46. public function __toString() {
  47. return $this->getRawInput();
  48. }
  49. public function getRawInput() {
  50. return $this->raw_input;
  51. }
  52. public function getIntitle() {
  53. return $this->intitle;
  54. }
  55. public function getNotIntitle() {
  56. return $this->not_intitle;
  57. }
  58. public function getMinDate() {
  59. return $this->min_date;
  60. }
  61. public function getMaxDate() {
  62. return $this->max_date;
  63. }
  64. public function getMinPubdate() {
  65. return $this->min_pubdate;
  66. }
  67. public function getMaxPubdate() {
  68. return $this->max_pubdate;
  69. }
  70. public function getInurl() {
  71. return $this->inurl;
  72. }
  73. public function getNotInurl() {
  74. return $this->not_inurl;
  75. }
  76. public function getAuthor() {
  77. return $this->author;
  78. }
  79. public function getNotAuthor() {
  80. return $this->not_author;
  81. }
  82. public function getTags() {
  83. return $this->tags;
  84. }
  85. public function getNotTags() {
  86. return $this->not_tags;
  87. }
  88. public function getSearch() {
  89. return $this->search;
  90. }
  91. public function getNotSearch() {
  92. return $this->not_search;
  93. }
  94. private static function removeEmptyValues($anArray) {
  95. return is_array($anArray) ? array_filter($anArray, function($value) { return $value !== ''; }) : array();
  96. }
  97. /**
  98. * Parse the search string to find intitle keyword and the search related
  99. * to it.
  100. * The search is the first word following the keyword.
  101. *
  102. * @param string $input
  103. * @return string
  104. */
  105. private function parseIntitleSearch($input) {
  106. if (preg_match_all('/\bintitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  107. $this->intitle = $matches['search'];
  108. $input = str_replace($matches[0], '', $input);
  109. }
  110. if (preg_match_all('/\bintitle:(?P<search>\w*)/', $input, $matches)) {
  111. $this->intitle = array_merge($this->intitle ? $this->intitle : array(), $matches['search']);
  112. $input = str_replace($matches[0], '', $input);
  113. }
  114. $this->intitle = self::removeEmptyValues($this->intitle);
  115. return $input;
  116. }
  117. private function parseNotIntitleSearch($input) {
  118. if (preg_match_all('/[!-]intitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  119. $this->not_intitle = $matches['search'];
  120. $input = str_replace($matches[0], '', $input);
  121. }
  122. if (preg_match_all('/[!-]intitle:(?P<search>\w*)/', $input, $matches)) {
  123. $this->not_intitle = array_merge($this->not_intitle ? $this->not_intitle : array(), $matches['search']);
  124. $input = str_replace($matches[0], '', $input);
  125. }
  126. $this->not_intitle = self::removeEmptyValues($this->not_intitle);
  127. return $input;
  128. }
  129. /**
  130. * Parse the search string to find author keyword and the search related
  131. * to it.
  132. * The search is the first word following the keyword except when using
  133. * a delimiter. Supported delimiters are single quote (') and double
  134. * quotes (").
  135. *
  136. * @param string $input
  137. * @return string
  138. */
  139. private function parseAuthorSearch($input) {
  140. if (preg_match_all('/\bauthor:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  141. $this->author = $matches['search'];
  142. $input = str_replace($matches[0], '', $input);
  143. }
  144. if (preg_match_all('/\bauthor:(?P<search>\w*)/', $input, $matches)) {
  145. $this->author = array_merge($this->author ? $this->author : array(), $matches['search']);
  146. $input = str_replace($matches[0], '', $input);
  147. }
  148. $this->author = self::removeEmptyValues($this->author);
  149. return $input;
  150. }
  151. private function parseNotAuthorSearch($input) {
  152. if (preg_match_all('/[!-]author:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  153. $this->not_author = $matches['search'];
  154. $input = str_replace($matches[0], '', $input);
  155. }
  156. if (preg_match_all('/[!-]author:(?P<search>\w*)/', $input, $matches)) {
  157. $this->not_author = array_merge($this->not_author ? $this->not_author : array(), $matches['search']);
  158. $input = str_replace($matches[0], '', $input);
  159. }
  160. $this->not_author = self::removeEmptyValues($this->not_author);
  161. return $input;
  162. }
  163. /**
  164. * Parse the search string to find inurl keyword and the search related
  165. * to it.
  166. * The search is the first word following the keyword.
  167. *
  168. * @param string $input
  169. * @return string
  170. */
  171. private function parseInurlSearch($input) {
  172. if (preg_match_all('/\binurl:(?P<search>[^\s]*)/', $input, $matches)) {
  173. $this->inurl = $matches['search'];
  174. $input = str_replace($matches[0], '', $input);
  175. }
  176. $this->inurl = self::removeEmptyValues($this->inurl);
  177. return $input;
  178. }
  179. private function parseNotInurlSearch($input) {
  180. if (preg_match_all('/[!-]inurl:(?P<search>[^\s]*)/', $input, $matches)) {
  181. $this->not_inurl = $matches['search'];
  182. $input = str_replace($matches[0], '', $input);
  183. }
  184. $this->not_inurl = self::removeEmptyValues($this->not_inurl);
  185. return $input;
  186. }
  187. /**
  188. * Parse the search string to find date keyword and the search related
  189. * to it.
  190. * The search is the first word following the keyword.
  191. *
  192. * @param string $input
  193. * @return string
  194. */
  195. private function parseDateSearch($input) {
  196. if (preg_match_all('/\bdate:(?P<search>[^\s]*)/', $input, $matches)) {
  197. $input = str_replace($matches[0], '', $input);
  198. $dates = self::removeEmptyValues($matches['search']);
  199. if (!empty($dates[0])) {
  200. list($this->min_date, $this->max_date) = parseDateInterval($dates[0]);
  201. }
  202. }
  203. return $input;
  204. }
  205. /**
  206. * Parse the search string to find pubdate keyword and the search related
  207. * to it.
  208. * The search is the first word following the keyword.
  209. *
  210. * @param string $input
  211. * @return string
  212. */
  213. private function parsePubdateSearch($input) {
  214. if (preg_match_all('/\bpubdate:(?P<search>[^\s]*)/', $input, $matches)) {
  215. $input = str_replace($matches[0], '', $input);
  216. $dates = self::removeEmptyValues($matches['search']);
  217. if (!empty($dates[0])) {
  218. list($this->min_pubdate, $this->max_pubdate) = parseDateInterval($dates[0]);
  219. }
  220. }
  221. return $input;
  222. }
  223. /**
  224. * Parse the search string to find tags keyword (# followed by a word)
  225. * and the search related to it.
  226. * The search is the first word following the #.
  227. *
  228. * @param string $input
  229. * @return string
  230. */
  231. private function parseTagsSeach($input) {
  232. if (preg_match_all('/#(?P<search>[^\s]+)/', $input, $matches)) {
  233. $this->tags = $matches['search'];
  234. $input = str_replace($matches[0], '', $input);
  235. }
  236. $this->tags = self::removeEmptyValues($this->tags);
  237. return $input;
  238. }
  239. private function parseNotTagsSeach($input) {
  240. if (preg_match_all('/[!-]#(?P<search>[^\s]+)/', $input, $matches)) {
  241. $this->not_tags = $matches['search'];
  242. $input = str_replace($matches[0], '', $input);
  243. }
  244. $this->not_tags = self::removeEmptyValues($this->not_tags);
  245. return $input;
  246. }
  247. /**
  248. * Parse the search string to find search values.
  249. * Every word is a distinct search value, except when using a delimiter.
  250. * Supported delimiters are single quote (') and double quotes (").
  251. *
  252. * @param string $input
  253. * @return string
  254. */
  255. private function parseSearch($input) {
  256. $input = self::cleanSearch($input);
  257. if ($input == '') {
  258. return;
  259. }
  260. if (preg_match_all('/(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  261. $this->search = $matches['search'];
  262. $input = str_replace($matches[0], '', $input);
  263. }
  264. $input = self::cleanSearch($input);
  265. if ($input == '') {
  266. return;
  267. }
  268. if (is_array($this->search)) {
  269. $this->search = array_merge($this->search, explode(' ', $input));
  270. } else {
  271. $this->search = explode(' ', $input);
  272. }
  273. }
  274. private function parseNotSearch($input) {
  275. $input = self::cleanSearch($input);
  276. if ($input == '') {
  277. return;
  278. }
  279. if (preg_match_all('/[!-](?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  280. $this->not_search = $matches['search'];
  281. $input = str_replace($matches[0], '', $input);
  282. }
  283. if ($input == '') {
  284. return;
  285. }
  286. if (preg_match_all('/[!-](?P<search>[^\s]+)/', $input, $matches)) {
  287. $this->not_search = array_merge(is_array($this->not_search) ? $this->not_search : array(), $matches['search']);
  288. $input = str_replace($matches[0], '', $input);
  289. }
  290. $this->not_search = self::removeEmptyValues($this->not_search);
  291. return $input;
  292. }
  293. /**
  294. * Remove all unnecessary spaces in the search
  295. *
  296. * @param string $input
  297. * @return string
  298. */
  299. private static function cleanSearch($input) {
  300. $input = preg_replace('/\s+/', ' ', $input);
  301. return trim($input);
  302. }
  303. }