Search.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. <?php
  2. require_once(LIB_PATH . '/lib_date.php');
  3. /**
  4. * Contains a search from the search form.
  5. *
  6. * It allows to extract meaningful bits of the search and store them in a
  7. * convenient object
  8. */
  9. class FreshRSS_Search {
  10. // This contains the user input string
  11. private $raw_input = '';
  12. // The following properties are extracted from the raw input
  13. private $intitle;
  14. private $min_date;
  15. private $max_date;
  16. private $min_pubdate;
  17. private $max_pubdate;
  18. private $inurl;
  19. private $author;
  20. private $tags;
  21. private $search;
  22. private $not_intitle;
  23. private $not_inurl;
  24. private $not_author;
  25. private $not_tags;
  26. private $not_search;
  27. public function __construct($input) {
  28. if ($input == '') {
  29. return;
  30. }
  31. $this->raw_input = $input;
  32. $input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input);
  33. $input = $this->parseNotIntitleSearch($input);
  34. $input = $this->parseNotAuthorSearch($input);
  35. $input = $this->parseNotInurlSearch($input);
  36. $input = $this->parseNotTagsSearch($input);
  37. $input = $this->parsePubdateSearch($input);
  38. $input = $this->parseDateSearch($input);
  39. $input = $this->parseIntitleSearch($input);
  40. $input = $this->parseAuthorSearch($input);
  41. $input = $this->parseInurlSearch($input);
  42. $input = $this->parseTagsSearch($input);
  43. $input = $this->parseNotSearch($input);
  44. $input = $this->parseSearch($input);
  45. }
  46. public function __toString() {
  47. return $this->getRawInput();
  48. }
  49. public function getRawInput() {
  50. return $this->raw_input;
  51. }
  52. public function getIntitle() {
  53. return $this->intitle;
  54. }
  55. public function getNotIntitle() {
  56. return $this->not_intitle;
  57. }
  58. public function getMinDate() {
  59. return $this->min_date;
  60. }
  61. public function setMinDate($value) {
  62. return $this->min_date = $value;
  63. }
  64. public function getMaxDate() {
  65. return $this->max_date;
  66. }
  67. public function setMaxDate($value) {
  68. return $this->max_date = $value;
  69. }
  70. public function getMinPubdate() {
  71. return $this->min_pubdate;
  72. }
  73. public function getMaxPubdate() {
  74. return $this->max_pubdate;
  75. }
  76. public function getInurl() {
  77. return $this->inurl;
  78. }
  79. public function getNotInurl() {
  80. return $this->not_inurl;
  81. }
  82. public function getAuthor() {
  83. return $this->author;
  84. }
  85. public function getNotAuthor() {
  86. return $this->not_author;
  87. }
  88. public function getTags() {
  89. return $this->tags;
  90. }
  91. public function getNotTags() {
  92. return $this->not_tags;
  93. }
  94. public function getSearch() {
  95. return $this->search;
  96. }
  97. public function getNotSearch() {
  98. return $this->not_search;
  99. }
  100. private static function removeEmptyValues($anArray) {
  101. return is_array($anArray) ? array_filter($anArray, function($value) { return $value !== ''; }) : array();
  102. }
  103. private static function decodeSpaces($value) {
  104. if (is_array($value)) {
  105. for ($i = count($value) - 1; $i >= 0; $i--) {
  106. $value[$i] = self::decodeSpaces($value[$i]);
  107. }
  108. } else {
  109. $value = trim(str_replace('+', ' ', $value));
  110. }
  111. return $value;
  112. }
  113. /**
  114. * Parse the search string to find intitle keyword and the search related
  115. * to it.
  116. * The search is the first word following the keyword.
  117. *
  118. * @param string $input
  119. * @return string
  120. */
  121. private function parseIntitleSearch($input) {
  122. if (preg_match_all('/\bintitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  123. $this->intitle = $matches['search'];
  124. $input = str_replace($matches[0], '', $input);
  125. }
  126. if (preg_match_all('/\bintitle:(?P<search>[^\s"]*)/', $input, $matches)) {
  127. $this->intitle = array_merge($this->intitle ? $this->intitle : array(), $matches['search']);
  128. $input = str_replace($matches[0], '', $input);
  129. }
  130. $this->intitle = self::removeEmptyValues($this->intitle);
  131. $this->intitle = self::decodeSpaces($this->intitle);
  132. return $input;
  133. }
  134. private function parseNotIntitleSearch($input) {
  135. if (preg_match_all('/[!-]intitle:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  136. $this->not_intitle = $matches['search'];
  137. $input = str_replace($matches[0], '', $input);
  138. }
  139. if (preg_match_all('/[!-]intitle:(?P<search>[^\s"]*)/', $input, $matches)) {
  140. $this->not_intitle = array_merge($this->not_intitle ? $this->not_intitle : array(), $matches['search']);
  141. $input = str_replace($matches[0], '', $input);
  142. }
  143. $this->not_intitle = self::removeEmptyValues($this->not_intitle);
  144. $this->not_intitle = self::decodeSpaces($this->not_intitle);
  145. return $input;
  146. }
  147. /**
  148. * Parse the search string to find author keyword and the search related
  149. * to it.
  150. * The search is the first word following the keyword except when using
  151. * a delimiter. Supported delimiters are single quote (') and double
  152. * quotes (").
  153. *
  154. * @param string $input
  155. * @return string
  156. */
  157. private function parseAuthorSearch($input) {
  158. if (preg_match_all('/\bauthor:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  159. $this->author = $matches['search'];
  160. $input = str_replace($matches[0], '', $input);
  161. }
  162. if (preg_match_all('/\bauthor:(?P<search>[^\s"]*)/', $input, $matches)) {
  163. $this->author = array_merge($this->author ? $this->author : array(), $matches['search']);
  164. $input = str_replace($matches[0], '', $input);
  165. }
  166. $this->author = self::removeEmptyValues($this->author);
  167. $this->author = self::decodeSpaces($this->author);
  168. return $input;
  169. }
  170. private function parseNotAuthorSearch($input) {
  171. if (preg_match_all('/[!-]author:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  172. $this->not_author = $matches['search'];
  173. $input = str_replace($matches[0], '', $input);
  174. }
  175. if (preg_match_all('/[!-]author:(?P<search>[^\s"]*)/', $input, $matches)) {
  176. $this->not_author = array_merge($this->not_author ? $this->not_author : array(), $matches['search']);
  177. $input = str_replace($matches[0], '', $input);
  178. }
  179. $this->not_author = self::removeEmptyValues($this->not_author);
  180. $this->not_author = self::decodeSpaces($this->not_author);
  181. return $input;
  182. }
  183. /**
  184. * Parse the search string to find inurl keyword and the search related
  185. * to it.
  186. * The search is the first word following the keyword.
  187. *
  188. * @param string $input
  189. * @return string
  190. */
  191. private function parseInurlSearch($input) {
  192. if (preg_match_all('/\binurl:(?P<search>[^\s]*)/', $input, $matches)) {
  193. $this->inurl = $matches['search'];
  194. $input = str_replace($matches[0], '', $input);
  195. }
  196. $this->inurl = self::removeEmptyValues($this->inurl);
  197. $this->inurl = self::decodeSpaces($this->inurl);
  198. return $input;
  199. }
  200. private function parseNotInurlSearch($input) {
  201. if (preg_match_all('/[!-]inurl:(?P<search>[^\s]*)/', $input, $matches)) {
  202. $this->not_inurl = $matches['search'];
  203. $input = str_replace($matches[0], '', $input);
  204. }
  205. $this->not_inurl = self::removeEmptyValues($this->not_inurl);
  206. $this->not_inurl = self::decodeSpaces($this->not_inurl);
  207. return $input;
  208. }
  209. /**
  210. * Parse the search string to find date keyword and the search related
  211. * to it.
  212. * The search is the first word following the keyword.
  213. *
  214. * @param string $input
  215. * @return string
  216. */
  217. private function parseDateSearch($input) {
  218. if (preg_match_all('/\bdate:(?P<search>[^\s]*)/', $input, $matches)) {
  219. $input = str_replace($matches[0], '', $input);
  220. $dates = self::removeEmptyValues($matches['search']);
  221. if (!empty($dates[0])) {
  222. list($this->min_date, $this->max_date) = parseDateInterval($dates[0]);
  223. }
  224. }
  225. return $input;
  226. }
  227. /**
  228. * Parse the search string to find pubdate keyword and the search related
  229. * to it.
  230. * The search is the first word following the keyword.
  231. *
  232. * @param string $input
  233. * @return string
  234. */
  235. private function parsePubdateSearch($input) {
  236. if (preg_match_all('/\bpubdate:(?P<search>[^\s]*)/', $input, $matches)) {
  237. $input = str_replace($matches[0], '', $input);
  238. $dates = self::removeEmptyValues($matches['search']);
  239. if (!empty($dates[0])) {
  240. list($this->min_pubdate, $this->max_pubdate) = parseDateInterval($dates[0]);
  241. }
  242. }
  243. return $input;
  244. }
  245. /**
  246. * Parse the search string to find tags keyword (# followed by a word)
  247. * and the search related to it.
  248. * The search is the first word following the #.
  249. *
  250. * @param string $input
  251. * @return string
  252. */
  253. private function parseTagsSearch($input) {
  254. if (preg_match_all('/#(?P<search>[^\s]+)/', $input, $matches)) {
  255. $this->tags = $matches['search'];
  256. $input = str_replace($matches[0], '', $input);
  257. }
  258. $this->tags = self::removeEmptyValues($this->tags);
  259. $this->tags = self::decodeSpaces($this->tags);
  260. return $input;
  261. }
  262. private function parseNotTagsSearch($input) {
  263. if (preg_match_all('/[!-]#(?P<search>[^\s]+)/', $input, $matches)) {
  264. $this->not_tags = $matches['search'];
  265. $input = str_replace($matches[0], '', $input);
  266. }
  267. $this->not_tags = self::removeEmptyValues($this->not_tags);
  268. $this->not_tags = self::decodeSpaces($this->not_tags);
  269. return $input;
  270. }
  271. /**
  272. * Parse the search string to find search values.
  273. * Every word is a distinct search value, except when using a delimiter.
  274. * Supported delimiters are single quote (') and double quotes (").
  275. *
  276. * @param string $input
  277. * @return string
  278. */
  279. private function parseSearch($input) {
  280. $input = self::cleanSearch($input);
  281. if ($input == '') {
  282. return;
  283. }
  284. if (preg_match_all('/(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  285. $this->search = $matches['search'];
  286. $input = str_replace($matches[0], '', $input);
  287. }
  288. $input = self::cleanSearch($input);
  289. if ($input == '') {
  290. return;
  291. }
  292. if (is_array($this->search)) {
  293. $this->search = array_merge($this->search, explode(' ', $input));
  294. } else {
  295. $this->search = explode(' ', $input);
  296. }
  297. $this->search = self::decodeSpaces($this->search);
  298. }
  299. private function parseNotSearch($input) {
  300. $input = self::cleanSearch($input);
  301. if ($input == '') {
  302. return;
  303. }
  304. if (preg_match_all('/[!-](?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  305. $this->not_search = $matches['search'];
  306. $input = str_replace($matches[0], '', $input);
  307. }
  308. if ($input == '') {
  309. return;
  310. }
  311. if (preg_match_all('/[!-](?P<search>[^\s]+)/', $input, $matches)) {
  312. $this->not_search = array_merge(is_array($this->not_search) ? $this->not_search : array(), $matches['search']);
  313. $input = str_replace($matches[0], '', $input);
  314. }
  315. $this->not_search = self::removeEmptyValues($this->not_search);
  316. $this->not_search = self::decodeSpaces($this->not_search);
  317. return $input;
  318. }
  319. /**
  320. * Remove all unnecessary spaces in the search
  321. *
  322. * @param string $input
  323. * @return string
  324. */
  325. private static function cleanSearch($input) {
  326. $input = preg_replace('/\s+/', ' ', $input);
  327. return trim($input);
  328. }
  329. }