BooleanSearch.php 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. <?php
  2. /**
  3. * Contains Boolean search from the search form.
  4. */
  5. class FreshRSS_BooleanSearch {
  6. private string $raw_input = '';
  7. /** @var array<FreshRSS_BooleanSearch|FreshRSS_Search> */
  8. private array $searches = [];
  9. /**
  10. * @phpstan-var 'AND'|'OR'|'AND NOT'
  11. */
  12. private string $operator;
  13. /** @param 'AND'|'OR'|'AND NOT' $operator */
  14. public function __construct(string $input, int $level = 0, string $operator = 'AND') {
  15. $this->operator = $operator;
  16. $input = trim($input);
  17. if ($input == '') {
  18. return;
  19. }
  20. $this->raw_input = $input;
  21. if ($level === 0) {
  22. $input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input);
  23. $input = preg_replace('/(?<=[\s!-]|^)&quot;(.*?)&quot;/', '"\1"', $input);
  24. $input = $this->parseUserQueryNames($input);
  25. $input = $this->parseUserQueryIds($input);
  26. }
  27. // Either parse everything as a series of BooleanSearch’s combined by implicit AND
  28. // or parse everything as a series of Search’s combined by explicit OR
  29. $this->parseParentheses($input, $level) || $this->parseOrSegments($input);
  30. }
  31. /**
  32. * Parse the user queries (saved searches) by name and expand them in the input string.
  33. */
  34. private function parseUserQueryNames(string $input): string {
  35. $all_matches = [];
  36. if (preg_match_all('/\bsearch:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matchesFound)) {
  37. $all_matches[] = $matchesFound;
  38. }
  39. if (preg_match_all('/\bsearch:(?P<search>[^\s"]*)/', $input, $matchesFound)) {
  40. $all_matches[] = $matchesFound;
  41. }
  42. if (!empty($all_matches)) {
  43. /** @var array<string,FreshRSS_UserQuery> */
  44. $queries = [];
  45. foreach (FreshRSS_Context::$user_conf->queries as $raw_query) {
  46. $query = new FreshRSS_UserQuery($raw_query);
  47. $queries[$query->getName()] = $query;
  48. }
  49. $fromS = [];
  50. $toS = [];
  51. foreach ($all_matches as $matches) {
  52. if (empty($matches['search'])) {
  53. continue;
  54. }
  55. for ($i = count($matches['search']) - 1; $i >= 0; $i--) {
  56. $name = trim($matches['search'][$i]);
  57. if (!empty($queries[$name])) {
  58. $fromS[] = $matches[0][$i];
  59. $toS[] = '(' . trim($queries[$name]->getSearch()) . ')';
  60. }
  61. }
  62. }
  63. $input = str_replace($fromS, $toS, $input);
  64. }
  65. return $input;
  66. }
  67. /**
  68. * Parse the user queries (saved searches) by ID and expand them in the input string.
  69. */
  70. private function parseUserQueryIds(string $input): string {
  71. $all_matches = [];
  72. if (preg_match_all('/\bS:(?P<search>\d+)/', $input, $matchesFound)) {
  73. $all_matches[] = $matchesFound;
  74. }
  75. if (!empty($all_matches)) {
  76. $category_dao = FreshRSS_Factory::createCategoryDao();
  77. $feed_dao = FreshRSS_Factory::createFeedDao();
  78. $tag_dao = FreshRSS_Factory::createTagDao();
  79. /** @var array<string,FreshRSS_UserQuery> */
  80. $queries = [];
  81. foreach (FreshRSS_Context::$user_conf->queries as $raw_query) {
  82. $query = new FreshRSS_UserQuery($raw_query, $feed_dao, $category_dao, $tag_dao);
  83. $queries[] = $query;
  84. }
  85. $fromS = [];
  86. $toS = [];
  87. foreach ($all_matches as $matches) {
  88. if (empty($matches['search'])) {
  89. continue;
  90. }
  91. for ($i = count($matches['search']) - 1; $i >= 0; $i--) {
  92. // Index starting from 1
  93. $id = (int)(trim($matches['search'][$i])) - 1;
  94. if (!empty($queries[$id])) {
  95. $fromS[] = $matches[0][$i];
  96. $toS[] = '(' . trim($queries[$id]->getSearch()) . ')';
  97. }
  98. }
  99. }
  100. $input = str_replace($fromS, $toS, $input);
  101. }
  102. return $input;
  103. }
  104. /** @return bool True if some parenthesis logic took over, false otherwise */
  105. private function parseParentheses(string $input, int $level): bool {
  106. $input = trim($input);
  107. $length = strlen($input);
  108. $i = 0;
  109. $before = '';
  110. $hasParenthesis = false;
  111. $nextOperator = 'AND';
  112. while ($i < $length) {
  113. $c = $input[$i];
  114. $backslashed = $i >= 1 ? $input[$i - 1] === '\\' : false;
  115. if ($c === '(' && !$backslashed) {
  116. $hasParenthesis = true;
  117. $before = trim($before);
  118. if (preg_match('/[!-]$/i', $before)) {
  119. // Trim trailing negation
  120. $before = substr($before, 0, -1);
  121. // The text prior to the negation is a BooleanSearch
  122. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  123. if (count($searchBefore->searches()) > 0) {
  124. $this->searches[] = $searchBefore;
  125. }
  126. $before = '';
  127. // The next BooleanSearch will have to be combined with AND NOT instead of default AND
  128. $nextOperator = 'AND NOT';
  129. } elseif (preg_match('/\bOR$/i', $before)) {
  130. // Trim trailing OR
  131. $before = substr($before, 0, -2);
  132. // The text prior to the OR is a BooleanSearch
  133. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  134. if (count($searchBefore->searches()) > 0) {
  135. $this->searches[] = $searchBefore;
  136. }
  137. $before = '';
  138. // The next BooleanSearch will have to be combined with OR instead of default AND
  139. $nextOperator = 'OR';
  140. } elseif ($before !== '') {
  141. // The text prior to the opening parenthesis is a BooleanSearch
  142. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  143. if (count($searchBefore->searches()) > 0) {
  144. $this->searches[] = $searchBefore;
  145. }
  146. $before = '';
  147. }
  148. // Search the matching closing parenthesis
  149. $parentheses = 1;
  150. $sub = '';
  151. $i++;
  152. while ($i < $length) {
  153. $c = $input[$i];
  154. $backslashed = $input[$i - 1] === '\\';
  155. if ($c === '(' && !$backslashed) {
  156. // One nested level deeper
  157. $parentheses++;
  158. $sub .= $c;
  159. } elseif ($c === ')' && !$backslashed) {
  160. $parentheses--;
  161. if ($parentheses === 0) {
  162. // Found the matching closing parenthesis
  163. $searchSub = new FreshRSS_BooleanSearch($sub, $level + 1, $nextOperator);
  164. $nextOperator = 'AND';
  165. if (count($searchSub->searches()) > 0) {
  166. $this->searches[] = $searchSub;
  167. }
  168. $sub = '';
  169. break;
  170. } else {
  171. $sub .= $c;
  172. }
  173. } else {
  174. $sub .= $c;
  175. }
  176. $i++;
  177. }
  178. // $sub = trim($sub);
  179. // if ($sub != '') {
  180. // // TODO: Consider throwing an error or warning in case of non-matching parenthesis
  181. // }
  182. // } elseif ($c === ')') {
  183. // // TODO: Consider throwing an error or warning in case of non-matching parenthesis
  184. } else {
  185. $before .= $c;
  186. }
  187. $i++;
  188. }
  189. if ($hasParenthesis) {
  190. $before = trim($before);
  191. if (preg_match('/^OR\b/i', $before)) {
  192. // The next BooleanSearch will have to be combined with OR instead of default AND
  193. $nextOperator = 'OR';
  194. // Trim leading OR
  195. $before = substr($before, 2);
  196. }
  197. // The remaining text after the last parenthesis is a BooleanSearch
  198. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  199. $nextOperator = 'AND';
  200. if (count($searchBefore->searches()) > 0) {
  201. $this->searches[] = $searchBefore;
  202. }
  203. return true;
  204. }
  205. // There was no parenthesis logic to apply
  206. return false;
  207. }
  208. private function parseOrSegments(string $input): void {
  209. $input = trim($input);
  210. if ($input === '') {
  211. return;
  212. }
  213. $splits = preg_split('/\b(OR)\b/i', $input, -1, PREG_SPLIT_DELIM_CAPTURE) ?: [];
  214. $segment = '';
  215. $ns = count($splits);
  216. for ($i = 0; $i < $ns; $i++) {
  217. $segment = $segment . $splits[$i];
  218. if (trim($segment) == '' || strcasecmp($segment, 'OR') === 0) {
  219. $segment = '';
  220. } else {
  221. $quotes = substr_count($segment, '"') + substr_count($segment, '&quot;');
  222. if ($quotes % 2 === 0) {
  223. $segment = trim($segment);
  224. $this->searches[] = new FreshRSS_Search($segment);
  225. $segment = '';
  226. }
  227. }
  228. }
  229. $segment = trim($segment);
  230. if ($segment != '') {
  231. $this->searches[] = new FreshRSS_Search($segment);
  232. }
  233. }
  234. /**
  235. * Either a list of FreshRSS_BooleanSearch combined by implicit AND
  236. * or a series of FreshRSS_Search combined by explicit OR
  237. * @return array<FreshRSS_BooleanSearch|FreshRSS_Search>
  238. */
  239. public function searches(): array {
  240. return $this->searches;
  241. }
  242. /** @return 'AND'|'OR'|'AND NOT' depending on how this BooleanSearch should be combined */
  243. public function operator(): string {
  244. return $this->operator;
  245. }
  246. /** @param FreshRSS_BooleanSearch|FreshRSS_Search $search */
  247. public function add($search): void {
  248. $this->searches[] = $search;
  249. }
  250. public function __toString(): string {
  251. return $this->getRawInput();
  252. }
  253. public function getRawInput(): string {
  254. return $this->raw_input;
  255. }
  256. }