BooleanSearch.php 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. <?php
  2. declare(strict_types=1);
  /**
   * Boolean search expression parsed from the raw text of the search form.
   *
   * The input is decomposed either into nested FreshRSS_BooleanSearch groups (when it
   * contains unescaped parentheses) or into FreshRSS_Search terms separated by explicit OR.
   */
  class FreshRSS_BooleanSearch {

      /** Trimmed input exactly as received, before any expansion of saved searches. */
      private string $raw_input = '';

      /** @var array<FreshRSS_BooleanSearch|FreshRSS_Search> */
      private array $searches = [];

      /**
       * How this search is combined with the one preceding it.
       * @phpstan-var 'AND'|'OR'|'AND NOT'
       */
      private string $operator;

      /**
       * @param string $input Raw search text; surrounding whitespace is ignored.
       * @param int $level Nesting depth. Only level 0 (the user's top-level input) gets
       *  `&quot;` decoding and saved-search expansion.
       * @param 'AND'|'OR'|'AND NOT' $operator
       */
      public function __construct(string $input, int $level = 0, string $operator = 'AND') {
          $this->operator = $operator;
          $input = trim($input);
          if ($input === '') {
              return;
          }
          $this->raw_input = $input;

          if ($level === 0) {
              // preg_replace() returns null on a PCRE error; keep the unmodified input in that
              // case instead of passing null to a string-typed method.
              $input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input) ?? $input;
              $input = preg_replace('/(?<=[\s!-]|^)&quot;(.*?)&quot;/', '"\1"', $input) ?? $input;

              $input = $this->parseUserQueryNames($input);
              $input = $this->parseUserQueryIds($input);
          }

          // Either parse everything as a series of BooleanSearch combined by implicit AND,
          // or parse everything as a series of Search combined by explicit OR.
          if (!$this->parseParentheses($input, $level)) {
              $this->parseOrSegments($input);
          }
      }

      /**
       * Parse the user queries (saved searches) by name and expand them in the input string.
       *
       * Accepted forms are `search:name`, `search:"some name"` and `search:'some name'`.
       * Every token is replaced at the exact position where it matched, so a known name
       * that is a prefix of a longer token (`search:foo` vs. `search:foobar`) does not
       * corrupt the longer one. Tokens naming an unknown saved search are left untouched.
       */
      private function parseUserQueryNames(string $input): string {
          $pattern = '/\bsearch:(?:(?P<delim>[\'"])(?P<quoted>.*?)(?P=delim)|(?P<bare>[^\s"]*))/';
          if (preg_match($pattern, $input) !== 1) {
              // Nothing to expand: do not load the saved searches at all.
              return $input;
          }

          /** @var array<string,FreshRSS_UserQuery> */
          $queries = [];
          foreach (FreshRSS_Context::$user_conf->queries as $raw_query) {
              $query = new FreshRSS_UserQuery($raw_query);
              $queries[$query->getName()] = $query;
          }

          $expanded = preg_replace_callback(
              $pattern,
              static function (array $match) use ($queries): string {
                  $isQuoted = ($match['delim'] ?? '') !== '';
                  $name = trim($isQuoted ? $match['quoted'] : ($match['bare'] ?? ''));
                  if (!isset($queries[$name])) {
                      return $match[0];
                  }
                  return '(' . trim($queries[$name]->getSearch()->getRawInput()) . ')';
              },
              $input
          );
          return $expanded ?? $input;
      }

      /**
       * Parse the user queries (saved searches) by ID and expand them in the input string.
       *
       * A token has the form `S:<number>`, where the number is the 1-based position of the
       * saved search. Every token is replaced at the exact position where it matched, so
       * `S:1` is never substituted inside `S:12`. Unknown positions are left untouched.
       */
      private function parseUserQueryIds(string $input): string {
          $pattern = '/\bS:(?P<search>\d+)/';
          if (preg_match($pattern, $input) !== 1) {
              // Nothing to expand: do not create the DAOs nor load the saved searches.
              return $input;
          }

          $category_dao = FreshRSS_Factory::createCategoryDao();
          $feed_dao = FreshRSS_Factory::createFeedDao();
          $tag_dao = FreshRSS_Factory::createTagDao();

          /** @var array<int,FreshRSS_UserQuery> */
          $queries = [];
          foreach (FreshRSS_Context::$user_conf->queries as $raw_query) {
              $queries[] = new FreshRSS_UserQuery($raw_query, $feed_dao, $category_dao, $tag_dao);
          }

          $expanded = preg_replace_callback(
              $pattern,
              static function (array $match) use ($queries): string {
                  // Index starting from 1
                  $index = (int)$match['search'] - 1;
                  if (!isset($queries[$index])) {
                      return $match[0];
                  }
                  return '(' . trim($queries[$index]->getSearch()->getRawInput()) . ')';
              },
              $input
          );
          return $expanded ?? $input;
      }

      /**
       * Build a nested BooleanSearch from $input and keep it only if it contains something.
       *
       * @param 'AND'|'OR'|'AND NOT' $operator
       */
      private function appendGroup(string $input, int $level, string $operator): void {
          $group = new self($input, $level + 1, $operator);
          if (count($group->searches()) > 0) {
              $this->searches[] = $group;
          }
      }

      /**
       * Append the free text found outside of parentheses as a nested BooleanSearch.
       * A leading `OR` is consumed and makes the text combine with OR instead of the default AND.
       */
      private function appendFreeText(string $text, int $level): void {
          $text = trim($text);
          $operator = 'AND';
          if (preg_match('/^OR\b/i', $text)) {
              $operator = 'OR';
              // Trim leading OR
              $text = substr($text, 2);
          }
          $this->appendGroup($text, $level, $operator);
      }

      /**
       * Split the input around top-level parenthesised groups.
       *
       * A parenthesis preceded by a backslash is treated as ordinary text. A group directly
       * preceded by `!` or `-` is combined with AND NOT, a group preceded by `OR` with OR,
       * any other group with AND.
       *
       * @return bool True if some parenthesis logic took over, false otherwise
       */
      private function parseParentheses(string $input, int $level): bool {
          $input = trim($input);
          $length = strlen($input);
          $before = '';
          $hasParenthesis = false;

          for ($i = 0; $i < $length; $i++) {
              $c = $input[$i];
              $backslashed = $i >= 1 && $input[$i - 1] === '\\';
              if ($c !== '(' || $backslashed) {
                  $before .= $c;
                  continue;
              }

              $hasParenthesis = true;
              $groupOperator = 'AND';
              $before = trim($before);
              if (preg_match('/[!-]$/', $before)) {
                  // Trim trailing negation: the group will be combined with AND NOT
                  $before = substr($before, 0, -1);
                  $groupOperator = 'AND NOT';
              } elseif (preg_match('/\bOR$/i', $before)) {
                  // Trim trailing OR: the group will be combined with OR
                  $before = substr($before, 0, -2);
                  $groupOperator = 'OR';
              }
              // The text prior to the opening parenthesis is a BooleanSearch of its own
              $this->appendFreeText($before, $level);
              $before = '';

              // Search the matching closing parenthesis
              $parentheses = 1;
              $sub = '';
              for ($i++; $i < $length; $i++) {
                  $c = $input[$i];
                  $backslashed = $input[$i - 1] === '\\';
                  if ($c === '(' && !$backslashed) {
                      // One nested level deeper
                      $parentheses++;
                  } elseif ($c === ')' && !$backslashed) {
                      $parentheses--;
                      if ($parentheses === 0) {
                          // Found the matching closing parenthesis
                          $this->appendGroup($sub, $level, $groupOperator);
                          break;
                      }
                  }
                  $sub .= $c;
              }
              // When the end of the input is reached without a matching closing parenthesis,
              // the text collected for that group is discarded.
              // TODO: Consider throwing an error or warning in case of non-matching parenthesis
          }

          if (!$hasParenthesis) {
              // There was no parenthesis logic to apply
              return false;
          }
          // The remaining text after the last parenthesis is a BooleanSearch
          $this->appendFreeText($before, $level);
          return true;
      }

      /**
       * Split the input on the word `OR` (case-insensitive) and append one FreshRSS_Search per segment.
       * An `OR` located between an odd number of quotes is kept as part of its segment.
       */
      private function parseOrSegments(string $input): void {
          $input = trim($input);
          if ($input === '') {
              return;
          }
          $pieces = preg_split('/\b(OR)\b/i', $input, -1, PREG_SPLIT_DELIM_CAPTURE) ?: [];
          $segment = '';
          foreach ($pieces as $piece) {
              $segment .= $piece;
              if (trim($segment) === '' || strcasecmp($segment, 'OR') === 0) {
                  // Blank text or a bare OR delimiter: nothing to search for
                  $segment = '';
                  continue;
              }
              $quotes = substr_count($segment, '"') + substr_count($segment, '&quot;');
              if ($quotes % 2 === 0) {
                  // Quotes are balanced, so the segment is complete
                  $this->searches[] = new FreshRSS_Search(trim($segment));
                  $segment = '';
              }
          }
          // Whatever is left had unbalanced quotes: keep it as a last segment
          $segment = trim($segment);
          if ($segment !== '') {
              $this->searches[] = new FreshRSS_Search($segment);
          }
      }

      /**
       * Either a list of FreshRSS_BooleanSearch combined by implicit AND
       * or a series of FreshRSS_Search combined by explicit OR
       * @return array<FreshRSS_BooleanSearch|FreshRSS_Search>
       */
      public function searches(): array {
          return $this->searches;
      }

      /** @return 'AND'|'OR'|'AND NOT' depending on how this BooleanSearch should be combined */
      public function operator(): string {
          return $this->operator;
      }

      /** @param FreshRSS_BooleanSearch|FreshRSS_Search $search */
      public function add($search): void {
          $this->searches[] = $search;
      }

      /** The string form is the trimmed raw input, see getRawInput(). */
      public function __toString(): string {
          return $this->getRawInput();
      }

      /** @return string The trimmed input given to the constructor, or '' when it was blank. */
      public function getRawInput(): string {
          return $this->raw_input;
      }
  }