BooleanSearch.php 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302
  1. <?php
  2. declare(strict_types=1);
  /**
   * Contains Boolean search from the search form.
   *
   * An instance holds either a list of nested FreshRSS_BooleanSearch (each one carrying
   * the operator used to combine it with what precedes it: AND, OR or AND NOT),
   * or a series of FreshRSS_Search combined by explicit OR.
   */
  class FreshRSS_BooleanSearch {

    /** Input string after trimming and, at level 0, after quote normalisation and saved-query expansion. */
    private string $raw_input = '';

    /** @var array<FreshRSS_BooleanSearch|FreshRSS_Search> */
    private array $searches = [];

    /**
     * How this search is combined with the preceding one.
     * @phpstan-var 'AND'|'OR'|'AND NOT'
     */
    private string $operator;

    /**
     * @param string $input Raw search expression.
     * @param int $level Nesting depth; `&quot;` normalisation and saved-query expansion only happen at level 0.
     * @param 'AND'|'OR'|'AND NOT' $operator Operator combining this search with the preceding one.
     * @param bool $allowUserQueries When false, references to existing saved queries are removed instead of expanded.
     */
    public function __construct(string $input, int $level = 0, string $operator = 'AND', bool $allowUserQueries = true) {
      $this->operator = $operator;
      $input = trim($input);
      if ($input === '') {
        return;
      }

      if ($level === 0) {
        // Turn HTML-encoded quotes back into real quotes, first after a colon (e.g. intitle:&quot;...&quot;) ...
        $input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input);
        if (!is_string($input)) {
          return;
        }
        // ... then at the start of the input or after whitespace / a negation sign.
        $input = preg_replace('/(?<=[\s!-]|^)&quot;(.*?)&quot;/', '"\1"', $input);
        if (!is_string($input)) {
          return;
        }

        $input = $this->parseUserQueryNames($input, $allowUserQueries);
        $input = $this->parseUserQueryIds($input, $allowUserQueries);
        $input = trim($input);
      }

      $this->raw_input = $input;

      // Either parse everything as a series of BooleanSearch combined by implicit AND,
      // or parse everything as a series of Search combined by explicit OR.
      if (!$this->parseParentheses($input, $level)) {
        $this->parseOrSegments($input);
      }
    }

    /**
     * Instantiate every saved user query from the user configuration, in configuration order.
     * @return array<int,FreshRSS_UserQuery>
     */
    private function loadUserQueries(): array {
      $queries = [];
      foreach (FreshRSS_Context::userConf()->queries as $rawQuery) {
        $queries[] = new FreshRSS_UserQuery($rawQuery, FreshRSS_Context::categories(), FreshRSS_Context::labels());
      }
      return $queries;
    }

    /**
     * Text that replaces a reference to the given saved query:
     * its raw search wrapped in parentheses, or an empty string when user queries are not allowed.
     */
    private function expandUserQuery(FreshRSS_UserQuery $query, bool $allowUserQueries): string {
      if (!$allowUserQueries) {
        return '';
      }
      return '(' . trim($query->getSearch()->getRawInput()) . ')';
    }

    /**
     * Parse the user queries (saved searches) by name and expand them in the input string.
     *
     * Recognises `search:name`, `search:"some name"` and `search:'some name'`.
     * References to unknown names are left untouched.
     * Every match is replaced at its own position in a single pass, so that a name which is
     * a prefix of another one (e.g. `search:foo` vs. `search:foobar`) cannot corrupt the longer reference.
     */
    private function parseUserQueryNames(string $input, bool $allowUserQueries = true): string {
      /** @var array<string|int,FreshRSS_UserQuery>|null $queriesByName Built lazily, only once a reference is found */
      $queriesByName = null;

      $expanded = preg_replace_callback(
        // The quoted form is tried first; the bare form stops at whitespace or a double quote
        '/\bsearch:(?:(?P<delim>[\'"])(?P<quoted>.*?)(?P=delim)|(?P<bare>[^\s"]*))/',
        function (array $match) use (&$queriesByName, $allowUserQueries): string {
          if ($queriesByName === null) {
            $queriesByName = [];
            foreach ($this->loadUserQueries() as $query) {
              // In case of duplicate names, the last saved query wins
              $queriesByName[$query->getName()] = $query;
            }
          }
          $isQuoted = ($match['delim'] ?? '') !== '';
          $name = trim($isQuoted ? ($match['quoted'] ?? '') : ($match['bare'] ?? ''));
          if (!isset($queriesByName[$name])) {
            return $match[0];
          }
          return $this->expandUserQuery($queriesByName[$name], $allowUserQueries);
        },
        $input
      );

      // preg_replace_callback() returns null on PCRE error: keep the input unchanged in that case
      return is_string($expanded) ? $expanded : $input;
    }

    /**
     * Parse the user queries (saved searches) by ID and expand them in the input string.
     *
     * Recognises `S:n` where n is the 1-based position of the saved query in the user configuration.
     * References to unknown IDs are left untouched.
     * Every match is replaced at its own position in a single pass, so that e.g. `S:1` cannot
     * corrupt `S:12`.
     */
    private function parseUserQueryIds(string $input, bool $allowUserQueries = true): string {
      /** @var array<int,FreshRSS_UserQuery>|null $queries Loaded lazily, only once a reference is found */
      $queries = null;

      $expanded = preg_replace_callback(
        '/\bS:(?P<search>\d+)/',
        function (array $match) use (&$queries, $allowUserQueries): string {
          if ($queries === null) {
            $queries = $this->loadUserQueries();
          }
          // IDs start from 1 whereas the list is 0-indexed
          $index = (int)$match['search'] - 1;
          if (!isset($queries[$index])) {
            return $match[0];
          }
          return $this->expandUserQuery($queries[$index], $allowUserQueries);
        },
        $input
      );

      // preg_replace_callback() returns null on PCRE error: keep the input unchanged in that case
      return is_string($expanded) ? $expanded : $input;
    }

    /**
     * Parse the text as a nested BooleanSearch and append it to this search, unless it turns out to be empty.
     * @param 'AND'|'OR'|'AND NOT' $operator
     */
    private function appendSubSearch(string $text, int $level, string $operator): void {
      $search = new FreshRSS_BooleanSearch($text, $level, $operator);
      if (count($search->searches()) > 0) {
        $this->searches[] = $search;
      }
    }

    /**
     * Split the input on its top-level parentheses (those not preceded by a backslash) and
     * append one nested BooleanSearch per chunk of text before, inside and after each group.
     *
     * A `!` or `-` right before an opening parenthesis makes the group combined with AND NOT;
     * an `OR` right before it makes the group combined with OR.
     * If an opening parenthesis is never closed, the text collected after it is discarded.
     *
     * @return bool True if some parenthesis logic took over, false otherwise
     */
    private function parseParentheses(string $input, int $level): bool {
      $input = trim($input);
      $length = strlen($input);
      $before = '';
      $hasParenthesis = false;
      $nextOperator = 'AND';

      for ($i = 0; $i < $length; $i++) {
        $c = $input[$i];
        $backslashed = $i > 0 && $input[$i - 1] === '\\';
        if ($c !== '(' || $backslashed) {
          $before .= $c;
          continue;
        }

        $hasParenthesis = true;
        $before = trim($before);

        // Operator requested for the upcoming group by the text right before the parenthesis, if any
        $groupOperator = null;
        if (preg_match('/[!-]$/i', $before)) {
          // Trim trailing negation
          $before = substr($before, 0, -1);
          $groupOperator = 'AND NOT';
        } elseif (preg_match('/\bOR$/i', $before)) {
          // Trim trailing OR
          $before = substr($before, 0, -2);
          $groupOperator = 'OR';
        }
        if ($groupOperator !== null || $before !== '') {
          // The text prior to the opening parenthesis (or to its operator) is a BooleanSearch
          $this->appendSubSearch($before, $level + 1, $nextOperator);
          $before = '';
        }
        $nextOperator = $groupOperator ?? $nextOperator;

        // Search the matching closing parenthesis
        $depth = 1;
        $sub = '';
        for ($i++; $i < $length; $i++) {
          $c = $input[$i];
          $backslashed = $input[$i - 1] === '\\';
          if (!$backslashed && $c === '(') {
            // One nested level deeper
            $depth++;
          } elseif (!$backslashed && $c === ')' && --$depth === 0) {
            // Found the matching closing parenthesis
            $this->appendSubSearch($sub, $level + 1, $nextOperator);
            $nextOperator = 'AND';
            break;
          }
          $sub .= $c;
        }
        // TODO: Consider throwing an error or warning in case of non-matching parenthesis
      }

      if (!$hasParenthesis) {
        // There was no parenthesis logic to apply
        return false;
      }

      $before = trim($before);
      if (preg_match('/^OR\b/i', $before)) {
        // The remaining text will have to be combined with OR instead of default AND
        $nextOperator = 'OR';
        // Trim leading OR
        $before = substr($before, 2);
      }
      // The remaining text after the last parenthesis is a BooleanSearch
      $this->appendSubSearch($before, $level + 1, $nextOperator);
      return true;
    }

    /**
     * Split the input on the word OR (case-insensitive) and append one FreshRSS_Search per segment.
     * A segment containing an odd number of quotes is merged with the following pieces,
     * so that an OR inside a quoted string does not split it.
     */
    private function parseOrSegments(string $input): void {
      $input = trim($input);
      if ($input === '') {
        return;
      }

      $pieces = preg_split('/\b(OR)\b/i', $input, -1, PREG_SPLIT_DELIM_CAPTURE) ?: [];
      $segment = '';
      foreach ($pieces as $piece) {
        $segment .= $piece;
        if (trim($segment) === '' || strcasecmp($segment, 'OR') === 0) {
          // Blank segment or the OR delimiter itself
          $segment = '';
          continue;
        }
        $quotes = substr_count($segment, '"') + substr_count($segment, '&quot;');
        if ($quotes % 2 === 0) {
          // Quotes are balanced: the segment is complete
          $this->searches[] = new FreshRSS_Search(trim($segment));
          $segment = '';
        }
      }

      // Left-over segment with unbalanced quotes
      $segment = trim($segment);
      if ($segment !== '') {
        $this->searches[] = new FreshRSS_Search($segment);
      }
    }

    /**
     * Either a list of FreshRSS_BooleanSearch combined by implicit AND
     * or a series of FreshRSS_Search combined by explicit OR
     * @return array<FreshRSS_BooleanSearch|FreshRSS_Search>
     */
    public function searches(): array {
      return $this->searches;
    }

    /** @return 'AND'|'OR'|'AND NOT' depending on how this BooleanSearch should be combined */
    public function operator(): string {
      return $this->operator;
    }

    /**
     * Append an already built search to this one (the raw input is not updated).
     * @param FreshRSS_BooleanSearch|FreshRSS_Search $search
     */
    public function add($search): void {
      $this->searches[] = $search;
    }

    #[\Override]
    public function __toString(): string {
      return $this->getRawInput();
    }

    /** @return string The input as stored by the constructor (trimmed; saved queries expanded at level 0) */
    public function getRawInput(): string {
      return $this->raw_input;
    }
  }