BooleanSearch.php 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. <?php
  2. /**
  3. * Contains Boolean search from the search form.
  4. */
  5. class FreshRSS_BooleanSearch {
  6. /** @var string */
  7. private $raw_input = '';
  8. /** @var array<FreshRSS_BooleanSearch|FreshRSS_Search> */
  9. private $searches = [];
  10. /**
  11. * @phpstan-var 'AND'|'OR'|'AND NOT'
  12. * @var string
  13. */
  14. private $operator;
  15. /** @param 'AND'|'OR'|'AND NOT' $operator */
  16. public function __construct(string $input, int $level = 0, string $operator = 'AND') {
  17. $this->operator = $operator;
  18. $input = trim($input);
  19. if ($input == '') {
  20. return;
  21. }
  22. $this->raw_input = $input;
  23. if ($level === 0) {
  24. $input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input);
  25. $input = preg_replace('/(?<=[\s!-]|^)&quot;(.*?)&quot;/', '"\1"', $input);
  26. $input = $this->parseUserQueryNames($input);
  27. $input = $this->parseUserQueryIds($input);
  28. }
  29. // Either parse everything as a series of BooleanSearch’s combined by implicit AND
  30. // or parse everything as a series of Search’s combined by explicit OR
  31. $this->parseParentheses($input, $level) || $this->parseOrSegments($input);
  32. }
  33. /**
  34. * Parse the user queries (saved searches) by name and expand them in the input string.
  35. */
  36. private function parseUserQueryNames(string $input): string {
  37. $all_matches = [];
  38. if (preg_match_all('/\bsearch:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matchesFound)) {
  39. $all_matches[] = $matchesFound;
  40. }
  41. if (preg_match_all('/\bsearch:(?P<search>[^\s"]*)/', $input, $matchesFound)) {
  42. $all_matches[] = $matchesFound;
  43. }
  44. if (!empty($all_matches)) {
  45. /** @var array<string,FreshRSS_UserQuery> */
  46. $queries = [];
  47. foreach (FreshRSS_Context::$user_conf->queries as $raw_query) {
  48. $query = new FreshRSS_UserQuery($raw_query);
  49. $queries[$query->getName()] = $query;
  50. }
  51. $fromS = [];
  52. $toS = [];
  53. foreach ($all_matches as $matches) {
  54. if (empty($matches['search'])) {
  55. continue;
  56. }
  57. for ($i = count($matches['search']) - 1; $i >= 0; $i--) {
  58. $name = trim($matches['search'][$i]);
  59. if (!empty($queries[$name])) {
  60. $fromS[] = $matches[0][$i];
  61. $toS[] = '(' . trim($queries[$name]->getSearch()) . ')';
  62. }
  63. }
  64. }
  65. $input = str_replace($fromS, $toS, $input);
  66. }
  67. return $input;
  68. }
  69. /**
  70. * Parse the user queries (saved searches) by ID and expand them in the input string.
  71. */
  72. private function parseUserQueryIds(string $input): string {
  73. $all_matches = [];
  74. if (preg_match_all('/\bS:(?P<search>\d+)/', $input, $matchesFound)) {
  75. $all_matches[] = $matchesFound;
  76. }
  77. if (!empty($all_matches)) {
  78. /** @var array<string,FreshRSS_UserQuery> */
  79. $queries = [];
  80. foreach (FreshRSS_Context::$user_conf->queries as $raw_query) {
  81. $query = new FreshRSS_UserQuery($raw_query);
  82. $queries[] = $query;
  83. }
  84. $fromS = [];
  85. $toS = [];
  86. foreach ($all_matches as $matches) {
  87. if (empty($matches['search'])) {
  88. continue;
  89. }
  90. for ($i = count($matches['search']) - 1; $i >= 0; $i--) {
  91. // Index starting from 1
  92. $id = (int)(trim($matches['search'][$i])) - 1;
  93. if (!empty($queries[$id])) {
  94. $fromS[] = $matches[0][$i];
  95. $toS[] = '(' . trim($queries[$id]->getSearch()) . ')';
  96. }
  97. }
  98. }
  99. $input = str_replace($fromS, $toS, $input);
  100. }
  101. return $input;
  102. }
  103. /** @return bool True if some parenthesis logic took over, false otherwise */
  104. private function parseParentheses(string $input, int $level): bool {
  105. $input = trim($input);
  106. $length = strlen($input);
  107. $i = 0;
  108. $before = '';
  109. $hasParenthesis = false;
  110. $nextOperator = 'AND';
  111. while ($i < $length) {
  112. $c = $input[$i];
  113. $backslashed = $i >= 1 ? $input[$i - 1] === '\\' : false;
  114. if ($c === '(' && !$backslashed) {
  115. $hasParenthesis = true;
  116. $before = trim($before);
  117. if (preg_match('/[!-]$/i', $before)) {
  118. // Trim trailing negation
  119. $before = substr($before, 0, -1);
  120. // The text prior to the negation is a BooleanSearch
  121. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  122. if (count($searchBefore->searches()) > 0) {
  123. $this->searches[] = $searchBefore;
  124. }
  125. $before = '';
  126. // The next BooleanSearch will have to be combined with AND NOT instead of default AND
  127. $nextOperator = 'AND NOT';
  128. } elseif (preg_match('/\bOR$/i', $before)) {
  129. // Trim trailing OR
  130. $before = substr($before, 0, -2);
  131. // The text prior to the OR is a BooleanSearch
  132. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  133. if (count($searchBefore->searches()) > 0) {
  134. $this->searches[] = $searchBefore;
  135. }
  136. $before = '';
  137. // The next BooleanSearch will have to be combined with OR instead of default AND
  138. $nextOperator = 'OR';
  139. } elseif ($before !== '') {
  140. // The text prior to the opening parenthesis is a BooleanSearch
  141. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  142. if (count($searchBefore->searches()) > 0) {
  143. $this->searches[] = $searchBefore;
  144. }
  145. $before = '';
  146. }
  147. // Search the matching closing parenthesis
  148. $parentheses = 1;
  149. $sub = '';
  150. $i++;
  151. while ($i < $length) {
  152. $c = $input[$i];
  153. $backslashed = $input[$i - 1] === '\\';
  154. if ($c === '(' && !$backslashed) {
  155. // One nested level deeper
  156. $parentheses++;
  157. $sub .= $c;
  158. } elseif ($c === ')' && !$backslashed) {
  159. $parentheses--;
  160. if ($parentheses === 0) {
  161. // Found the matching closing parenthesis
  162. $searchSub = new FreshRSS_BooleanSearch($sub, $level + 1, $nextOperator);
  163. $nextOperator = 'AND';
  164. if (count($searchSub->searches()) > 0) {
  165. $this->searches[] = $searchSub;
  166. }
  167. $sub = '';
  168. break;
  169. } else {
  170. $sub .= $c;
  171. }
  172. } else {
  173. $sub .= $c;
  174. }
  175. $i++;
  176. }
  177. // $sub = trim($sub);
  178. // if ($sub != '') {
  179. // // TODO: Consider throwing an error or warning in case of non-matching parenthesis
  180. // }
  181. // } elseif ($c === ')') {
  182. // // TODO: Consider throwing an error or warning in case of non-matching parenthesis
  183. } else {
  184. $before .= $c;
  185. }
  186. $i++;
  187. }
  188. if ($hasParenthesis) {
  189. $before = trim($before);
  190. if (preg_match('/^OR\b/i', $before)) {
  191. // The next BooleanSearch will have to be combined with OR instead of default AND
  192. $nextOperator = 'OR';
  193. // Trim leading OR
  194. $before = substr($before, 2);
  195. }
  196. // The remaining text after the last parenthesis is a BooleanSearch
  197. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  198. $nextOperator = 'AND';
  199. if (count($searchBefore->searches()) > 0) {
  200. $this->searches[] = $searchBefore;
  201. }
  202. return true;
  203. }
  204. // There was no parenthesis logic to apply
  205. return false;
  206. }
  207. private function parseOrSegments(string $input): void {
  208. $input = trim($input);
  209. if ($input === '') {
  210. return;
  211. }
  212. $splits = preg_split('/\b(OR)\b/i', $input, -1, PREG_SPLIT_DELIM_CAPTURE) ?: [];
  213. $segment = '';
  214. $ns = count($splits);
  215. for ($i = 0; $i < $ns; $i++) {
  216. $segment = $segment . $splits[$i];
  217. if (trim($segment) == '' || strcasecmp($segment, 'OR') === 0) {
  218. $segment = '';
  219. } else {
  220. $quotes = substr_count($segment, '"') + substr_count($segment, '&quot;');
  221. if ($quotes % 2 === 0) {
  222. $segment = trim($segment);
  223. $this->searches[] = new FreshRSS_Search($segment);
  224. $segment = '';
  225. }
  226. }
  227. }
  228. $segment = trim($segment);
  229. if ($segment != '') {
  230. $this->searches[] = new FreshRSS_Search($segment);
  231. }
  232. }
  233. /**
  234. * Either a list of FreshRSS_BooleanSearch combined by implicit AND
  235. * or a series of FreshRSS_Search combined by explicit OR
  236. * @return array<FreshRSS_BooleanSearch|FreshRSS_Search>
  237. */
  238. public function searches(): array {
  239. return $this->searches;
  240. }
  241. /** @return 'AND'|'OR'|'AND NOT' depending on how this BooleanSearch should be combined */
  242. public function operator(): string {
  243. return $this->operator;
  244. }
  245. /** @param FreshRSS_BooleanSearch|FreshRSS_Search $search */
  246. public function add($search): void {
  247. $this->searches[] = $search;
  248. }
  249. public function __toString(): string {
  250. return $this->getRawInput();
  251. }
  252. public function getRawInput(): string {
  253. return $this->raw_input;
  254. }
  255. }