BooleanSearch.php 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. <?php
  2. /**
  3. * Contains Boolean search from the search form.
  4. */
  5. class FreshRSS_BooleanSearch {
  6. /** @var string */
  7. private $raw_input = '';
  8. /** @var array<FreshRSS_BooleanSearch|FreshRSS_Search> */
  9. private $searches = array();
  10. /** @var 'AND'|'OR'|'AND NOT' */
  11. private $operator;
  12. /** @param 'AND'|'OR'|'AND NOT' $operator */
  13. public function __construct(string $input, int $level = 0, string $operator = 'AND') {
  14. $this->operator = $operator;
  15. $input = trim($input);
  16. if ($input == '') {
  17. return;
  18. }
  19. $this->raw_input = $input;
  20. if ($level === 0) {
  21. $input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input);
  22. $input = preg_replace('/(?<=[\s!-]|^)&quot;(.*?)&quot;/', '"\1"', $input);
  23. $input = $this->parseUserQueryNames($input);
  24. $input = $this->parseUserQueryIds($input);
  25. }
  26. // Either parse everything as a series of BooleanSearch's combined by implicit AND
  27. // or parse everything as a series of Search's combined by explicit OR
  28. $this->parseParentheses($input, $level) || $this->parseOrSegments($input);
  29. }
  30. /**
  31. * Parse the user queries (saved searches) by name and expand them in the input string.
  32. */
  33. private function parseUserQueryNames(string $input): string {
  34. $all_matches = [];
  35. if (preg_match_all('/\bsearch:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  36. $all_matches[] = $matches;
  37. }
  38. if (preg_match_all('/\bsearch:(?P<search>[^\s"]*)/', $input, $matches)) {
  39. $all_matches[] = $matches;
  40. }
  41. if (!empty($all_matches)) {
  42. /** @var array<string,FreshRSS_UserQuery> */
  43. $queries = [];
  44. foreach (FreshRSS_Context::$user_conf->queries as $raw_query) {
  45. $query = new FreshRSS_UserQuery($raw_query);
  46. $queries[$query->getName()] = $query;
  47. }
  48. $fromS = [];
  49. $toS = [];
  50. foreach ($all_matches as $matches) {
  51. for ($i = count($matches['search']) - 1; $i >= 0; $i--) {
  52. $name = trim($matches['search'][$i]);
  53. if (!empty($queries[$name])) {
  54. $fromS[] = $matches[0][$i];
  55. $toS[] = '(' . trim($queries[$name]->getSearch()) . ')';
  56. }
  57. }
  58. }
  59. $input = str_replace($fromS, $toS, $input);
  60. }
  61. return $input;
  62. }
  63. /**
  64. * Parse the user queries (saved searches) by ID and expand them in the input string.
  65. */
  66. private function parseUserQueryIds(string $input): string {
  67. $all_matches = [];
  68. if (preg_match_all('/\bS:(?P<search>\d+)/', $input, $matches)) {
  69. $all_matches[] = $matches;
  70. }
  71. if (!empty($all_matches)) {
  72. /** @var array<string,FreshRSS_UserQuery> */
  73. $queries = [];
  74. foreach (FreshRSS_Context::$user_conf->queries as $raw_query) {
  75. $query = new FreshRSS_UserQuery($raw_query);
  76. $queries[] = $query;
  77. }
  78. $fromS = [];
  79. $toS = [];
  80. foreach ($all_matches as $matches) {
  81. for ($i = count($matches['search']) - 1; $i >= 0; $i--) {
  82. // Index starting from 1
  83. $id = intval(trim($matches['search'][$i])) - 1;
  84. if (!empty($queries[$id])) {
  85. $fromS[] = $matches[0][$i];
  86. $toS[] = '(' . trim($queries[$id]->getSearch()) . ')';
  87. }
  88. }
  89. }
  90. $input = str_replace($fromS, $toS, $input);
  91. }
  92. return $input;
  93. }
  94. /** @return bool True if some parenthesis logic took over, false otherwise */
  95. private function parseParentheses(string $input, int $level): bool {
  96. $input = trim($input);
  97. $length = strlen($input);
  98. $i = 0;
  99. $before = '';
  100. $hasParenthesis = false;
  101. $nextOperator = 'AND';
  102. while ($i < $length) {
  103. $c = $input[$i];
  104. $backslashed = $i >= 1 ? $input[$i - 1] === '\\' : false;
  105. if ($c === '(' && !$backslashed) {
  106. $hasParenthesis = true;
  107. $before = trim($before);
  108. if (preg_match('/[!-]$/i', $before)) {
  109. // Trim trailing negation
  110. $before = substr($before, 0, -1);
  111. // The text prior to the negation is a BooleanSearch
  112. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  113. if (count($searchBefore->searches()) > 0) {
  114. $this->searches[] = $searchBefore;
  115. }
  116. $before = '';
  117. // The next BooleanSearch will have to be combined with AND NOT instead of default AND
  118. $nextOperator = 'AND NOT';
  119. } elseif (preg_match('/\bOR$/i', $before)) {
  120. // Trim trailing OR
  121. $before = substr($before, 0, -2);
  122. // The text prior to the OR is a BooleanSearch
  123. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  124. if (count($searchBefore->searches()) > 0) {
  125. $this->searches[] = $searchBefore;
  126. }
  127. $before = '';
  128. // The next BooleanSearch will have to be combined with OR instead of default AND
  129. $nextOperator = 'OR';
  130. } elseif ($before !== '') {
  131. // The text prior to the opening parenthesis is a BooleanSearch
  132. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  133. if (count($searchBefore->searches()) > 0) {
  134. $this->searches[] = $searchBefore;
  135. }
  136. $before = '';
  137. }
  138. // Search the matching closing parenthesis
  139. $parentheses = 1;
  140. $sub = '';
  141. $i++;
  142. while ($i < $length) {
  143. $c = $input[$i];
  144. $backslashed = $input[$i - 1] === '\\';
  145. if ($c === '(' && !$backslashed) {
  146. // One nested level deeper
  147. $parentheses++;
  148. $sub .= $c;
  149. } elseif ($c === ')' && !$backslashed) {
  150. $parentheses--;
  151. if ($parentheses === 0) {
  152. // Found the matching closing parenthesis
  153. $searchSub = new FreshRSS_BooleanSearch($sub, $level + 1, $nextOperator);
  154. $nextOperator = 'AND';
  155. if (count($searchSub->searches()) > 0) {
  156. $this->searches[] = $searchSub;
  157. }
  158. $sub = '';
  159. break;
  160. } else {
  161. $sub .= $c;
  162. }
  163. } else {
  164. $sub .= $c;
  165. }
  166. $i++;
  167. }
  168. // $sub = trim($sub);
  169. // if ($sub != '') {
  170. // // TODO: Consider throwing an error or warning in case of non-matching parenthesis
  171. // }
  172. // } elseif ($c === ')') {
  173. // // TODO: Consider throwing an error or warning in case of non-matching parenthesis
  174. } else {
  175. $before .= $c;
  176. }
  177. $i++;
  178. }
  179. if ($hasParenthesis) {
  180. $before = trim($before);
  181. if (preg_match('/^OR\b/i', $before)) {
  182. // The next BooleanSearch will have to be combined with OR instead of default AND
  183. $nextOperator = 'OR';
  184. // Trim leading OR
  185. $before = substr($before, 2);
  186. }
  187. // The remaining text after the last parenthesis is a BooleanSearch
  188. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  189. $nextOperator = 'AND';
  190. if (count($searchBefore->searches()) > 0) {
  191. $this->searches[] = $searchBefore;
  192. }
  193. return true;
  194. }
  195. // There was no parenthesis logic to apply
  196. return false;
  197. }
  198. private function parseOrSegments(string $input): void {
  199. $input = trim($input);
  200. if ($input == '') {
  201. return;
  202. }
  203. $splits = preg_split('/\b(OR)\b/i', $input, -1, PREG_SPLIT_DELIM_CAPTURE);
  204. $segment = '';
  205. $ns = count($splits);
  206. for ($i = 0; $i < $ns; $i++) {
  207. $segment = $segment . $splits[$i];
  208. if (trim($segment) == '' || strcasecmp($segment, 'OR') === 0) {
  209. $segment = '';
  210. } else {
  211. $quotes = substr_count($segment, '"') + substr_count($segment, '&quot;');
  212. if ($quotes % 2 === 0) {
  213. $segment = trim($segment);
  214. $this->searches[] = new FreshRSS_Search($segment);
  215. $segment = '';
  216. }
  217. }
  218. }
  219. $segment = trim($segment);
  220. if ($segment != '') {
  221. $this->searches[] = new FreshRSS_Search($segment);
  222. }
  223. }
  224. /**
  225. * Either a list of FreshRSS_BooleanSearch combined by implicit AND
  226. * or a series of FreshRSS_Search combined by explicit OR
  227. * @return array<FreshRSS_BooleanSearch|FreshRSS_Search>
  228. */
  229. public function searches() {
  230. return $this->searches;
  231. }
  232. /** @return 'AND'|'OR'|'AND NOT' depending on how this BooleanSearch should be combined */
  233. public function operator(): string {
  234. return $this->operator;
  235. }
  236. /** @param FreshRSS_BooleanSearch|FreshRSS_Search $search */
  237. public function add($search): void {
  238. $this->searches[] = $search;
  239. }
  240. public function __toString(): string {
  241. return $this->getRawInput();
  242. }
  243. public function getRawInput(): string {
  244. return $this->raw_input;
  245. }
  246. }