BooleanSearch.php 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. <?php
  2. /**
  3. * Contains Boolean search from the search form.
  4. */
  5. class FreshRSS_BooleanSearch {
  6. /** @var string */
  7. private $raw_input = '';
  8. /** @var array<FreshRSS_BooleanSearch|FreshRSS_Search> */
  9. private $searches = array();
  10. /** @var string 'AND' or 'OR' or 'AND NOT' */
  11. private $operator;
  12. public function __construct(string $input, int $level = 0, $operator = 'AND') {
  13. $this->operator = $operator;
  14. $input = trim($input);
  15. if ($input == '') {
  16. return;
  17. }
  18. $this->raw_input = $input;
  19. if ($level === 0) {
  20. $input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input);
  21. $input = preg_replace('/(?<=[\s!-]|^)&quot;(.*?)&quot;/', '"\1"', $input);
  22. $input = $this->parseUserQueryNames($input);
  23. $input = $this->parseUserQueryIds($input);
  24. }
  25. // Either parse everything as a series of BooleanSearch's combined by implicit AND
  26. // or parse everything as a series of Search's combined by explicit OR
  27. $this->parseParentheses($input, $level) || $this->parseOrSegments($input);
  28. }
  29. /**
  30. * Parse the user queries (saved searches) by name and expand them in the input string.
  31. */
  32. private function parseUserQueryNames(string $input): string {
  33. $all_matches = [];
  34. if (preg_match_all('/\bsearch:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
  35. $all_matches[] = $matches;
  36. }
  37. if (preg_match_all('/\bsearch:(?P<search>[^\s"]*)/', $input, $matches)) {
  38. $all_matches[] = $matches;
  39. }
  40. if (!empty($all_matches)) {
  41. /** @var array<string,FreshRSS_UserQuery> */
  42. $queries = [];
  43. foreach (FreshRSS_Context::$user_conf->queries as $raw_query) {
  44. $query = new FreshRSS_UserQuery($raw_query);
  45. $queries[$query->getName()] = $query;
  46. }
  47. $fromS = [];
  48. $toS = [];
  49. foreach ($all_matches as $matches) {
  50. for ($i = count($matches['search']) - 1; $i >= 0; $i--) {
  51. $name = trim($matches['search'][$i]);
  52. if (!empty($queries[$name])) {
  53. $fromS[] = $matches[0][$i];
  54. $toS[] = '(' . trim($queries[$name]->getSearch()) . ')';
  55. }
  56. }
  57. }
  58. $input = str_replace($fromS, $toS, $input);
  59. }
  60. return $input;
  61. }
  62. /**
  63. * Parse the user queries (saved searches) by ID and expand them in the input string.
  64. */
  65. private function parseUserQueryIds(string $input): string {
  66. $all_matches = [];
  67. if (preg_match_all('/\bS:(?P<search>\d+)/', $input, $matches)) {
  68. $all_matches[] = $matches;
  69. }
  70. if (!empty($all_matches)) {
  71. /** @var array<string,FreshRSS_UserQuery> */
  72. $queries = [];
  73. foreach (FreshRSS_Context::$user_conf->queries as $raw_query) {
  74. $query = new FreshRSS_UserQuery($raw_query);
  75. $queries[] = $query;
  76. }
  77. $fromS = [];
  78. $toS = [];
  79. foreach ($all_matches as $matches) {
  80. for ($i = count($matches['search']) - 1; $i >= 0; $i--) {
  81. // Index starting from 1
  82. $id = intval(trim($matches['search'][$i])) - 1;
  83. if (!empty($queries[$id])) {
  84. $fromS[] = $matches[0][$i];
  85. $toS[] = '(' . trim($queries[$id]->getSearch()) . ')';
  86. }
  87. }
  88. }
  89. $input = str_replace($fromS, $toS, $input);
  90. }
  91. return $input;
  92. }
  93. /** @return bool True if some parenthesis logic took over, false otherwise */
  94. private function parseParentheses(string $input, int $level): bool {
  95. $input = trim($input);
  96. $length = strlen($input);
  97. $i = 0;
  98. $before = '';
  99. $hasParenthesis = false;
  100. $nextOperator = 'AND';
  101. while ($i < $length) {
  102. $c = $input[$i];
  103. $backslashed = $i >= 1 ? $input[$i - 1] === '\\' : false;
  104. if ($c === '(' && !$backslashed) {
  105. $hasParenthesis = true;
  106. $before = trim($before);
  107. if (preg_match('/[!-]$/i', $before)) {
  108. // Trim trailing negation
  109. $before = substr($before, 0, -1);
  110. // The text prior to the negation is a BooleanSearch
  111. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  112. if (count($searchBefore->searches()) > 0) {
  113. $this->searches[] = $searchBefore;
  114. }
  115. $before = '';
  116. // The next BooleanSearch will have to be combined with AND NOT instead of default AND
  117. $nextOperator = 'AND NOT';
  118. } elseif (preg_match('/\bOR$/i', $before)) {
  119. // Trim trailing OR
  120. $before = substr($before, 0, -2);
  121. // The text prior to the OR is a BooleanSearch
  122. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  123. if (count($searchBefore->searches()) > 0) {
  124. $this->searches[] = $searchBefore;
  125. }
  126. $before = '';
  127. // The next BooleanSearch will have to be combined with OR instead of default AND
  128. $nextOperator = 'OR';
  129. } elseif ($before !== '') {
  130. // The text prior to the opening parenthesis is a BooleanSearch
  131. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  132. if (count($searchBefore->searches()) > 0) {
  133. $this->searches[] = $searchBefore;
  134. }
  135. $before = '';
  136. }
  137. // Search the matching closing parenthesis
  138. $parentheses = 1;
  139. $sub = '';
  140. $i++;
  141. while ($i < $length) {
  142. $c = $input[$i];
  143. $backslashed = $input[$i - 1] === '\\';
  144. if ($c === '(' && !$backslashed) {
  145. // One nested level deeper
  146. $parentheses++;
  147. $sub .= $c;
  148. } elseif ($c === ')' && !$backslashed) {
  149. $parentheses--;
  150. if ($parentheses === 0) {
  151. // Found the matching closing parenthesis
  152. $searchSub = new FreshRSS_BooleanSearch($sub, $level + 1, $nextOperator);
  153. $nextOperator = 'AND';
  154. if (count($searchSub->searches()) > 0) {
  155. $this->searches[] = $searchSub;
  156. }
  157. $sub = '';
  158. break;
  159. } else {
  160. $sub .= $c;
  161. }
  162. } else {
  163. $sub .= $c;
  164. }
  165. $i++;
  166. }
  167. // $sub = trim($sub);
  168. // if ($sub != '') {
  169. // // TODO: Consider throwing an error or warning in case of non-matching parenthesis
  170. // }
  171. // } elseif ($c === ')') {
  172. // // TODO: Consider throwing an error or warning in case of non-matching parenthesis
  173. } else {
  174. $before .= $c;
  175. }
  176. $i++;
  177. }
  178. if ($hasParenthesis) {
  179. $before = trim($before);
  180. if (preg_match('/^OR\b/i', $before)) {
  181. // The next BooleanSearch will have to be combined with OR instead of default AND
  182. $nextOperator = 'OR';
  183. // Trim leading OR
  184. $before = substr($before, 2);
  185. }
  186. // The remaining text after the last parenthesis is a BooleanSearch
  187. $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
  188. $nextOperator = 'AND';
  189. if (count($searchBefore->searches()) > 0) {
  190. $this->searches[] = $searchBefore;
  191. }
  192. return true;
  193. }
  194. // There was no parenthesis logic to apply
  195. return false;
  196. }
  197. private function parseOrSegments(string $input) {
  198. $input = trim($input);
  199. if ($input == '') {
  200. return;
  201. }
  202. $splits = preg_split('/\b(OR)\b/i', $input, -1, PREG_SPLIT_DELIM_CAPTURE);
  203. $segment = '';
  204. $ns = count($splits);
  205. for ($i = 0; $i < $ns; $i++) {
  206. $segment = $segment . $splits[$i];
  207. if (trim($segment) == '' || strcasecmp($segment, 'OR') === 0) {
  208. $segment = '';
  209. } else {
  210. $quotes = substr_count($segment, '"') + substr_count($segment, '&quot;');
  211. if ($quotes % 2 === 0) {
  212. $segment = trim($segment);
  213. $this->searches[] = new FreshRSS_Search($segment);
  214. $segment = '';
  215. }
  216. }
  217. }
  218. $segment = trim($segment);
  219. if ($segment != '') {
  220. $this->searches[] = new FreshRSS_Search($segment);
  221. }
  222. }
  223. /**
  224. * Either a list of FreshRSS_BooleanSearch combined by implicit AND
  225. * or a series of FreshRSS_Search combined by explicit OR
  226. * @return array<FreshRSS_BooleanSearch|FreshRSS_Search>
  227. */
  228. public function searches() {
  229. return $this->searches;
  230. }
  231. /** @return string 'AND' or 'OR' depending on how this BooleanSearch should be combined */
  232. public function operator(): string {
  233. return $this->operator;
  234. }
  235. /** @param FreshRSS_BooleanSearch|FreshRSS_Search $search */
  236. public function add($search) {
  237. $this->searches[] = $search;
  238. }
  239. public function __toString(): string {
  240. return $this->getRawInput();
  241. }
  242. public function getRawInput(): string {
  243. return $this->raw_input;
  244. }
  245. }