| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532 |
- <?php
- declare(strict_types=1);
/**
 * Contains Boolean search from the search form.
 *
 * An instance holds either a list of nested FreshRSS_BooleanSearch (one per parenthesised group
 * and per run of text between groups, each carrying the operator used to combine it with the
 * previous one), or a list of FreshRSS_Search obtained by splitting the input on explicit `OR`.
 */
class FreshRSS_BooleanSearch implements \Stringable {

	/** Trimmed input string as received by the constructor; reset to '' by enforce() and remove(). */
	private string $raw_input = '';

	/** @var list<FreshRSS_BooleanSearch|FreshRSS_Search> */
	private array $searches = [];

	/**
	 * Parse a search expression.
	 *
	 * At level 0 only: quoted/regex parentheses are escaped and saved user queries
	 * (`search:name`, `S:1,2`) are expanded (or stripped when not allowed) before parsing.
	 *
	 * @param string $input search expression; an empty (after trim) input produces an empty search
	 * @param int $level nesting depth, 0 for the top-level expression
	 * @param 'AND'|'OR'|'AND NOT'|'OR NOT' $operator how this search is combined with the previous sibling
	 * @param bool $allowUserQueries when false, references to saved user queries are replaced by an empty string
	 */
	public function __construct(
		string $input,
		int $level = 0,
		private readonly string $operator = 'AND',
		bool $allowUserQueries = true
	) {
		$input = trim($input);
		if ($input === '') {
			return;
		}
		$this->raw_input = $input;

		if ($level === 0) {
			$input = self::escapeLiteralParentheses($input);
			$input = $this->parseUserQueryNames($input, $allowUserQueries);
			$input = $this->parseUserQueryIds($input, $allowUserQueries);
			$input = trim($input);
		}

		$input = self::consistentOrParentheses($input);

		// Either parse everything as a series of BooleanSearch’s combined by implicit AND
		// or parse everything as a series of Search’s combined by explicit OR
		if (!$this->parseParentheses($input, $level)) {
			$this->parseOrSegments($input);
		}
	}

	/** Deep-copy the nested searches so that a clone can be modified independently. */
	public function __clone() {
		foreach ($this->searches as $key => $search) {
			$this->searches[$key] = clone $search;
		}
	}

	/**
	 * Parse the user queries (saved searches) by name and expand them in the input string.
	 *
	 * Recognises `search:"some name"`, `search:'some name'` and `search:name`.
	 * Only names matching a configured query with a non-empty name and search are replaced:
	 * by the saved search wrapped in parentheses, or by an empty string when user queries are not allowed.
	 */
	private function parseUserQueryNames(string $input, bool $allowUserQueries = true): string {
		$all_matches = [];
		if (preg_match_all('/\bsearch:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matchesFound)) {
			$all_matches[] = $matchesFound;
		}
		if (preg_match_all('/\bsearch:(?P<search>[^\s"]*)/', $input, $matchesFound)) {
			$all_matches[] = $matchesFound;
		}

		if (!empty($all_matches)) {
			// Index the saved searches by name
			$queries = [];
			foreach (FreshRSS_Context::userConf()->queries as $raw_query) {
				if (($raw_query['name'] ?? '') !== '' && ($raw_query['search'] ?? '') !== '') {
					$queries[$raw_query['name']] = trim($raw_query['search']);
				}
			}

			$fromS = [];
			$toS = [];
			foreach ($all_matches as $matches) {
				if (empty($matches['search'])) {
					continue;
				}
				for ($i = count($matches['search']) - 1; $i >= 0; $i--) {
					$name = trim($matches['search'][$i]);
					if (!empty($queries[$name])) {
						$fromS[] = $matches[0][$i];
						if ($allowUserQueries) {
							// Parentheses inside quotes/regex of the saved search must not be seen as grouping
							$toS[] = '(' . self::escapeLiteralParentheses($queries[$name]) . ')';
						} else {
							$toS[] = '';
						}
					}
				}
			}

			$input = str_replace($fromS, $toS, $input);
		}
		return $input;
	}

	/**
	 * Parse the user queries (saved searches) by ID and expand them in the input string.
	 *
	 * Recognises `S:1` and `S:1,2,3`, where each ID is the zero-based position of the query
	 * in the user configuration. Several IDs are combined with `OR`.
	 * Empty list items (e.g. the trailing comma of `S:1,`) are ignored.
	 */
	private function parseUserQueryIds(string $input, bool $allowUserQueries = true): string {
		$all_matches = [];
		if (preg_match_all('/\bS:(?P<search>[0-9,]+)/', $input, $matchesFound)) {
			$all_matches[] = $matchesFound;
		}

		if (!empty($all_matches)) {
			// Saved searches in configuration order, so that the array index is the query ID
			$queries = [];
			foreach (FreshRSS_Context::userConf()->queries as $raw_query) {
				$queries[] = trim($raw_query['search'] ?? '');
			}

			$fromS = [];
			$toS = [];
			foreach ($all_matches as $matches) {
				if (empty($matches['search'])) {
					continue;
				}
				for ($i = count($matches['search']) - 1; $i >= 0; $i--) {
					$matchedQueries = [];
					foreach (explode(',', $matches['search'][$i]) as $rawId) {
						if ($rawId === '') {
							// An empty item would otherwise be cast to 0 and wrongly select the first saved query
							continue;
						}
						$id = (int)$rawId;
						if (!empty($queries[$id])) {
							$matchedQueries[] = $queries[$id];
						}
					}
					if (empty($matchedQueries)) {
						continue;
					}
					$fromS[] = $matches[0][$i];
					if ($allowUserQueries) {
						$escapedQueries = array_map(
							static fn(string $query): string => self::escapeLiteralParentheses($query),
							$matchedQueries
						);
						$toS[] = '(' . implode(') OR (', $escapedQueries) . ')';
					} else {
						$toS[] = '';
					}
				}
			}

			$input = str_replace($fromS, $toS, $input);
		}
		return $input;
	}

	/**
	 * Temporarily escape parentheses used in regex expressions or inside quoted strings.
	 *
	 * Parentheses found between matching `'`, `"` or `/` delimiters are replaced by the literal
	 * sequences `\u0028` and `\u0029`, so that they are not mistaken for grouping parentheses.
	 *
	 * @return string the escaped input, or an empty string if the regular expression engine fails
	 * @see self::unescapeLiteralParentheses() for the reverse operation
	 */
	public static function escapeLiteralParentheses(string $input): string {
		return preg_replace_callback('%(?<=[\\s(:#!-]|^)(?<![\\\\])(?P<delim>[\'"/]).+?(?<!\\\\)(?P=delim)[im]*%',
			static fn(array $matches): string => str_replace(['(', ')'], ['\\u0028', '\\u0029'], $matches[0]),
			$input
		) ?? '';
	}

	/** Reverse of escapeLiteralParentheses(): turn `\u0028` and `\u0029` back into `(` and `)`. */
	public static function unescapeLiteralParentheses(string $input): string {
		return str_replace(['\\u0028', '\\u0029'], ['(', ')'], $input);
	}

	/**
	 * Tell whether a segment contains an even number of double quotes,
	 * i.e. whether it does not end in the middle of a quoted phrase.
	 * Both the literal `"` and its HTML-encoded form `&quot;` are counted.
	 */
	private static function hasBalancedQuotes(string $segment): bool {
		$quotes = substr_count($segment, '"') + substr_count($segment, '&quot;');
		return $quotes % 2 === 0;
	}

	/**
	 * Wrap each operand of an `OR` expression in parentheses.
	 * An `OR` located inside a double-quoted phrase is not considered as an operator.
	 * A segment consisting only of a negation sign (`!` or `-`) is kept as is, without parentheses.
	 * Example: 'ab cd OR ef OR "gh ij"' becomes '(ab cd) OR (ef) OR ("gh ij")'
	 */
	public static function addOrParentheses(string $input): string {
		$input = trim($input);
		if ($input === '') {
			return '';
		}
		$splits = preg_split('/\b(OR)\b/i', $input, -1, PREG_SPLIT_DELIM_CAPTURE) ?: [];
		$ns = count($splits);
		if ($ns <= 1) {
			// No OR operator at all: nothing to wrap
			return $input;
		}

		$result = '';
		$segment = '';
		for ($i = 0; $i < $ns; $i++) {
			$segment .= $splits[$i];
			if (trim($segment) === '') {
				$segment = '';
			} elseif (strcasecmp($segment, 'OR') === 0) {
				$result .= $segment . ' ';
				$segment = '';
			} elseif (self::hasBalancedQuotes($segment)) {
				$segment = trim($segment);
				if (in_array($segment, ['!', '-'], true)) {
					$result .= $segment;
				} else {
					$result .= '(' . $segment . ') ';
				}
				$segment = '';
			}
			// Otherwise we are inside a quoted phrase: keep accumulating until the quote is closed
		}

		// Whatever remains (e.g. an unterminated quote) is emitted as a last operand
		$segment = trim($segment);
		if (in_array($segment, ['!', '-'], true)) {
			$result .= $segment;
		} elseif ($segment !== '') {
			$result .= '(' . $segment . ')';
		}
		return trim($result);
	}

	/**
	 * If the query contains a mix of `OR` expressions with and without parentheses,
	 * then add parentheses to make the query consistent.
	 * Text outside of any parenthesis goes through addOrParentheses(); the content of each
	 * parenthesised group is processed recursively, and is left unchanged when it contains no parenthesis itself.
	 * Example: '(ab (cd OR ef)) OR gh OR ij OR (kl)' becomes '(ab (cd OR ef)) OR (gh) OR (ij) OR (kl)'
	 */
	public static function consistentOrParentheses(string $input): string {
		if (!preg_match('/(?<!\\\\)\\(/', $input)) {
			// No unescaped parentheses in the input
			return trim($input);
		}

		$parenthesesCount = 0;
		$result = '';
		$segment = '';
		$length = strlen($input);
		for ($i = 0; $i < $length; $i++) {
			$c = $input[$i];
			$backslashed = $i >= 1 && $input[$i - 1] === '\\';
			if (!$backslashed) {
				if ($c === '(') {
					if ($parenthesesCount === 0) {
						// Opening a top-level group: flush the text accumulated before it
						if ($segment !== '') {
							$result = rtrim($result) . ' ' . self::addOrParentheses($segment);
							$negation = preg_match('/[!-]$/', $result);
							if (!$negation) {
								// Keep a negation sign glued to the group that follows
								$result .= ' ';
							}
							$segment = '';
						}
						$c = '';
					}
					$parenthesesCount++;
				} elseif ($c === ')') {
					$parenthesesCount--;
					if ($parenthesesCount === 0) {
						// Closing a top-level group: process its content recursively
						$segment = self::consistentOrParentheses($segment);
						if ($segment !== '') {
							$result .= '(' . $segment . ')';
							$segment = '';
						}
						$c = '';
					}
				}
			}
			$segment .= $c;
		}

		if (trim($segment) !== '') {
			// Text remaining after the last group
			$result = rtrim($result);
			$negation = preg_match('/[!-]$/', $segment);
			if (!$negation) {
				$result .= ' ';
			}
			$result .= self::addOrParentheses($segment);
		}
		return trim($result);
	}

	/**
	 * Parse a fragment as a nested BooleanSearch and append it to the list of searches, unless it is empty.
	 *
	 * @param 'AND'|'OR'|'AND NOT'|'OR NOT' $operator how the nested search is combined with the previous one
	 */
	private function addNestedSearch(string $input, int $level, string $operator): void {
		$nested = new FreshRSS_BooleanSearch($input, $level + 1, $operator);
		if (count($nested->searches()) > 0) {
			$this->searches[] = $nested;
		}
	}

	/**
	 * Split the input on its top-level (unescaped) parentheses into nested BooleanSearch’s.
	 * The text preceding a group, the group itself, and the text following the last group each become
	 * a nested search; a trailing `OR` and/or negation sign (`!`, `-`) just before a group decides
	 * the operator used for that group.
	 *
	 * @return bool True if some parenthesis logic took over, false otherwise
	 */
	private function parseParentheses(string $input, int $level): bool {
		$input = trim($input);
		$length = strlen($input);
		$i = 0;
		$before = '';
		$hasParenthesis = false;
		$nextOperator = 'AND';
		while ($i < $length) {
			$c = $input[$i];
			$backslashed = $i >= 1 && $input[$i - 1] === '\\';
			if ($c === '(' && !$backslashed) {
				$hasParenthesis = true;

				$before = trim($before);
				if (preg_match('/[!-]$/', $before)) {
					// Trim trailing negation
					$before = rtrim($before, ' !-');

					$isOr = preg_match('/\bOR$/i', $before);
					if ($isOr) {
						// Trim trailing OR
						$before = substr($before, 0, -2);
					}

					// The text prior to the negation is a BooleanSearch
					$this->addNestedSearch($before, $level, $nextOperator);
					$before = '';

					// The next BooleanSearch will have to be combined with AND NOT or OR NOT instead of default AND
					$nextOperator = $isOr ? 'OR NOT' : 'AND NOT';
				} elseif (preg_match('/\bOR$/i', $before)) {
					// Trim trailing OR
					$before = substr($before, 0, -2);

					// The text prior to the OR is a BooleanSearch
					$this->addNestedSearch($before, $level, $nextOperator);
					$before = '';

					// The next BooleanSearch will have to be combined with OR instead of default AND
					$nextOperator = 'OR';
				} elseif ($before !== '') {
					// The text prior to the opening parenthesis is a BooleanSearch
					$this->addNestedSearch($before, $level, $nextOperator);
					$before = '';
				}

				// Search the matching closing parenthesis
				$parentheses = 1;
				$sub = '';
				$i++;
				while ($i < $length) {
					$c = $input[$i];
					$backslashed = $input[$i - 1] === '\\';
					if ($c === '(' && !$backslashed) {
						// One nested level deeper
						$parentheses++;
						$sub .= $c;
					} elseif ($c === ')' && !$backslashed) {
						$parentheses--;
						if ($parentheses === 0) {
							// Found the matching closing parenthesis
							$this->addNestedSearch($sub, $level, $nextOperator);
							$nextOperator = 'AND';
							$sub = '';
							break;
						}
						$sub .= $c;
					} else {
						$sub .= $c;
					}
					$i++;
				}
				// TODO: Consider throwing an error or warning in case of non-matching parenthesis
				// (a group that is never closed is currently dropped; a stray `)` is kept as plain text)
			} else {
				$before .= $c;
			}
			$i++;
		}

		if ($hasParenthesis) {
			$before = trim($before);
			if (preg_match('/^OR\b/i', $before)) {
				// The next BooleanSearch will have to be combined with OR instead of default AND
				$nextOperator = 'OR';
				// Trim leading OR
				$before = substr($before, 2);
			}

			// The remaining text after the last parenthesis is a BooleanSearch
			$this->addNestedSearch($before, $level, $nextOperator);
			return true;
		}

		// There was no parenthesis logic to apply
		return false;
	}

	/**
	 * Split the input on explicit `OR` operators and add one FreshRSS_Search per operand.
	 * An `OR` located inside a double-quoted phrase is not considered as an operator.
	 */
	private function parseOrSegments(string $input): void {
		$input = trim($input);
		if ($input === '') {
			return;
		}
		$splits = preg_split('/\b(OR)\b/i', $input, -1, PREG_SPLIT_DELIM_CAPTURE) ?: [];
		$segment = '';
		$ns = count($splits);
		for ($i = 0; $i < $ns; $i++) {
			$segment .= $splits[$i];
			if (trim($segment) === '' || strcasecmp($segment, 'OR') === 0) {
				$segment = '';
			} elseif (self::hasBalancedQuotes($segment)) {
				$this->searches[] = new FreshRSS_Search(trim($segment));
				$segment = '';
			}
			// Otherwise we are inside a quoted phrase: keep accumulating until the quote is closed
		}

		// Whatever remains (e.g. an unterminated quote) is a last operand
		$segment = trim($segment);
		if ($segment !== '') {
			$this->searches[] = new FreshRSS_Search($segment);
		}
	}

	/**
	 * Either a list of FreshRSS_BooleanSearch combined by implicit AND
	 * or a series of FreshRSS_Search combined by explicit OR
	 * @return list<FreshRSS_BooleanSearch|FreshRSS_Search>
	 */
	public function searches(): array {
		return $this->searches;
	}

	/** @return 'AND'|'OR'|'AND NOT'|'OR NOT' depending on how this BooleanSearch should be combined */
	public function operator(): string {
		return $this->operator;
	}

	/** Insert a search at the beginning of the list of searches. */
	public function prepend(FreshRSS_BooleanSearch|FreshRSS_Search $search): void {
		array_unshift($this->searches, $search);
	}

	/** Append a search at the end of the list of searches. */
	public function add(FreshRSS_BooleanSearch|FreshRSS_Search $search): void {
		$this->searches[] = $search;
	}

	/**
	 * Modify the first compatible search of the Boolean expression, or add it at the beginning.
	 * Useful to modify some search parameters.
	 * The current instance is left untouched; the raw input of the returned copy is reset.
	 * @return FreshRSS_BooleanSearch a new instance, modified.
	 */
	public function enforce(FreshRSS_Search $search): self {
		$result = clone $this;
		$result->raw_input = '';

		// Simple case: a single plain search, which is asked to merge the enforced parameters
		if (count($result->searches) === 1 && $result->searches[0] instanceof FreshRSS_Search) {
			$result->searches[0] = $result->searches[0]->enforce($search);
			return $result;
		}

		// Two nested searches: replace the first AND-combined one holding a single search with the same operators
		if (count($result->searches) === 2) {
			foreach ($result->searches as $booleanSearch) {
				if (!($booleanSearch instanceof FreshRSS_BooleanSearch)) {
					break;
				}
				if ($booleanSearch->operator() === 'AND') {
					if (count($booleanSearch->searches) === 1 && $booleanSearch->searches[0] instanceof FreshRSS_Search &&
						$booleanSearch->searches[0]->hasSameOperators($search)) {
						$booleanSearch->searches[0] = $search;
						return $result;
					}
				}
			}
		}

		if (count($result->searches) > 1 || (count($result->searches) > 0 && $result->searches[0] instanceof FreshRSS_Search)) {
			// Wrap the existing searches in a new BooleanSearch if needed
			$wrap = new FreshRSS_BooleanSearch('');
			foreach ($result->searches as $existingSearch) {
				$wrap->add($existingSearch);
			}
			if (count($wrap->searches) > 0) {
				$result->searches = [$wrap];
			}
		}
		array_unshift($result->searches, $search);
		return $result;
	}

	/**
	 * Remove the first compatible search of the Boolean expression, if any.
	 * Useful to modify some search parameters.
	 * The current instance is left untouched; the raw input of the returned copy is reset.
	 * @return FreshRSS_BooleanSearch a new instance, modified.
	 */
	public function remove(FreshRSS_Search $search): self {
		$result = clone $this;
		$result->raw_input = '';

		// Simple case: a single plain search, which is asked to drop the given parameters
		if (count($result->searches) === 1 && $result->searches[0] instanceof FreshRSS_Search) {
			$result->searches[0] = $result->searches[0]->remove($search);
			return $result;
		}

		// Two nested searches: empty the first AND-combined one holding a single search with the same operators
		if (count($result->searches) === 2) {
			foreach ($result->searches as $booleanSearch) {
				if (!($booleanSearch instanceof FreshRSS_BooleanSearch)) {
					break;
				}
				if ($booleanSearch->operator() === 'AND') {
					if (count($booleanSearch->searches) === 1 && $booleanSearch->searches[0] instanceof FreshRSS_Search &&
						$booleanSearch->searches[0]->hasSameOperators($search)) {
						array_shift($booleanSearch->searches);
						return $result;
					}
				}
			}
		}
		return $result;
	}

	/**
	 * Rebuild a search expression from the parsed searches.
	 * Plain searches are joined by ` OR `; nested Boolean searches are joined according to their operator,
	 * negated ones being prefixed by `-`. A part containing a space or starting with `-` is wrapped
	 * in parentheses when there are several parts or when it is negated.
	 */
	#[\Override]
	public function __toString(): string {
		$result = '';
		foreach ($this->searches as $search) {
			$part = $search->__toString();
			if ($part === '') {
				continue;
			}
			$operator = $search instanceof FreshRSS_BooleanSearch ? $search->operator : 'OR';

			if ((str_contains($part, ' ') || str_starts_with($part, '-')) && (count($this->searches) > 1 || in_array($operator, ['OR NOT', 'AND NOT'], true))) {
				$part = '(' . $part . ')';
			}

			// No binary operator in front of the very first part
			$result .= match ($operator) {
				'OR' => $result === '' ? '' : ' OR ',
				'OR NOT' => $result === '' ? '-' : ' OR -',
				'AND NOT' => $result === '' ? '-' : ' -',
				'AND' => $result === '' ? '' : ' ',
				default => throw new InvalidArgumentException('Invalid operator: ' . $operator),
			} . $part;
		}
		return trim($result);
	}

	/** @return string Plain text search query. Must be XML-encoded or URL-encoded depending on the situation */
	#[Deprecated('Use __tostring() instead')]
	public function getRawInput(): string {
		return $this->raw_input;
	}
}
|