| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263 |
- <?php
/**
 * Contains Boolean search from the search form.
 *
 * The input string is parsed into a tree: a FreshRSS_BooleanSearch holds either
 * nested FreshRSS_BooleanSearch objects (one per parenthesised group or run of text
 * around the groups) or plain FreshRSS_Search objects (one per OR-separated segment).
 * Each nested FreshRSS_BooleanSearch carries the operator ('AND' or 'OR') with which
 * it must be combined with its preceding sibling.
 */
class FreshRSS_BooleanSearch {

    /** @var string Trimmed input as passed to the constructor, before any expansion */
    private $raw_input = '';

    /** @var array<FreshRSS_BooleanSearch|FreshRSS_Search> */
    private $searches = [];

    /** @var string 'AND' or 'OR' */
    private $operator;

    /**
     * @param string $input Search expression to parse
     * @param int $level Nesting depth; 0 for the expression coming from the user, incremented for each recursion
     * @param string $operator 'AND' or 'OR': how this search combines with the previous sibling
     */
    public function __construct(string $input, int $level = 0, string $operator = 'AND') {
        $this->operator = $operator;
        $input = trim($input);
        if ($input === '') {
            return;
        }
        $this->raw_input = $input;

        if ($level === 0) {
            // Pre-processing is done only once, on the outermost expression
            $input = self::decodeQuoteEntities($input);
            $input = $this->parseUserQueryNames($input);
            $input = $this->parseUserQueryIds($input);
        }

        // Either parse everything as a series of BooleanSearch's combined by implicit AND
        // or parse everything as a series of Search's combined by explicit OR
        if (!$this->parseParentheses($input, $level)) {
            $this->parseOrSegments($input);
        }
    }

    /**
     * Turn pairs of `&quot;` entities that delimit a quoted value into literal double quotes,
     * both after a colon (`key:&quot;value&quot;`) and at the start of a term.
     * NOTE(review): presumably needed because the search string arrives HTML-escaped; confirm against the caller.
     */
    private static function decodeQuoteEntities(string $input): string {
        $rules = [
            '/:&quot;(.*?)&quot;/' => ':"\1"',
            '/(?<=[\s!-]|^)&quot;(.*?)&quot;/' => '"\1"',
        ];
        foreach ($rules as $pattern => $replacement) {
            // preg_replace() returns null on PCRE failure: keep the input untouched in that case
            $input = preg_replace($pattern, $replacement, $input) ?? $input;
        }
        return $input;
    }

    /**
     * Build the list of saved user queries from the current user configuration.
     * @return array<int,FreshRSS_UserQuery> in configuration order, indexed from 0
     */
    private function loadUserQueries(): array {
        $queries = [];
        foreach (FreshRSS_Context::$user_conf->queries as $raw_query) {
            $queries[] = new FreshRSS_UserQuery($raw_query);
        }
        return $queries;
    }

    /**
     * Parse the user queries (saved searches) by name and expand them in the input string.
     * Both `search:"name"` / `search:'name'` and the unquoted `search:name` forms are recognised;
     * each known name is replaced by the saved search expression wrapped in parentheses.
     */
    private function parseUserQueryNames(string $input): string {
        $all_matches = [];
        if (preg_match_all('/\bsearch:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
            $all_matches[] = $matches;
        }
        if (preg_match_all('/\bsearch:(?P<search>[^\s"]*)/', $input, $matches)) {
            $all_matches[] = $matches;
        }
        if (empty($all_matches)) {
            return $input;
        }

        /** @var array<string,FreshRSS_UserQuery> */
        $queries = [];
        foreach ($this->loadUserQueries() as $query) {
            $queries[$query->getName()] = $query;
        }

        $fromS = [];
        $toS = [];
        foreach ($all_matches as $matches) {
            for ($i = count($matches['search']) - 1; $i >= 0; $i--) {
                $name = trim($matches['search'][$i]);
                if (isset($queries[$name])) {
                    $fromS[] = $matches[0][$i];
                    $toS[] = '(' . trim($queries[$name]->getSearch()) . ')';
                }
            }
        }
        return str_replace($fromS, $toS, $input);
    }

    /**
     * Parse the user queries (saved searches) by ID and expand them in the input string.
     * The form is `S:<number>`, where the number is the 1-based position of the saved query.
     */
    private function parseUserQueryIds(string $input): string {
        if (!preg_match_all('/\bS:(?P<search>\d+)/', $input, $matches)) {
            return $input;
        }

        $queries = $this->loadUserQueries();
        $fromS = [];
        $toS = [];
        for ($i = count($matches['search']) - 1; $i >= 0; $i--) {
            // Index starting from 1
            $id = intval(trim($matches['search'][$i])) - 1;
            if (isset($queries[$id])) {
                $fromS[] = $matches[0][$i];
                $toS[] = '(' . trim($queries[$id]->getSearch()) . ')';
            }
        }
        return str_replace($fromS, $toS, $input);
    }

    /**
     * Parse $input as a nested FreshRSS_BooleanSearch and keep it only if it produced at least one search.
     */
    private function addSubSearch(string $input, int $level, string $operator) {
        $search = new FreshRSS_BooleanSearch($input, $level, $operator);
        if (count($search->searches()) > 0) {
            $this->searches[] = $search;
        }
    }

    /**
     * Split the input around top-level parenthesised groups, creating one nested
     * FreshRSS_BooleanSearch per group and per run of text between groups.
     * An `OR` directly before a group (or directly after the last group) makes the following
     * element combine with OR instead of the default AND.
     * An opening parenthesis without a matching closing one is treated as closed at the end of the input.
     * @return bool True if some parenthesis logic took over, false otherwise
     */
    private function parseParentheses(string $input, int $level): bool {
        $input = trim($input);
        $length = strlen($input);
        $before = '';
        $hasParenthesis = false;
        $nextOperator = 'AND';

        for ($i = 0; $i < $length; $i++) {
            $c = $input[$i];
            if ($c !== '(') {
                // Note: a stray ')' outside any group is kept as ordinary text
                $before .= $c;
                continue;
            }

            $hasParenthesis = true;
            $before = trim($before);
            if (preg_match('/\bOR$/i', $before)) {
                // The text prior to the trailing OR is a BooleanSearch
                $this->addSubSearch((string)substr($before, 0, -2), $level + 1, $nextOperator);
                // The next BooleanSearch will have to be combined with OR instead of default AND
                $nextOperator = 'OR';
            } elseif ($before !== '') {
                // The text prior to the opening parenthesis is a BooleanSearch
                $this->addSubSearch($before, $level + 1, $nextOperator);
            }
            $before = '';

            // Collect the content up to the matching closing parenthesis (or the end of the input)
            $depth = 1;
            $sub = '';
            for ($i++; $i < $length; $i++) {
                $c = $input[$i];
                if ($c === '(') {
                    // One nested level deeper
                    $depth++;
                } elseif ($c === ')' && --$depth === 0) {
                    // Found the matching closing parenthesis; the outer loop increment skips it
                    break;
                }
                $sub .= $c;
            }
            $this->addSubSearch($sub, $level + 1, $nextOperator);
            $nextOperator = 'AND';
        }

        if (!$hasParenthesis) {
            // There was no parenthesis logic to apply
            return false;
        }

        $before = trim($before);
        if (preg_match('/^OR\b/i', $before)) {
            // The remaining BooleanSearch will have to be combined with OR instead of default AND
            $nextOperator = 'OR';
            // Trim leading OR
            $before = (string)substr($before, 2);
        }
        // The remaining text after the last parenthesis is a BooleanSearch
        $this->addSubSearch($before, $level + 1, $nextOperator);
        return true;
    }

    /**
     * Split the input on the `OR` keyword (case-insensitive) and create one FreshRSS_Search per segment.
     * An `OR` located inside a quoted phrase does not split: pieces are glued back together
     * until the number of quote marks in the current segment is even.
     * @return void
     */
    private function parseOrSegments(string $input) {
        $input = trim($input);
        if ($input === '') {
            return;
        }

        $splits = preg_split('/\b(OR)\b/i', $input, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($splits === false) {
            // PCRE failure: fall back to a single segment
            $splits = [$input];
        }

        $segment = '';
        foreach ($splits as $split) {
            $segment .= $split;
            if (trim($segment) === '' || strcasecmp($segment, 'OR') === 0) {
                // Nothing but whitespace or a bare OR delimiter so far
                $segment = '';
                continue;
            }
            // Count literal quotes and still-encoded ones; an odd total means we are inside a quoted phrase
            $quotes = substr_count($segment, '"') + substr_count($segment, '&quot;');
            if ($quotes % 2 === 0) {
                $this->searches[] = new FreshRSS_Search(trim($segment));
                $segment = '';
            }
        }

        // Leftover with unbalanced quotes: keep it as a search rather than losing it
        $segment = trim($segment);
        if ($segment !== '') {
            $this->searches[] = new FreshRSS_Search($segment);
        }
    }

    /**
     * Either a list of FreshRSS_BooleanSearch combined by implicit AND
     * or a series of FreshRSS_Search combined by explicit OR
     * @return array<FreshRSS_BooleanSearch|FreshRSS_Search>
     */
    public function searches() {
        return $this->searches;
    }

    /** @return string 'AND' or 'OR' depending on how this BooleanSearch should be combined */
    public function operator(): string {
        return $this->operator;
    }

    /**
     * Append an already built search to this BooleanSearch.
     * @param FreshRSS_BooleanSearch|FreshRSS_Search $search
     */
    public function add($search) {
        $this->searches[] = $search;
    }

    /** @return string Same as getRawInput() */
    public function __toString(): string {
        return $this->getRawInput();
    }

    /** @return string The trimmed input given to the constructor, or '' when it was empty */
    public function getRawInput(): string {
        return $this->raw_input;
    }
}
|