Kaynağa Gözat

Fix parsing of literal "or" in regex (#8338)

fix https://github.com/FreshRSS/FreshRSS/issues/7879
Alexandre Alapetite 3 ay önce
ebeveyn
işleme
6d57a9de47

+ 20 - 8
app/Models/BooleanSearch.php

@@ -29,7 +29,7 @@ class FreshRSS_BooleanSearch implements \Stringable {
 		$this->raw_input = $input;
 
 		if ($level === 0) {
-			$input = self::escapeLiteralParentheses($input);
+			$input = self::escapeLiterals($input);
 			$input = $this->parseUserQueryNames($input, $allowUserQueries);
 			$input = $this->parseUserQueryIds($input, $allowUserQueries);
 			$input = trim($input);
@@ -79,7 +79,7 @@ class FreshRSS_BooleanSearch implements \Stringable {
 					if (!empty($queries[$name])) {
 						$fromS[] = $matches[0][$i];
 						if ($allowUserQueries) {
-							$toS[] = '(' . self::escapeLiteralParentheses($queries[$name]) . ')';
+							$toS[] = '(' . self::escapeLiterals($queries[$name]) . ')';
 						} else {
 							$toS[] = '';
 						}
@@ -130,7 +130,7 @@ class FreshRSS_BooleanSearch implements \Stringable {
 
 					$fromS[] = $matches[0][$i];
 					if ($allowUserQueries) {
-						$escapedQueries = array_map(fn(string $query): string => self::escapeLiteralParentheses($query), $matchedQueries);
+						$escapedQueries = array_map(fn(string $query): string => self::escapeLiterals($query), $matchedQueries);
 						$toS[] = '(' . implode(') OR (', $escapedQueries) . ')';
 					} else {
 						$toS[] = '';
@@ -144,17 +144,29 @@ class FreshRSS_BooleanSearch implements \Stringable {
 	}
 
 	/**
-	 * Temporarily escape parentheses used in regex expressions or inside quoted strings.
+	 * Temporarily escape parentheses and 'OR' used in regex expressions or inside "quoted strings".
 	 */
-	public static function escapeLiteralParentheses(string $input): string {
+	public static function escapeLiterals(string $input): string {
 		return preg_replace_callback('%(?<=[\\s(:#!-]|^)(?<![\\\\])(?P<delim>[\'"/]).+?(?<!\\\\)(?P=delim)[im]*%',
-			fn(array $matches): string => str_replace(['(', ')'], ['\\u0028', '\\u0029'], $matches[0]),
+			function (array $matches): string {
+				$match = $matches[0];
+				$match = str_replace(['(', ')'], ['\\u0028', '\\u0029'], $match);
+				$match = preg_replace_callback('/\bOR\b/i', fn(array $ms): string =>
+					str_replace(['O', 'o', 'R', 'r'], ['\\u004f', '\\u006f', '\\u0052', '\\u0072'], $ms[0]),
+					$match
+				) ?? '';
+				return $match;
+			},
 			$input
 		) ?? '';
 	}
 
-	public static function unescapeLiteralParentheses(string $input): string {
-		return str_replace(['\\u0028', '\\u0029'], ['(', ')'], $input);
+	public static function unescapeLiterals(string $input): string {
+		return str_replace(
+			['\\u0028', '\\u0029', '\\u004f', '\\u006f', '\\u0052', '\\u0072'],
+			['(', ')', 'O', 'o', 'R', 'r'],
+			$input
+		);
 	}
 
 	/**

+ 1 - 1
app/Models/Search.php

@@ -114,7 +114,7 @@ class FreshRSS_Search implements \Stringable {
 	public function __construct(string $input) {
 		$input = self::cleanSearch($input);
 		$input = self::unescape($input);
-		$input = FreshRSS_BooleanSearch::unescapeLiteralParentheses($input);
+		$input = FreshRSS_BooleanSearch::unescapeLiterals($input);
 		$this->raw_input = $input;
 
 		$input = $this->parseNotEntryIds($input);

+ 23 - 0
tests/app/Models/SearchTest.php

@@ -790,6 +790,21 @@ final class SearchTest extends \PHPUnit\Framework\TestCase {
 				'((e.title LIKE ? OR e.content LIKE ?) )',
 				['%https://example.net/test/%', '%https://example.net/test/%']
 			],
+			[	// Regex with literal 'or'
+				'intitle:/^A or B/i',
+				'(e.title ~* ? )',
+				['^A or B']
+			],
+			[	// Regex with literal 'OR'
+				'intitle:/^A B OR C D/i OR intitle:/^A B OR C D/i',
+				'(e.title ~* ? ) OR (e.title ~* ? )',
+				['^A B OR C D', '^A B OR C D']
+			],
+			[	// Quote with literal 'OR'
+				'intitle:"A B OR C D" OR intitle:"E or F"',
+				'(e.title LIKE ? ) OR (e.title LIKE ? )',
+				['%A B OR C D%', '%E or F%']
+			],
 		];
 	}
 
@@ -997,6 +1012,14 @@ final class SearchTest extends \PHPUnit\Framework\TestCase {
 				'-intitle:a -inurl:b',
 				'-intitle:a -inurl:b',
 			],
+			[
+				'intitle:/^A or B/i',
+				'intitle:/^A or B/i',
+			],
+			[
+				'intitle:/^A B OR C D/i',
+				'intitle:/^A B OR C D/i',
+			],
 		];
 	}