Kaynağa Gözat

PostgreSQL: compatibility with PCRE word boundary (#8141)

Allow the use of the PCRE regex escape `\b` for a word boundary (and `\B` for a non-boundary) even when using PostgreSQL.
Follow-up to:
* https://github.com/FreshRSS/FreshRSS/pull/6706

For instance, `intitle:/\bnew\B/` will find *newest* but neither *new* nor *renewal*.

Useful in particular to minimise the differences between PHP and database in:
* https://github.com/FreshRSS/FreshRSS/pull/7959
Alexandre Alapetite 5 ay önce
ebeveyn
işleme
1282d3a270

+ 6 - 0
app/Models/EntryDAOPGSQL.php

@@ -43,6 +43,12 @@ class FreshRSS_EntryDAOPGSQL extends FreshRSS_EntryDAOSQLite {
 	protected static function sqlRegex(string $expression, string $regex, array &$values): string {
 	protected static function sqlRegex(string $expression, string $regex, array &$values): string {
 		$matches = static::regexToSql($regex);
 		$matches = static::regexToSql($regex);
 		if (isset($matches['pattern'])) {
 		if (isset($matches['pattern'])) {
+			$replacements = [	// Convert some of the PCRE regex syntax to PostgreSQL
+				'\\b' => '\\y', // matches only at the beginning or end of a word (was: backspace)
+				'\\B' => '\\Y', // matches only at a point that is not the beginning or end of a word (was: backslash)
+			];
+			$matches['pattern'] = str_replace(array_keys($replacements), array_values($replacements), $matches['pattern']);
+
 			$matchType = $matches['matchType'] ?? '';
 			$matchType = $matches['matchType'] ?? '';
 			if (str_contains($matchType, 'm')) {
 			if (str_contains($matchType, 'm')) {
 				// newline-sensitive matching
 				// newline-sensitive matching

+ 2 - 0
docs/en/users/10_filter.md

@@ -155,6 +155,8 @@ As opposed to normal searches, special XML characters `<&">` are not escaped in
 	* [For MariaDB](https://mariadb.com/kb/en/pcre/);
 	* [For MariaDB](https://mariadb.com/kb/en/pcre/);
 	* [For MySQL](https://dev.mysql.com/doc/refman/9.0/en/regexp.html#function_regexp-like).
 	* [For MySQL](https://dev.mysql.com/doc/refman/9.0/en/regexp.html#function_regexp-like).
 
 
+> ℹ️ Even with PostgreSQL, you can use `\b` for a word boundary (and `\B` for a non-boundary), because they are automatically translated to PostgreSQL’s `\y` and `\Y`.
+
 ## By sorting by date
 ## By sorting by date
 
 
 You can change the sort order by clicking the toggle button available in the header.
 You can change the sort order by clicking the toggle button available in the header.

+ 2 - 0
docs/fr/users/03_Main_view.md

@@ -313,3 +313,5 @@ Contrairement aux recherches normales, les caractères spéciaux XML `<&">` ne s
 	* [Pour PostgreSQL](https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-POSIX-REGEXP) ;
 	* [Pour PostgreSQL](https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-POSIX-REGEXP) ;
 	* [Pour MariaDB](https://mariadb.com/kb/en/pcre/) ;
 	* [Pour MariaDB](https://mariadb.com/kb/en/pcre/) ;
 	* [Pour MySQL](https://dev.mysql.com/doc/refman/9.0/en/regexp.html#function_regexp-like).
 	* [Pour MySQL](https://dev.mysql.com/doc/refman/9.0/en/regexp.html#function_regexp-like).
+
+> ℹ️ Même avec PostgreSQL, vous pouvez utiliser `\b` pour les limites de mots (et `\B` pour l’inverse), car une traduction automatique est effectuée vers `\y` et `\Y`.

+ 10 - 0
tests/app/Models/SearchTest.php

@@ -751,6 +751,11 @@ final class SearchTest extends \PHPUnit\Framework\TestCase {
 				'(e.content ~ ? )',
 				'(e.content ~ ? )',
 				['^ab\\M']
 				['^ab\\M']
 			],
 			],
+			[
+				'intitle:/\\b\\d+/',
+				'(e.title ~ ? )',
+				['\\y\\d+']
+			],
 			[
 			[
 				'author:/^ab$/',
 				'author:/^ab$/',
 				"(REPLACE(e.author, ';', '\n') ~ ? )",
 				"(REPLACE(e.author, ';', '\n') ~ ? )",
@@ -819,6 +824,11 @@ final class SearchTest extends \PHPUnit\Framework\TestCase {
 				"(e.title REGEXP ? )",
 				"(e.title REGEXP ? )",
 				['(?-i)(?m)^ab$']
 				['(?-i)(?m)^ab$']
 			],
 			],
+			[
+				'intitle:/\\b\\d+/',
+				"(e.title REGEXP ? )",
+				['(?-i)\\b\\d+']
+			],
 			[
 			[
 				'intext:/^ab$/m',
 				'intext:/^ab$/m',
 				'(UNCOMPRESS(e.content_bin) REGEXP ?) )',
 				'(UNCOMPRESS(e.content_bin) REGEXP ?) )',