Просмотр исходного кода

Fix MySQL transliterator_transliterate fallback (#8427)

The string syntax of `strtr()` cannot handle multibyte characters, so the fallback needs to be rewritten using an array-map approach.
Extend the fallback replacements to include the Windows/ISO charsets of the Latin languages for which we have a translation.
Alexandre Alapetite 2 месяцев назад
Родитель
Сommit
7573fee4f0
2 измененных файлов с 154 добавлено и 4 удалено
  1. 38 4
      app/Models/DatabaseDAO.php
  2. 116 0
      tests/app/Models/DatabaseDAOTest.php

+ 38 - 4
app/Models/DatabaseDAO.php

@@ -495,10 +495,44 @@ SQL;
 				return $transliterated;
 			}
 		}
-		return strtolower(strtr($str,
-			'ÀÁÂÃÄÅàáâãäåÒÓÔÕÖØòóôõöøÈÉÊËèéêëÇçÌÍÎÏìíîïÙÚÛÜùúûüÿÑñ',
-			'AAAAAAaaaaaaOOOOOOooooooEEEEeeeeCcIIIIiiiiUUUUuuuuyNn'
-		));
+		// Fallback covering only Latin: Windows-1252 / ISO-8859-15 / ISO-8859-1, Windows-1250 / ISO-8859-2, Windows-1257 / ISO-8859-13, Windows-1254 / ISO-8859-9
+		// phpcs:disable PSR12.Operators.OperatorSpacing.NoSpaceBefore, PSR12.Operators.OperatorSpacing.NoSpaceAfter, Squiz.WhiteSpace.OperatorSpacing.NoSpaceBefore, Squiz.WhiteSpace.OperatorSpacing.NoSpaceAfter
+		$replacements = [
+			'A' => 'a', 'À'=>'a', 'Á'=>'a', 'Â'=>'a', 'Ä'=>'a', 'Ã'=>'a', 'Å'=>'a', 'Ă'=>'a', 'Ą'=>'a', 'Ā'=>'a',
+			'à'=>'a', 'á'=>'a', 'â'=>'a', 'ä'=>'a', 'ã'=>'a', 'å'=>'a', 'ă'=>'a', 'ą'=>'a', 'ā'=>'a',
+			'B' => 'b',
+			'C' => 'c', 'Ç'=>'c', 'Ć'=>'c', 'Č'=>'c', 'ç'=>'c', 'ć'=>'c', 'č'=>'c',
+			'D' => 'd', 'Ď'=>'d', 'Đ'=>'d', 'ď'=>'d', 'đ'=>'d',
+			'E' => 'e', 'È'=>'e', 'É'=>'e', 'Ê'=>'e', 'Ë'=>'e', 'Ę'=>'e', 'Ě'=>'e', 'Ē'=>'e', 'Ė'=>'e',
+			'è'=>'e', 'é'=>'e', 'ê'=>'e', 'ë'=>'e', 'ę'=>'e', 'ě'=>'e', 'ē'=>'e', 'ė'=>'e',
+			'F' => 'f',
+			'G' => 'g', 'Ğ'=>'g', 'Ģ'=>'g', 'ğ'=>'g', 'ģ'=>'g',
+			'H' => 'h',
+			'I' => 'i', 'Ì'=>'i', 'Í'=>'i', 'Î'=>'i', 'Ï'=>'i', 'İ'=>'i', 'Ī'=>'i', 'Į'=>'i',
+			'ì'=>'i', 'í'=>'i', 'î'=>'i', 'ï'=>'i', 'ı'=>'i', 'ī'=>'i', 'į'=>'i',
+			'J' => 'j',
+			'K' => 'k', 'Ķ'=>'k', 'ķ'=>'k',
+			'L' => 'l', 'Ĺ'=>'l', 'Ľ'=>'l', 'Ł'=>'l', 'Ļ'=>'l', 'ĺ'=>'l', 'ľ'=>'l', 'ł'=>'l', 'ļ'=>'l',
+			'M' => 'm',
+			'N' => 'n', 'Ñ'=>'n', 'Ń'=>'n', 'Ň'=>'n', 'Ņ'=>'n', 'ñ'=>'n', 'ń'=>'n', 'ň'=>'n', 'ņ'=>'n',
+			'O' => 'o', 'Ò'=>'o', 'Ó'=>'o', 'Ô'=>'o', 'Ö'=>'o', 'Õ'=>'o', 'Ø'=>'o', 'Ő'=>'o', 'ò'=>'o', 'ó'=>'o', 'ô'=>'o', 'ö'=>'o', 'õ'=>'o', 'ø'=>'o', 'ő'=>'o',
+			'P' => 'p',
+			'Q' => 'q',
+			'R' => 'r', 'Ŕ'=>'r', 'Ř'=>'r', 'ŕ'=>'r', 'ř'=>'r',
+			'S' => 's', 'Ś'=>'s', 'Š'=>'s', 'Ş'=>'s', 'ß'=>'ss', 'ś'=>'s', 'š'=>'s', 'ş'=>'s',
+			'T' => 't', 'Ť'=>'t', 'Ţ'=>'t', 'ť'=>'t', 'ţ'=>'t',
+			'U' => 'u', 'Ù'=>'u', 'Ú'=>'u', 'Û'=>'u', 'Ü'=>'u', 'Ů'=>'u', 'Ű'=>'u', 'Ū'=>'u', 'Ų'=>'u',
+			'ù'=>'u', 'ú'=>'u', 'û'=>'u', 'ü'=>'u', 'ů'=>'u', 'ű'=>'u', 'ū'=>'u', 'ų'=>'u',
+			'V' => 'v',
+			'W' => 'w',
+			'X' => 'x',
+			'Y' => 'y', 'Ý'=>'y', 'Ÿ'=>'y', 'ý'=>'y', 'ÿ'=>'y',
+			'Z' => 'z', 'Ź'=>'z', 'Ż'=>'z', 'Ž'=>'z', 'ź'=>'z', 'ż'=>'z', 'ž'=>'z',
+			'Æ'=>'ae', 'æ'=>'ae',
+			'Œ'=>'oe', 'œ'=>'oe',
+		];
+		// phpcs:enable PSR12.Operators.OperatorSpacing.NoSpaceBefore, PSR12.Operators.OperatorSpacing.NoSpaceAfter, Squiz.WhiteSpace.OperatorSpacing.NoSpaceBefore, Squiz.WhiteSpace.OperatorSpacing.NoSpaceAfter
+		return strtr($str, $replacements);
 	}
 
 	/**

+ 116 - 0
tests/app/Models/DatabaseDAOTest.php

@@ -0,0 +1,116 @@
+<?php
+declare(strict_types=1);
+
+use PHPUnit\Framework\Attributes\DataProvider;
+
+final class DatabaseDAOTest extends \PHPUnit\Framework\TestCase {	// Checks strilike() of the MySQL, PGSQL and SQLite DAO classes
+
+	/** @return list<array{string,string,bool,bool}> */
+	public static function provideStrilikeCommon(): array {	// Datasets [haystack, needle, contains, expected] fed to all three tests
+		return [
+			['abc', 'abc', false, true],
+			['AbC', 'aBc', false, true],
+			['zabc', 'abc', false, false],
+			['abcd', 'abc', false, false],
+			['aéc', 'ac', false, false],
+			['abcd', 'bc', true, true],
+			['abcd', 'BC', true, true],
+			['aßc', 'ß', true, true],
+			['aéc', 'é', true, true],
+			['Été', 'Ét', true, true],
+			['aßc', 'ac', true, false],
+			['ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz', false, true],
+			['abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', true, true],
+		];
+	}
+
+	/** @return list<array{string,string,bool,bool}> */
+	public static function provideStrilikeAccents(): array {	// Accent differences expected NOT to match (PGSQL and SQLite tests)
+		return [
+			['café', 'cafè', false, false],
+			['Été', 'Eté', false, false],
+			['Été', 'Et', true, false],
+		];
+	}
+
+	/** @return list<array{string,string,bool,bool}> */
+	public static function provideStrilikeNoAccents(): array {	// Same inputs as above, but accents expected to be ignored (MySQL test)
+		return [
+			['café', 'cafè', false, true],
+			['Été', 'Eté', false, true],
+			['Été', 'Et', true, true],
+		];
+	}
+
+	/** @return list<array{string,string,bool,bool}> */
+	public static function provideStrilikeAccentsCasing(): array {	// Case ignored on accented letters while accents still matter (PGSQL test)
+		return [
+			['Été', 'été', false, true],
+			['AÎNÉE', 'aîné', true, true],
+			['AÎNÉ', 'aine', false, false],
+			['AÎNÉE', 'aine', true, false],
+		];
+	}
+
+	/** @return list<array{string,string,bool,bool}> */
+	public static function provideStrilikeUnicodeCasing(): array {	// Upper/lower-case pairs per language, all expected to match (MySQL and PGSQL tests)
+		return [
+			['ČĆĐŠŽ', 'čćđšž', false, true],	// Croatian
+			['ÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ', 'áčďéěíňóřšťúůýž', false, true],	// Czech
+			['ÆØÅ', 'æøå', false, true],	// Danish
+			['ŠŽÕÄÖÜ', 'šžõäöü', false, true],	// Estonian
+			['ÄÖ', 'äö', false, true],	// Finnish
+			['ÀÂÆÇÈÉÊËÎÏÔŒÙÛÜŸ', 'àâæçèéêëîïôœùûüÿ', false, true],	// French
+			['ÄÖÜ', 'äöü', false, true],	// German
+			['ΑΆΒΓΔΕΈΖΗΉΘΙΊΪΚΛΜΝΞΟΌΠΡΣΤΥΎΫΦΧΨΩΏ', 'αάβγδεέζηήθιίϊκλμνξοόπρστυύϋφχψωώ', false, true],	// Greek
+			['ÁÉÍÓÖŐÚÜŰ', 'áéíóöőúüű', false, true],	// Hungarian
+			['ÁÉÍÓÚ', 'áéíóú', false, true],	// Irish
+			['ÀÈÉÌÒÓÙ', 'àèéìòóù', false, true],	// Italian
+			['ĀČĒĢĪĶĻŅŠŪŽ', 'āčēģīķļņšūž', false, true],	// Latvian
+			['ĄČĘĖĮŠŲŪŽ', 'ąčęėįšųūž', false, true],	// Lithuanian
+			['ĊĠĦŻ', 'ċġħż', false, true],	// Maltese
+			['ĄĆĘŁŃÓŚŹŻ', 'ąćęłńóśźż', false, true],	// Polish
+			['ÁÂÃÇÉÍÓÕÚ', 'áâãçéíóõú', false, true],	// Portuguese
+			['ĂÂÎȘȚ', 'ăâîșț', false, true],	// Romanian
+			['ÁÄČĎÉÍĹĽŇÓÔŔŠŤÚÝŽ', 'áäčďéíĺľňóôŕšťúýž', false, true],	// Slovak
+			['ČŠŽ', 'čšž', false, true],	// Slovenian
+			['ÁÉÍÑÓÚÜ', 'áéíñóúü', false, true],	// Spanish
+			['ÅÄÖ', 'åäö', false, true],	// Swedish
+		];
+	}
+
+	/** @return list<array{string,string,bool,bool}> */
+	public static function provideStrilikeNoUnicodeCasing(): array {	// ASCII case ignored, non-ASCII case expected to matter (SQLite test)
+		return [
+			['café', 'cafè', false, false],
+			['café', 'Café', true, true],
+			['Été', 'été', true, false],
+		];
+	}
+
+	#[DataProvider('provideStrilikeCommon')]
+	#[DataProvider('provideStrilikeNoAccents')]
+	#[DataProvider('provideStrilikeUnicodeCasing')]
+	public static function test_strilike_MySQL(string $haystack, string $needle, bool $contains, bool $expected): void {
+		if (!function_exists('transliterator_transliterate') && preg_match('/\p{Greek}/u', $haystack . $needle) === 1) {	// Greek haystack is upper-case only, so look for the script in both strings
+			self::markTestSkipped('transliterator_transliterate function not available to handle e.g. Greek.');
+			return;	// @phpstan-ignore deadCode.unreachable
+		}
+		self::assertSame($expected, FreshRSS_DatabaseDAO::strilike($haystack, $needle, $contains));
+	}
+
+	#[DataProvider('provideStrilikeCommon')]
+	#[DataProvider('provideStrilikeAccents')]
+	#[DataProvider('provideStrilikeAccentsCasing')]
+	#[DataProvider('provideStrilikeUnicodeCasing')]
+	public static function test_strilike_PGSQL(string $haystack, string $needle, bool $contains, bool $expected): void {
+		self::assertSame($expected, FreshRSS_DatabaseDAOPGSQL::strilike($haystack, $needle, $contains));
+	}
+
+	#[DataProvider('provideStrilikeCommon')]
+	#[DataProvider('provideStrilikeAccents')]
+	#[DataProvider('provideStrilikeNoUnicodeCasing')]
+	public static function test_strilike_SQLite(string $haystack, string $needle, bool $contains, bool $expected): void {
+		self::assertSame($expected, FreshRSS_DatabaseDAOSQLite::strilike($haystack, $needle, $contains));
+	}
+}