Kaynağa Gözat

fix(i18n): validate language directory names against gen.lang.* keys (#8767)

Detect when an `app/i18n/<lang>/` directory has no matching `gen.lang.<lang>`
key in the reference language (or vice versa), and refuse to regenerate the
README from that invalid state.

This catches a class of silent corruption where the README translation
table renders literal i18n keys instead of localised language names. The
trigger is most often a case-folded directory on macOS APFS - git tracks
`zh-TW`, the local FS reads back `zh-tw`, the script's `_t('gen.lang.zh-tw')`
lookup misses, and the README ends up with `gen.lang.zh-tw (zh-tw)` instead
of `正體中文 (zh-TW)`. The same check also flags orphan directories (no
display-name key) and orphan keys (no directory).

The new validateLanguageNames() method on I18nData performs a bidirectional
set comparison and returns human-readable issues. cli/check.translation.php
prints them to STDERR and gates --generate-readme on the result, leaving
routine completeness validation behaviour unchanged. Adds four PHPUnit
tests covering: clean state, case mismatch, orphan directory, orphan key.

Co-authored-by: Bjørn A. Andersen <polybjorn@users.noreply.github.com>
polybjorn 2 hafta önce
ebeveyn
işleme
3a7431ce04

+ 13 - 0
cli/check.translation.php

@@ -43,6 +43,12 @@ if (isset($cliOptions->language)) {
 }
 
 $isValidated = true;
+$languageNameIssues = $i18nData->validateLanguageNames();
+foreach ($languageNameIssues as $issue) {
+	fwrite(STDERR, "Error: {$issue}\n");
+	$isValidated = false;
+}
+
 $result = [];
 $report = [];
 $percentage = [];
@@ -116,6 +122,13 @@ function writeToReadme(string $readmePath, string $markdownTable): void {
 }
 
 if ($cliOptions->generateReadme) {
+	if ($languageNameIssues !== []) {
+		// Refuse to regenerate the README when language directory names and
+		// `gen.lang.*` keys disagree, otherwise we would silently produce a
+		// corrupt translation table (e.g. literal `gen.lang.*` keys instead of
+		// localised language names). Routine incomplete translations are fine.
+		exit(1);
+	}
 	$markdownTable = <<<EOF
 	| __language__ | __translated__ | |
 	| - | - | - |

+ 39 - 0
cli/i18n/I18nData.php

@@ -246,6 +246,45 @@ class I18nData {
 		return $languages;
 	}
 
+	/**
+	 * Cross-check the available language directories against the
+	 * `gen.lang.<code>` display-name keys of the reference language. The
+	 * comparison is case-sensitive and works in both directions:
+	 * - a directory without a `gen.lang.<code>` key, e.g. a name case-folded by
+	 *   a case-insensitive filesystem (macOS APFS), a typo, or a new language
+	 *   whose display name has not been added yet;
+	 * - a `gen.lang.<code>` key without a directory, e.g. a key left behind
+	 *   after a language was removed.
+	 *
+	 * @return list<string> Human-readable issues, directories first; empty when consistent.
+	 */
+	public function validateLanguageNames(): array {
+		$keyPrefix = 'gen.lang.';
+		$referenceKeys = array_keys($this->data[static::REFERENCE_LANGUAGE]['gen.php'] ?? []);
+		$declaredCodes = [];
+		foreach ($referenceKeys as $referenceKey) {
+			$keyName = (string)$referenceKey;
+			if (!str_starts_with($keyName, $keyPrefix)) {
+				continue;
+			}
+			$declaredCodes[] = substr($keyName, strlen($keyPrefix));
+		}
+		sort($declaredCodes);
+		$directoryCodes = $this->getAvailableLanguages();
+
+		$messages = [];
+		foreach (array_diff($directoryCodes, $declaredCodes) as $code) {
+			$messages[] = sprintf('Language directory `app/i18n/%1$s/` has no matching `gen.lang.%1$s` key '
+				. 'in the reference language. Possible causes: case mismatch (e.g. on macOS APFS), typo, '
+				. 'or missing display-name key.', $code);
+		}
+		foreach (array_diff($declaredCodes, $directoryCodes) as $code) {
+			$messages[] = sprintf('Reference key `gen.lang.%1$s` has no matching `app/i18n/%1$s/` directory. '
+				. 'Possible cause: orphan key after a language was removed.', $code);
+		}
+		return $messages;
+	}
+
 	/**
 	 * Return all available languages without the reference language
 	 * @return list<string>

+ 46 - 0
tests/cli/i18n/I18nDataTest.php

@@ -869,4 +869,50 @@ final class I18nDataTest extends \PHPUnit\Framework\TestCase {
 		$data = new I18nData($rawData);
 		self::assertSame($this->referenceData['en'], $data->getReferenceLanguage());
 	}
+
+	/**
+	 * Build raw i18n data holding only an `en` language whose `gen.php` file
+	 * declares one `gen.lang.<code>` key per given code.
+	 * @return array<string,array<string,array<string,I18nValue>>>
+	 */
+	private function dataWithLangKeys(string ...$langCodes): array {
+		$keys = array_map(static fn (string $code): string => 'gen.lang.' . $code, $langCodes);
+		$displayNames = array_fill_keys($keys, $this->value);
+		return ['en' => ['gen.php' => $displayNames]];
+	}
+
+	public function testValidateLanguageNamesPassesWhenDirsAndKeysMatch(): void {
+		// Every declared code has a directory with exactly the same spelling.
+		$languages = ['fr' => [], 'zh-TW' => []];
+		$i18n = new I18nData($this->dataWithLangKeys('en', 'fr', 'zh-TW') + $languages);
+		$issues = $i18n->validateLanguageNames();
+		self::assertSame([], $issues);
+	}
+
+	public function testValidateLanguageNamesFlagsCaseMismatch(): void {
+		// The key is declared as `zh-TW` while the directory is named `zh-tw`.
+		$rawData = $this->dataWithLangKeys('en', 'zh-TW') + ['zh-tw' => []];
+		$issues = (new I18nData($rawData))->validateLanguageNames();
+		self::assertCount(2, $issues);
+		[$directoryIssue, $keyIssue] = $issues;
+		self::assertStringContainsString('app/i18n/zh-tw/', $directoryIssue);
+		self::assertStringContainsString('gen.lang.zh-TW', $keyIssue);
+	}
+
+	public function testValidateLanguageNamesFlagsOrphanDirectory(): void {
+		// `fr` exists as a directory but only `gen.lang.en` is declared.
+		$rawData = $this->dataWithLangKeys('en') + ['fr' => []];
+		$issues = (new I18nData($rawData))->validateLanguageNames();
+		self::assertCount(1, $issues);
+		$onlyIssue = $issues[0];
+		self::assertStringContainsString('app/i18n/fr/', $onlyIssue);
+	}
+
+	public function testValidateLanguageNamesFlagsOrphanKey(): void {
+		// `gen.lang.fr` is declared but no `fr` directory is present.
+		$i18n = new I18nData($this->dataWithLangKeys('en', 'fr'));
+		$issues = $i18n->validateLanguageNames();
+		self::assertCount(1, $issues);
+		self::assertStringContainsString('gen.lang.fr', $issues[0]);
+	}
 }