| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294 |
- <?php
- declare(strict_types=1);
/**
 * Compiles GNU gettext "Plural-Forms" headers into PHP arrow-function source code.
 *
 * Plural form inspired by GNU gettext plural forms, converted into PHP lambdas.
 * https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
 *
 * Example: `nplurals=2; plural=(n != 1);` is compiled to
 * `static fn (int $n): int => (($n !== 1) ? 1 : 0)`.
 */
final class PluralFormsCompiler {
    /** Matches `nplurals=<count>; plural=<expression>;` and captures the count and the expression. */
    private const FORMULA_PATTERN = '/^\s*nplurals\s*=\s*(\d+)\s*;\s*plural\s*=\s*(.+?)\s*;\s*$/';
    /** Finds a `// Plural-Forms: <formula>` comment line anywhere in a file. */
    private const COMMENT_PATTERN = '/^\s*\/\/\s*Plural-Forms:\s*(?P<formula>.+?)\s*$/mi';
    /** Leading `//` comment marker. */
    private const COMMENT_PREFIX_PATTERN = '/^\s*\/\/\s*/';
    /** Character whitelist for expressions; `/` is listed so that it can be rejected with a dedicated message. */
    private const ALLOWED_EXPRESSION_PATTERN = '/^[0-9n\s!<>=&|?:()%+*\/-]+$/';
    /** A line break together with the whitespace around it. */
    private const LINE_BREAK_PATTERN = '/\s*\R\s*/';
    /** A `=` that is not part of `==`, `!=`, `<=` or `>=`. */
    private const BARE_ASSIGNMENT_PATTERN = '/(?<![=!<>])=(?!=)/';
    /** The variable `n` glued to a digit or to another `n` (e.g. `nn`, `n1`, `1n`). */
    private const MALFORMED_VARIABLE_PATTERN = '/n[0-9n]|[0-9]n/';

    /**
     * Parses, validates and transpiles a plural-forms header.
     *
     * @param string $pluralForms Bare formula, `Plural-Forms: ...` header, or `// Plural-Forms: ...` comment.
     * @return array{formula:string,nplurals:int,lambda:string} Normalised formula, number of plural forms,
     *     and the PHP source of the generated arrow function.
     * @throws InvalidArgumentException When the header does not have the `nplurals=...; plural=...;` shape.
     * @throws RuntimeException When the expression contains unsupported or unbalanced syntax.
     */
    public function compileFormula(string $pluralForms): array {
        ['formula' => $formula, 'nplurals' => $pluralCount, 'expression' => $expression] =
            $this->parsePluralHeader($pluralForms);
        $this->validatePluralExpression($expression, $formula);

        // A two-form rule without a ternary is a plain boolean test: map it explicitly to 1 / 0
        // so that the generated lambda yields an int.
        $isBooleanRule = $pluralCount === 2 && !str_contains($expression, '?');
        $lambdaExpression = $isBooleanRule
            ? '((' . $this->transpileLeafExpression($expression) . ') ? 1 : 0)'
            : $this->transpileExpression($expression);

        return [
            'formula' => $formula,
            'nplurals' => $pluralCount,
            'lambda' => 'static fn (int $n): int => ' . $lambdaExpression,
        ];
    }

    /**
     * Convenience wrapper returning only the PHP source of the generated arrow function.
     *
     * @throws InvalidArgumentException|RuntimeException See compileFormula().
     */
    public function compileFormulaToLambda(string $pluralForms): string {
        return $this->compileFormula($pluralForms)['lambda'];
    }

    /**
     * Regenerates one plural file from the formula it declares.
     *
     * @param string $filePath Path of the plural file to read and rewrite.
     * @param bool $throwOnError When false, failures are reported on STDERR (or echoed when STDERR
     *     is not defined) and the method returns false instead of throwing.
     * @return bool True when the file content was rewritten; false when it was already up to date,
     *     or when compilation failed and $throwOnError is false.
     * @throws Throwable Any failure, when $throwOnError is true.
     */
    public function compileFile(string $filePath, bool $throwOnError = true): bool {
        try {
            if (!is_file($filePath)) {
                throw new InvalidArgumentException('Plural file not found: ' . $filePath);
            }
            $compiled = $this->compileFormula($this->extractPluralFormsFromFile($filePath));
            $newContent = $this->renderCompiledFile($compiled);
            $currentContent = file_get_contents($filePath);
            if (!is_string($currentContent)) {
                throw new RuntimeException('Unable to read plural file: ' . $filePath);
            }
            if ($currentContent === $newContent) {
                // Already up to date: do not touch the file.
                return false;
            }
            if (file_put_contents($filePath, $newContent) === false) {
                throw new RuntimeException('Unable to write plural file: ' . $filePath);
            }
        } catch (Throwable $e) {
            if ($throwOnError) {
                throw $e;
            }
            $message = 'Error compiling plural file `' . $filePath . '`: ' . $e->getMessage() . "\n";
            if (defined('STDERR')) {
                fwrite(STDERR, $message);
            } else {
                echo $message;
            }
            return false;
        }
        return true;
    }

    /**
     * Regenerates every plural file matching a glob pattern, in natural sort order.
     *
     * Failures on individual files are reported by compileFile() and do not stop the run.
     *
     * @param string $globPattern Pattern passed to glob(); the default relies on the I18N_PATH constant,
     *     which is not defined in this class.
     * @return int Number of files whose content was rewritten.
     */
    public function compileAll(string $globPattern = I18N_PATH . '/*/plurals.php'): int {
        $files = glob($globPattern) ?: [];
        sort($files, SORT_NATURAL);
        $changed = 0;
        foreach ($files as $filePath) {
            if ($this->compileFile($filePath, throwOnError: false)) {
                $changed++;
            }
        }
        return $changed;
    }

    /**
     * Reads the formula of a plural file: first from a `// Plural-Forms:` comment,
     * otherwise from the `plural-forms` key of the array the file returns.
     *
     * @throws RuntimeException When the file cannot be read or declares no formula.
     */
    private function extractPluralFormsFromFile(string $filePath): string {
        $fileContent = file_get_contents($filePath);
        if (!is_string($fileContent)) {
            throw new RuntimeException('Unable to read plural file: ' . $filePath);
        }
        if (preg_match(self::COMMENT_PATTERN, $fileContent, $matches) === 1) {
            return $this->normalisePluralForms($matches['formula']);
        }
        return $this->extractGetTextPluralFormsFromFile($filePath);
    }

    /**
     * Renders the full content of a generated plural file.
     *
     * The formula is written inside a `//` comment, so it must not contain line breaks;
     * normalisePluralForms() guarantees that.
     *
     * @param array{formula:string,nplurals:int,lambda:string} $compiled
     */
    private function renderCompiledFile(array $compiled): string {
        return <<<PHP
            <?php
            // Plural-Forms: {$compiled['formula']}
            // This file is generated by cli/compile.plurals.php.
            // Edit the formula comment and run `make fix-all`.
            return array(
            'nplurals' => {$compiled['nplurals']},
            'plural' => {$compiled['lambda']},
            );
            PHP;
    }

    /**
     * Splits a plural-forms header into its parts.
     *
     * @return array{formula:string,nplurals:int,expression:string} `nplurals` is at least 1.
     * @throws InvalidArgumentException When the header does not match FORMULA_PATTERN.
     */
    private function parsePluralHeader(string $pluralForms): array {
        $formula = $this->normalisePluralForms($pluralForms);
        if (!preg_match(self::FORMULA_PATTERN, $formula, $matches)) {
            throw new InvalidArgumentException('Invalid plural formula: ' . $formula);
        }
        return [
            'formula' => $formula,
            'nplurals' => max(1, (int)$matches[1]),
            'expression' => $matches[2],
        ];
    }

    /**
     * Reduces any accepted spelling of the header to the bare, single-line
     * `nplurals=...; plural=...;` formula.
     *
     * Line breaks are folded into single spaces: wrapped headers are then parsed like their
     * one-line form, and the result can be embedded safely in a `//` comment of the generated file.
     */
    private function normalisePluralForms(string $pluralForms): string {
        $pluralForms = trim($pluralForms);
        $pluralForms = preg_replace(self::LINE_BREAK_PATTERN, ' ', $pluralForms) ?? $pluralForms;
        $pluralForms = preg_replace(self::COMMENT_PREFIX_PATTERN, '', $pluralForms) ?? $pluralForms;
        if (preg_match('/^\s*Plural-Forms:\s*(?P<formula>.+?)\s*$/i', $pluralForms, $matches) === 1) {
            $pluralForms = $matches['formula'];
        }
        return trim($pluralForms);
    }

    /**
     * Fallback for files without a formula comment: reads the `plural-forms` key
     * of the array returned by the file.
     *
     * NOTE: `include` executes the file, so this must only be used on trusted files.
     *
     * @throws RuntimeException When the file does not provide a non-empty `plural-forms` string.
     */
    private function extractGetTextPluralFormsFromFile(string $filePath): string {
        $pluralData = include $filePath;
        $pluralForms = is_array($pluralData) ? ($pluralData['plural-forms'] ?? null) : null;
        if (!is_string($pluralForms) || $pluralForms === '') {
            throw new RuntimeException('No plural formula found in `' . $filePath . '`');
        }
        return $this->normalisePluralForms($pluralForms);
    }

    /**
     * Lightweight validation only. The compiler transpiles real shipped formulas heuristically.
     *
     * Checks the character whitelist, parenthesis balance, matching `?` / `:` counts, and rejects
     * constructs that would be transpiled into wrong or broken PHP: `/`, a bare `=` (it would
     * become an assignment to `$n`), and `n` glued to a digit or another `n` (it would not be
     * rewritten to `$n`).
     *
     * @param string $expression The `plural=` expression to check.
     * @param string $pluralForms The full formula, used in error messages only.
     * @throws RuntimeException When a check fails.
     */
    private function validatePluralExpression(string $expression, string $pluralForms): void {
        if (!preg_match(self::ALLOWED_EXPRESSION_PATTERN, $expression)) {
            throw new RuntimeException('Unsupported token in plural expression `' . $pluralForms . '`');
        }
        $depth = 0;
        $length = strlen($expression);
        for ($index = 0; $index < $length; $index++) {
            $character = $expression[$index];
            if ($character === '(') {
                $depth++;
            } elseif ($character === ')') {
                $depth--;
                if ($depth < 0) {
                    // A closing parenthesis appeared before its opening one.
                    throw new RuntimeException('Unbalanced parentheses in plural expression `' . $pluralForms . '`');
                }
            }
        }
        if ($depth !== 0) {
            throw new RuntimeException('Unbalanced parentheses in plural expression `' . $pluralForms . '`');
        }
        if (substr_count($expression, '?') !== substr_count($expression, ':')) {
            throw new RuntimeException('Unbalanced ternary operators in plural expression `' . $pluralForms . '`');
        }
        if (str_contains($expression, '/')) {
            throw new RuntimeException('Operator `/` is not supported in plural expression `' . $pluralForms . '`');
        }
        if (preg_match(self::BARE_ASSIGNMENT_PATTERN, $expression) === 1) {
            throw new RuntimeException('Operator `=` is not supported in plural expression `' . $pluralForms . '`');
        }
        if (preg_match(self::MALFORMED_VARIABLE_PATTERN, $expression) === 1) {
            throw new RuntimeException('Unsupported token in plural expression `' . $pluralForms . '`');
        }
    }

    /**
     * Recursively transpiles an expression, wrapping every ternary in its own parentheses
     * so that chained ternaries are explicitly grouped in the generated PHP.
     */
    private function transpileExpression(string $expression): string {
        $expression = $this->stripOuterParentheses(trim($expression));
        [$condition, $ifTrue, $ifFalse] = $this->splitTopLevelTernary($expression);
        if ($condition === null) {
            return $this->transpileLeafExpression($expression);
        }
        return '(' . $this->transpileLeafExpression($condition) . ' ? ' . $this->transpileExpression($ifTrue) . ' : '
            . $this->transpileExpression($ifFalse) . ')';
    }

    /**
     * Transpiles an expression without a top-level ternary: renames `n` to `$n`,
     * makes comparisons strict and normalises spacing.
     */
    private function transpileLeafExpression(string $expression): string {
        $expression = $this->stripOuterParentheses(trim($expression));
        // Convert gettext variable name to PHP variable syntax
        $expression = preg_replace('/\bn\b/', '$n', $expression) ?? $expression;
        // Enforce strict equality
        $expression = preg_replace('/(?<![=!<>])==(?!=)/', '===', $expression) ?? $expression;
        // Enforce strict inequality
        $expression = preg_replace('/!=(?!=)/', '!==', $expression) ?? $expression;
        // Normalise operator spacing
        $expression = preg_replace('/\s*(===|!==|==|!=|<=|>=|\|\||&&|[%*+\-<>])\s*/', ' $1 ', $expression) ?? $expression;
        // Collapse repeated whitespace
        $expression = preg_replace('/\s+/', ' ', trim($expression)) ?? trim($expression);
        return $expression;
    }

    /**
     * Splits `condition ? ifTrue : ifFalse` at the first `?` found outside parentheses
     * and at the `:` that closes it.
     *
     * @return array{0:?string,1:string,2:string} `[condition, ifTrue, ifFalse]`,
     *     or `[null, '', '']` when there is no top-level ternary.
     */
    private function splitTopLevelTernary(string $expression): array {
        $questionPosition = null;
        $depth = 0;
        $ternaryDepth = 0;
        $length = strlen($expression);
        for ($index = 0; $index < $length; $index++) {
            $character = $expression[$index];
            if ($character === '(') {
                $depth++;
                continue;
            }
            if ($character === ')') {
                $depth--;
                continue;
            }
            if ($depth !== 0) {
                // Anything inside parentheses belongs to a nested sub-expression.
                continue;
            }
            if ($character === '?') {
                $questionPosition ??= $index;
                $ternaryDepth++;
                continue;
            }
            if ($character === ':' && $questionPosition !== null) {
                $ternaryDepth--;
                if ($ternaryDepth === 0) {
                    // This `:` pairs with the first `?`; ternaries opened in between stay in the true branch.
                    return [
                        trim(substr($expression, 0, $questionPosition)),
                        trim(substr($expression, $questionPosition + 1, $index - $questionPosition - 1)),
                        trim(substr($expression, $index + 1)),
                    ];
                }
            }
        }
        return [null, '', ''];
    }

    /**
     * Removes parentheses that enclose the whole expression, repeatedly:
     * `((a) && (b))` becomes `(a) && (b)`, which is then left untouched.
     */
    private function stripOuterParentheses(string $expression): string {
        $expression = trim($expression);
        while (str_starts_with($expression, '(') && str_ends_with($expression, ')')) {
            $depth = 0;
            $isWrapped = true;
            $length = strlen($expression);
            for ($index = 0; $index < $length; $index++) {
                $character = $expression[$index];
                if ($character === '(') {
                    $depth++;
                } elseif ($character === ')') {
                    $depth--;
                }
                if ($depth === 0 && $index < $length - 1) {
                    // The first parenthesis closes before the end: it does not wrap the whole expression.
                    $isWrapped = false;
                    break;
                }
            }
            if (!$isWrapped) {
                break;
            }
            $expression = trim(substr($expression, 1, -1));
        }
        return $expression;
    }
}
|