PluralFormsCompiler.php 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. <?php
  2. declare(strict_types=1);
  /**
   * Compiles GNU gettext `Plural-Forms` headers into PHP arrow-function source code.
   *
   * Inspired by GNU gettext plural forms:
   * https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
   *
   * The plural expression is not fully parsed: it goes through lightweight validation and is
   * then rewritten textually (`n` becomes `$n`, `==` becomes `===`, ternaries get parenthesised).
   */
  final class PluralFormsCompiler {
      /** `nplurals=<digits>; plural=<expression>;` - captures the count (1) and the expression (2). */
      private const FORMULA_PATTERN = '/^\s*nplurals\s*=\s*(\d+)\s*;\s*plural\s*=\s*(.+?)\s*;\s*$/';

      /** A `// Plural-Forms: <formula>` comment line inside a file (multiline, case-insensitive). */
      private const COMMENT_PATTERN = '/^\s*\/\/\s*Plural-Forms:\s*(?P<formula>.+?)\s*$/mi';

      /** A leading `//` comment marker together with the whitespace around it. */
      private const COMMENT_PREFIX_PATTERN = '/^\s*\/\/\s*/';

      /** Character whitelist for plural expressions (digits, `n`, whitespace, operators, parentheses). */
      private const ALLOWED_EXPRESSION_PATTERN = '/^[0-9n\s!<>=&|?:()%+*\/-]+$/';

      /** The multi-character operators in which `=`, `&` and `|` may legitimately appear. */
      private const COMPOUND_OPERATOR_PATTERN = '/===|!==|==|!=|<=|>=|&&|\|\|/';

      /**
       * Compiles a plural-forms header into the source code of a PHP arrow function.
       *
       * @param string $pluralForms Header such as `nplurals=2; plural=(n != 1);`, optionally prefixed
       *                            with `Plural-Forms:` and/or a `//` comment marker.
       * @return array{formula:string,nplurals:int,lambda:string} The normalised formula, the number
       *         of plural forms, and the PHP source of the generated `static fn (int $n): int`.
       * @throws InvalidArgumentException If the header does not have the `nplurals=...; plural=...;` shape.
       * @throws RuntimeException If the plural expression fails validation.
       */
      public function compileFormula(string $pluralForms): array {
          ['formula' => $formula, 'nplurals' => $pluralCount, 'expression' => $expression] =
              $this->parsePluralHeader($pluralForms);
          $this->validatePluralExpression($expression, $formula);

          // A two-form formula without a ternary is a bare condition: map it explicitly to 1 or 0.
          $lambdaExpression = $pluralCount === 2 && !str_contains($expression, '?')
              ? '((' . $this->transpileLeafExpression($expression) . ') ? 1 : 0)'
              : $this->transpileExpression($expression);

          return [
              'formula' => $formula,
              'nplurals' => $pluralCount,
              'lambda' => 'static fn (int $n): int => ' . $lambdaExpression,
          ];
      }

      /**
       * Convenience wrapper returning only the generated arrow-function source.
       *
       * @throws InvalidArgumentException If the header is malformed.
       * @throws RuntimeException If the plural expression fails validation.
       */
      public function compileFormulaToLambda(string $pluralForms): string {
          return $this->compileFormula($pluralForms)['lambda'];
      }

      /**
       * Regenerates one plural file in place from the formula it contains.
       *
       * @param string $filePath Path of the plural file to rewrite.
       * @param bool $throwOnError When false, errors are reported on STDERR (or echoed when STDERR
       *                           is not defined) and `false` is returned instead of throwing.
       * @return bool True when the file was rewritten; false when it was already up to date, or
       *              when an error occurred and `$throwOnError` is false.
       * @throws Throwable Any error raised while compiling, if `$throwOnError` is true.
       */
      public function compileFile(string $filePath, bool $throwOnError = true): bool {
          try {
              if (!is_file($filePath)) {
                  throw new InvalidArgumentException('Plural file not found: ' . $filePath);
              }
              // Read once: the same bytes are used to extract the formula and to detect changes.
              $currentContent = $this->readFile($filePath);
              $compiled = $this->compileFormula($this->extractPluralForms($currentContent, $filePath));
              $newContent = $this->renderCompiledFile($compiled);
              if ($currentContent === $newContent) {
                  return false;
              }
              if (file_put_contents($filePath, $newContent) === false) {
                  throw new RuntimeException('Unable to write plural file: ' . $filePath);
              }
          } catch (Throwable $e) {
              if ($throwOnError) {
                  throw $e;
              }
              $message = 'Error compiling plural file `' . $filePath . '`: ' . $e->getMessage() . "\n";
              if (defined('STDERR')) {
                  fwrite(STDERR, $message);
              } else {
                  echo $message;
              }
              return false;
          }
          return true;
      }

      /**
       * Regenerates every plural file matching a glob pattern, in natural sort order.
       *
       * Errors in individual files are reported but do not stop the run.
       *
       * @return int Number of files that were rewritten.
       */
      public function compileAll(string $globPattern = I18N_PATH . '/*/plurals.php'): int {
          $files = glob($globPattern) ?: [];
          sort($files, SORT_NATURAL);
          $changed = 0;
          foreach ($files as $filePath) {
              if ($this->compileFile($filePath, throwOnError: false)) {
                  $changed++;
              }
          }
          return $changed;
      }

      /**
       * Reads a whole file.
       *
       * @throws RuntimeException If the file cannot be read.
       */
      private function readFile(string $filePath): string {
          $fileContent = file_get_contents($filePath);
          if (!is_string($fileContent)) {
              throw new RuntimeException('Unable to read plural file: ' . $filePath);
          }
          return $fileContent;
      }

      /**
       * Finds the plural formula of a plural file.
       *
       * A `// Plural-Forms: ...` comment in the content takes precedence; otherwise the file is
       * loaded and its `plural-forms` array entry is used.
       *
       * @param string $fileContent Content already read from `$filePath`.
       * @param string $filePath Path of the file, used for the fallback and for error messages.
       * @throws RuntimeException If no formula can be found.
       */
      private function extractPluralForms(string $fileContent, string $filePath): string {
          if (preg_match(self::COMMENT_PATTERN, $fileContent, $matches) === 1) {
              return $this->normalisePluralForms($matches['formula']);
          }
          return $this->extractGetTextPluralFormsFromFile($filePath);
      }

      /**
       * Renders the full content of a generated plural file.
       *
       * @param array{formula:string,nplurals:int,lambda:string} $compiled
       */
      private function renderCompiledFile(array $compiled): string {
          // NOTE(review): whitespace inside this template is part of the generated file and is
          // compared byte-for-byte in compileFile() - confirm it against existing generated files.
          return <<<PHP
              <?php
              // Plural-Forms: {$compiled['formula']}
              // This file is generated by cli/compile.plurals.php.
              // Edit the formula comment and run `make fix-all`.
              return array(
              'nplurals' => {$compiled['nplurals']},
              'plural' => {$compiled['lambda']},
              );
              PHP;
      }

      /**
       * Splits a plural-forms header into its parts.
       *
       * @return array{formula:string,nplurals:int,expression:string} `nplurals` is at least 1.
       * @throws InvalidArgumentException If the header does not match FORMULA_PATTERN.
       */
      private function parsePluralHeader(string $pluralForms): array {
          $formula = $this->normalisePluralForms($pluralForms);
          if (!preg_match(self::FORMULA_PATTERN, $formula, $matches)) {
              throw new InvalidArgumentException('Invalid plural formula: ' . $formula);
          }
          return [
              'formula' => $formula,
              'nplurals' => max(1, (int)$matches[1]),
              'expression' => $matches[2],
          ];
      }

      /**
       * Strips an optional leading `//` marker and an optional `Plural-Forms:` label, then trims.
       */
      private function normalisePluralForms(string $pluralForms): string {
          $pluralForms = trim($pluralForms);
          $pluralForms = preg_replace(self::COMMENT_PREFIX_PATTERN, '', $pluralForms) ?? $pluralForms;
          if (preg_match('/^\s*Plural-Forms:\s*(?P<formula>.+?)\s*$/i', $pluralForms, $matches) === 1) {
              $pluralForms = $matches['formula'];
          }
          return trim($pluralForms);
      }

      /**
       * Fallback: loads the file and reads the formula from its `plural-forms` array entry.
       *
       * @throws RuntimeException If the file does not return an array with a non-empty string there.
       */
      private function extractGetTextPluralFormsFromFile(string $filePath): string {
          // NOTE: `include` executes the file, so this must only be used on trusted files.
          $pluralData = include $filePath;
          $pluralForms = is_array($pluralData) ? ($pluralData['plural-forms'] ?? null) : null;
          if (!is_string($pluralForms) || $pluralForms === '') {
              throw new RuntimeException('No plural formula found in `' . $filePath . '`');
          }
          return $this->normalisePluralForms($pluralForms);
      }

      /**
       * Lightweight validation only. The compiler transpiles real shipped formulas heuristically.
       *
       * Checks, in order: allowed characters, non-empty expression, no bare `=` / `&` / `|`,
       * balanced parentheses, as many `?` as `:`, and no `/` operator.
       *
       * @param string $expression The plural expression to check.
       * @param string $pluralForms The whole formula, used in error messages only.
       * @throws RuntimeException If any check fails.
       */
      private function validatePluralExpression(string $expression, string $pluralForms): void {
          if (!preg_match(self::ALLOWED_EXPRESSION_PATTERN, $expression)) {
              throw new RuntimeException('Unsupported token in plural expression `' . $pluralForms . '`');
          }

          // Only whitespace and parentheses (e.g. `()`) would be emitted as an empty PHP expression.
          if (preg_replace('/[\s()]+/', '', $expression) === '') {
              throw new RuntimeException('Empty plural expression `' . $pluralForms . '`');
          }

          // With the compound operators removed, a remaining `=`, `&` or `|` is a bare operator.
          // It would be emitted as a PHP assignment or bitwise operation, e.g. `n=1` -> `$n=1`.
          $withoutCompoundOperators = preg_replace(self::COMPOUND_OPERATOR_PATTERN, ' ', $expression) ?? $expression;
          if (strpbrk($withoutCompoundOperators, '=&|') !== false) {
              throw new RuntimeException('Unsupported operator in plural expression `' . $pluralForms . '`');
          }

          $depth = 0;
          $length = strlen($expression);
          for ($index = 0; $index < $length; $index++) {
              $character = $expression[$index];
              if ($character === '(') {
                  $depth++;
              } elseif ($character === ')') {
                  $depth--;
                  if ($depth < 0) {
                      // A closing parenthesis appeared before its opening one.
                      throw new RuntimeException('Unbalanced parentheses in plural expression `' . $pluralForms . '`');
                  }
              }
          }
          if ($depth !== 0) {
              throw new RuntimeException('Unbalanced parentheses in plural expression `' . $pluralForms . '`');
          }

          if (substr_count($expression, '?') !== substr_count($expression, ':')) {
              throw new RuntimeException('Unbalanced ternary operators in plural expression `' . $pluralForms . '`');
          }

          // `/` is in the whitelist only so that it gets this dedicated error message.
          if (str_contains($expression, '/')) {
              throw new RuntimeException('Operator `/` is not supported in plural expression `' . $pluralForms . '`');
          }
      }

      /**
       * Transpiles an expression, recursing into both branches of a top-level ternary.
       *
       * Every ternary is emitted inside its own parentheses, so chained ternaries are nested
       * explicitly in the generated code.
       */
      private function transpileExpression(string $expression): string {
          $expression = $this->stripOuterParentheses(trim($expression));
          [$condition, $ifTrue, $ifFalse] = $this->splitTopLevelTernary($expression);
          if ($condition === null) {
              return $this->transpileLeafExpression($expression);
          }
          return '(' . $this->transpileLeafExpression($condition) . ' ? ' . $this->transpileExpression($ifTrue) . ' : '
              . $this->transpileExpression($ifFalse) . ')';
      }

      /**
       * Rewrites an expression that is not split any further into PHP syntax.
       *
       * Operators already in strict form (`===`, `!==`) are left untouched by the lookarounds.
       */
      private function transpileLeafExpression(string $expression): string {
          $expression = $this->stripOuterParentheses(trim($expression));
          // Convert gettext variable name to PHP variable syntax
          $expression = preg_replace('/\bn\b/', '$n', $expression) ?? $expression;
          // Enforce strict equality
          $expression = preg_replace('/(?<![=!<>])==(?!=)/', '===', $expression) ?? $expression;
          // Enforce strict inequality
          $expression = preg_replace('/!=(?!=)/', '!==', $expression) ?? $expression;
          // Normalise operator spacing
          $expression = preg_replace('/\s*(===|!==|==|!=|<=|>=|\|\||&&|[%*+\-<>])\s*/', ' $1 ', $expression) ?? $expression;
          // Collapse repeated whitespace
          $expression = preg_replace('/\s+/', ' ', trim($expression)) ?? trim($expression);
          return $expression;
      }

      /**
       * Splits an expression at its first ternary that lies outside any parentheses.
       *
       * The `:` paired with the first top-level `?` is found by counting the `?` and `:` seen
       * in between, so a ternary nested in the "true" branch stays inside that branch.
       *
       * @return array{0:?string,1:string,2:string} Trimmed `[condition, ifTrue, ifFalse]`, or
       *         `[null, '', '']` when no complete top-level ternary is found.
       */
      private function splitTopLevelTernary(string $expression): array {
          $questionPosition = null;
          $depth = 0;
          $ternaryDepth = 0;
          $length = strlen($expression);
          for ($index = 0; $index < $length; $index++) {
              $character = $expression[$index];
              if ($character === '(') {
                  $depth++;
                  continue;
              }
              if ($character === ')') {
                  $depth--;
                  continue;
              }
              if ($depth !== 0) {
                  // Anything inside parentheses belongs to a sub-expression.
                  continue;
              }
              if ($character === '?') {
                  $questionPosition ??= $index;
                  $ternaryDepth++;
                  continue;
              }
              if ($character === ':' && $questionPosition !== null) {
                  $ternaryDepth--;
                  if ($ternaryDepth === 0) {
                      return [
                          trim(substr($expression, 0, $questionPosition)),
                          trim(substr($expression, $questionPosition + 1, $index - $questionPosition - 1)),
                          trim(substr($expression, $index + 1)),
                      ];
                  }
              }
          }
          return [null, '', ''];
      }

      /**
       * Removes pairs of parentheses that wrap the whole expression, repeatedly.
       *
       * `((a))` becomes `a`; `(a) && (b)` is returned unchanged because its first parenthesis
       * closes before the end of the expression.
       */
      private function stripOuterParentheses(string $expression): string {
          $expression = trim($expression);
          while (str_starts_with($expression, '(') && str_ends_with($expression, ')')) {
              $depth = 0;
              $isWrapped = true;
              $length = strlen($expression);
              for ($index = 0; $index < $length; $index++) {
                  $character = $expression[$index];
                  if ($character === '(') {
                      $depth++;
                  } elseif ($character === ')') {
                      $depth--;
                  }
                  // Depth back at zero before the last character: the first `(` closed early.
                  if ($depth === 0 && $index < $length - 1) {
                      $isWrapped = false;
                      break;
                  }
              }
              if (!$isWrapped) {
                  break;
              }
              $expression = trim(substr($expression, 1, -1));
          }
          return $expression;
      }
  }