Translator.php 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. <?php /** @noinspection HtmlDeprecatedTag */
  2. namespace Gt\CssXPath;
  /**
   * Translates a CSS selector string into an XPath expression.
   *
   * The selector is tokenised with self::cssRegex and each token is appended
   * to the XPath being built. Comma-separated selector groups are converted
   * individually and joined with the XPath union operator.
   */
  class Translator {
      /**
       * Tokeniser for a single CSS selector. Each named group identifies one
       * token type; the order of the alternatives is significant because the
       * first alternative that matches wins.
       *
       * The attribute alternative matches exactly one bracketed selector at a
       * time, so consecutive selectors such as [a=1][b=2] yield one token each.
       * The attribute_equals class accepts every operator declared in the
       * EQUALS_* constants below.
       */
      const cssRegex =
          '/'
          . '(?P<star>\*)'
          . '|(:(?P<pseudo>[\w-]*))'
          . '|\(*(?P<pseudospecifier>["\']*[\w\h-]*["\']*)\)'
          . '|(?P<element>[\w-]*)'
          . '|(?P<child>\s*>\s*)'
          . '|(#(?P<id>[\w-]*))'
          . '|(\.(?P<class>[\w-]*))'
          . '|(?P<sibling>\s*\+\s*)'
          . "|(\[(?P<attribute>[\w-]*)((?P<attribute_equals>[=~$|^*]+)(?P<attribute_value>(.+\[\]'?)|[^\]]+))*\])"
          . '|(?P<descendant>\s+)'
          . '/';
      const EQUALS_EXACT = "=";
      const EQUALS_CONTAINS_WORD = "~=";
      const EQUALS_ENDS_WITH = "$=";
      const EQUALS_CONTAINS = "*=";
      const EQUALS_STARTS_WITH_OR_STARTS_WITH_HYPHENATED = "|=";
      const EQUALS_STARTS_WITH = "^=";

      /** @var string The CSS selector supplied to the constructor. */
      protected $cssSelector;
      /** @var string Text placed at the start of every generated XPath. */
      protected $prefix;

      /**
       * @param string $cssSelector CSS selector (or comma-separated group) to translate
       * @param string $prefix Text prepended to each translated selector
       */
      public function __construct(string $cssSelector, string $prefix = ".//") {
          $this->cssSelector = $cssSelector;
          $this->prefix = $prefix;
      }

      /** Casting the translator to a string yields the XPath expression. */
      public function __toString():string {
          return $this->asXPath();
      }

      /** @return string XPath expression for the selector given to the constructor */
      public function asXPath():string {
          return $this->convert($this->cssSelector);
      }

      /**
       * Converts a selector group: splits on commas that are not inside a
       * quoted string, converts each part and joins the results with " | ".
       */
      protected function convert(string $css):string {
          // Quoted runs are skipped via (*SKIP)(*F) so commas inside them
          // do not split the selector.
          $cssArray = preg_split(
              '/(["\']).*?\1(*SKIP)(*F)|,/',
              $css
          );

          $xPathArray = [];
          foreach($cssArray as $input) {
              $xPathArray []= $this->convertSingleSelector(trim($input));
          }

          return implode(" | ", $xPathArray);
      }

      /**
       * Converts one selector (no top-level commas) to XPath by walking its
       * tokens in order and appending the matching XPath fragment for each.
       */
      protected function convertSingleSelector(string $css):string {
          $thread = array_values(
              $this->preg_match_collated(self::cssRegex, $css)
          );

          $xpath = [$this->prefix];
          // Tracks whether the current step already has a node test, so that
          // predicates (id, class, attribute) know whether to emit "*" first.
          $hasElement = false;

          foreach($thread as $threadKey => $currentThreadItem) {
              // Matches with no named content (e.g. "()" or a lone ":") are
              // left as null placeholders by preg_match_collated; they carry
              // nothing to translate.
              if($currentThreadItem === null) {
                  continue;
              }

              $next = $thread[$threadKey + 1] ?? false;
              $content = $currentThreadItem["content"];

              switch($currentThreadItem["type"]) {
              case "star":
              case "element":
                  $xpath []= $content;
                  $hasElement = true;
                  break;

              case "pseudo":
                  $specifier = "";
                  if($next && $next["type"] === "pseudospecifier") {
                      $specifier = "{$next['content']}";
                  }
                  $xpath = $this->applyPseudo($xpath, $content, $specifier);
                  break;

              case "child":
                  $xpath []= "/";
                  $hasElement = false;
                  break;

              case "id":
                  $xpath []= ($hasElement ? '' : '*')
                      . "[@id='{$content}']";
                  $hasElement = true;
                  break;

              case "class":
                  // https://devhints.io/xpath#class-check
                  $xpath []= ($hasElement ? '' : '*')
                      . "[contains(concat(' ',normalize-space(@class),' '),' {$content} ')]";
                  $hasElement = true;
                  break;

              case "sibling":
                  $xpath []= "/following-sibling::*[1]/self::";
                  $hasElement = false;
                  break;

              case "attribute":
                  if(!$hasElement) {
                      $xpath []= "*";
                      $hasElement = true;
                  }

                  /** @var null|array<int, array<string, string>> $detail */
                  $detail = $currentThreadItem["detail"] ?? null;
                  $detailType = $detail[0] ?? null;
                  $detailValue = $detail[1] ?? null;

                  // No operator: a bare presence test such as [disabled].
                  if(!$detailType
                  || $detailType["type"] !== "attribute_equals") {
                      $xpath []= "[@{$content}]";
                      break;
                  }

                  // Strip surrounding whitespace and quotes from the value.
                  $valueString = trim(
                      $detailValue["content"] ?? "",
                      " '\""
                  );
                  $predicate = $this->attributePredicate(
                      $content,
                      $detailType["content"],
                      $valueString
                  );
                  // Unrecognised operator strings add nothing.
                  if($predicate !== null) {
                      $xpath []= $predicate;
                  }
                  break;

              case "descendant":
                  $xpath []= "//";
                  $hasElement = false;
                  break;
              }
          }

          return implode("", $xpath);
      }

      /**
       * Applies one pseudo-class to the XPath fragments collected so far.
       *
       * @param array<int, string> $xpath Fragments built so far
       * @param string $pseudo Pseudo-class name without the leading colon
       * @param string $specifier Content of the following parentheses, or ""
       * @return array<int, string> The updated fragment list
       */
      private function applyPseudo(
          array $xpath,
          string $pseudo,
          string $specifier
      ):array {
          $last = count($xpath) - 1;

          switch($pseudo) {
          case "disabled":
          case "checked":
          case "selected":
              $xpath []= "[@{$pseudo}]";
              break;

          case "text":
              $xpath []= '[@type="text"]';
              break;

          case "contains":
              if($specifier !== "") {
                  $xpath []= "[contains(text(),{$specifier})]";
              }
              break;

          case "first-child":
              $xpath[$last] = '*[1]/self::' . $xpath[$last];
              break;

          case "nth-child":
              if($specifier === "") {
                  break;
              }

              if(substr($xpath[$last], -1) === "]") {
                  // Merge into the existing predicate. Only the closing
                  // bracket is replaced so that "]" characters inside the
                  // predicate (e.g. a value of "a[]") stay intact.
                  $xpath[$last] = substr($xpath[$last], 0, -1)
                      . " and position() = {$specifier}]";
              }
              else {
                  $xpath []= "[{$specifier}]";
              }
              break;

          case "nth-of-type":
              if($specifier !== "") {
                  $xpath []= "[{$specifier}]";
              }
              break;
          }

          return $xpath;
      }

      /**
       * Builds the XPath predicate for an attribute selector with an operator.
       *
       * @param string $name Attribute name
       * @param string $operator One of the EQUALS_* constants
       * @param string $value Attribute value with surrounding quotes removed
       * @return string|null The predicate, or null when $operator is not one
       * of the EQUALS_* constants
       */
      private function attributePredicate(
          string $name,
          string $operator,
          string $value
      ):?string {
          switch($operator) {
          case self::EQUALS_EXACT:
              return "[@{$name}=\"{$value}\"]";

          case self::EQUALS_CONTAINS:
              return "[contains(@{$name},\"{$value}\")]";

          case self::EQUALS_CONTAINS_WORD:
              return "["
                  . "contains("
                  . "concat(\" \",@{$name},\" \"),"
                  . "concat(\" \",\"{$value}\",\" \")"
                  . ")"
                  . "]";

          case self::EQUALS_STARTS_WITH_OR_STARTS_WITH_HYPHENATED:
              // Exactly the value, or the value followed by a hyphen.
              return "["
                  . "@{$name}=\"{$value}\" or "
                  . "starts-with(@{$name},\"{$value}-\")"
                  . "]";

          case self::EQUALS_STARTS_WITH:
              return "[starts-with(@{$name},\"{$value}\")]";

          case self::EQUALS_ENDS_WITH:
              // XPath 1.0 has no ends-with(); compare the trailing substring.
              return "["
                  . "substring("
                  . "@{$name},"
                  . "string-length(@{$name}) - "
                  . "string-length(\"{$value}\") + 1)"
                  . "=\"{$value}\""
                  . "]";
          }

          return null;
      }

      /**
       * Runs preg_match_all and collates the named groups of every match into
       * one entry per match, in match order.
       *
       * The first non-empty named group of a match becomes the entry itself;
       * any further non-empty named groups of the same match are appended to
       * that entry's "detail" list. A match whose named groups are all empty
       * is left as a null entry. Matches whose full text is empty get no entry.
       *
       * @param string $regex Pattern containing named groups
       * @param string $string Subject to tokenise
       * @param callable|null $transform Optional replacement for the default
       * ["type" => group name, "content" => matched text] entry; it is called
       * with the group name and the matched text
       * @return array<int, array<string, mixed>|null> Keyed by match index
       */
      protected function preg_match_collated(
          string $regex,
          string $string,
          ?callable $transform = null
      ):array {
          preg_match_all(
              $regex,
              $string,
              $matches,
              PREG_PATTERN_ORDER
          );

          // Reserve the keys in match order first: the loop below walks the
          // groups one type at a time, which would otherwise order the result
          // by group rather than by position in the subject.
          $set = [];
          foreach($matches[0] as $index => $fullMatch) {
              if($fullMatch !== "") {
                  $set[$index] = null;
              }
          }

          foreach($matches as $groupName => $groupMatches) {
              // Numeric keys duplicate the named groups (or are unnamed).
              if(is_int($groupName)) {
                  continue;
              }

              foreach($groupMatches as $index => $match) {
                  if($match === "") {
                      continue;
                  }

                  $toSet = $transform
                      ? $transform($groupName, $match)
                      : ["type" => $groupName, "content" => $match];

                  if(!isset($set[$index])) {
                      $set[$index] = $toSet;
                      continue;
                  }

                  if(!isset($set[$index]["detail"])) {
                      $set[$index]["detail"] = [];
                  }
                  $set[$index]["detail"][] = $toSet;
              }
          }

          return $set;
      }
  }