Translator.php 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. <?php /** @noinspection HtmlDeprecatedTag */
  2. namespace Gt\CssXPath;
  3. class Translator {
  4. const cssRegex =
  5. '/'
  6. . '(?P<star>\*)'
  7. . '|(:(?P<pseudo>[\w-]*))'
  8. . '|\(*(?P<pseudospecifier>["\']*[\w\h-]*["\']*)\)'
  9. . '|(?P<element>[\w-]*)'
  10. . '|(?P<child>\s*>\s*)'
  11. . '|(#(?P<id>[\w-]*))'
  12. . '|(\.(?P<class>[\w-]*))'
  13. . '|(?P<sibling>\s*\+\s*)'
  14. . "|(\[(?P<attribute>[\w-]*)((?P<attribute_equals>[=~$|^*]+)(?P<attribute_value>(.+\[\]'?)|[^\]]+))*\])+"
  15. . '|(?P<descendant>\s+)'
  16. . '/';
  17. const EQUALS_EXACT = "=";
  18. const EQUALS_CONTAINS_WORD = "~=";
  19. const EQUALS_ENDS_WITH = "$=";
  20. const EQUALS_CONTAINS = "*=";
  21. const EQUALS_OR_STARTS_WITH_HYPHENATED = "|=";
  22. const EQUALS_STARTS_WITH = "^=";
  23. public function __construct(
  24. protected string $cssSelector,
  25. protected string $prefix = ".//",
  26. protected bool $htmlMode = true
  27. ) {
  28. }
  29. public function __toString():string {
  30. return $this->asXPath();
  31. }
  32. public function asXPath():string {
  33. return $this->convert($this->cssSelector);
  34. }
  35. protected function convert(string $css):string {
  36. $cssArray = preg_split(
  37. '/(["\']).*?\1(*SKIP)(*F)|,/',
  38. $css
  39. );
  40. $xPathArray = [];
  41. foreach($cssArray as $input) {
  42. $output = $this->convertSingleSelector(trim($input));
  43. $xPathArray []= $output;
  44. }
  45. return implode(" | ", $xPathArray);
  46. }
  47. protected function convertSingleSelector(string $css):string {
  48. $thread = $this->preg_match_collated(self::cssRegex, $css);
  49. $thread = array_values($thread);
  50. $xpath = [$this->prefix];
  51. $hasElement = false;
  52. foreach($thread as $threadKey => $currentThreadItem) {
  53. $next = isset($thread[$threadKey + 1])
  54. ? $thread[$threadKey + 1]
  55. : false;
  56. switch ($currentThreadItem["type"]) {
  57. case "star":
  58. case "element":
  59. if($this->htmlMode) {
  60. $xpath []= strtolower($currentThreadItem['content']);
  61. } else {
  62. $xpath []= $currentThreadItem['content'];
  63. }
  64. $hasElement = true;
  65. break;
  66. case "pseudo":
  67. $specifier = "";
  68. if ($next && $next["type"] == "pseudospecifier") {
  69. $specifier = "{$next['content']}";
  70. }
  71. switch ($currentThreadItem["content"]) {
  72. case "disabled":
  73. case "checked":
  74. case "selected":
  75. array_push(
  76. $xpath,
  77. "[@{$currentThreadItem['content']}]"
  78. );
  79. break;
  80. case "text":
  81. array_push(
  82. $xpath,
  83. '[@type="text"]'
  84. );
  85. break;
  86. case "contains":
  87. if(empty($specifier)) {
  88. continue 3;
  89. }
  90. array_push(
  91. $xpath,
  92. "[contains(text(),$specifier)]"
  93. );
  94. break;
  95. case "first-child":
  96. $prev = count($xpath) - 1;
  97. $xpath[$prev] = '*[1]/self::' . $xpath[$prev];
  98. break;
  99. case "nth-child":
  100. if (empty($specifier)) {
  101. continue 3;
  102. }
  103. $prev = count($xpath) - 1;
  104. $previous = $xpath[$prev];
  105. if (substr($previous, -1, 1) === "]") {
  106. $xpath[$prev] = str_replace(
  107. "]",
  108. " and position() = $specifier]",
  109. $xpath[$prev]
  110. );
  111. }
  112. else {
  113. array_push(
  114. $xpath,
  115. "[$specifier]"
  116. );
  117. }
  118. break;
  119. case "last-child":
  120. $prev = count($xpath) - 1;
  121. $xpath[$prev] = '*[last()]/self::' . $xpath[$prev];
  122. break;
  123. case 'first-of-type':
  124. $prev = count($xpath) - 1;
  125. $previous = $xpath[$prev];
  126. if(substr($previous, -1, 1) === "]") {
  127. array_push(
  128. $xpath,
  129. "[1]"
  130. );
  131. }
  132. else {
  133. array_push(
  134. $xpath,
  135. "[1]"
  136. );
  137. }
  138. break;
  139. case "nth-of-type":
  140. if (empty($specifier)) {
  141. continue 3;
  142. }
  143. $prev = count($xpath) - 1;
  144. $previous = $xpath[$prev];
  145. if(substr($previous, -1, 1) === "]") {
  146. array_push(
  147. $xpath,
  148. "[$specifier]"
  149. );
  150. }
  151. else {
  152. array_push(
  153. $xpath,
  154. "[$specifier]"
  155. );
  156. }
  157. break;
  158. case "last-of-type":
  159. $prev = count($xpath) - 1;
  160. $previous = $xpath[$prev];
  161. if(substr($previous, -1, 1) === "]") {
  162. array_push(
  163. $xpath,
  164. "[last()]"
  165. );
  166. }
  167. else {
  168. array_push(
  169. $xpath,
  170. "[last()]"
  171. );
  172. }
  173. break;
  174. }
  175. break;
  176. case "child":
  177. array_push($xpath, "/");
  178. $hasElement = false;
  179. break;
  180. case "id":
  181. array_push(
  182. $xpath,
  183. ($hasElement ? '' : '*')
  184. . "[@id='{$currentThreadItem['content']}']"
  185. );
  186. $hasElement = true;
  187. break;
  188. case "class":
  189. // https://devhints.io/xpath#class-check
  190. array_push(
  191. $xpath,
  192. ($hasElement ? '' : '*')
  193. . "[contains(concat(' ',normalize-space(@class),' '),' {$currentThreadItem['content']} ')]"
  194. );
  195. $hasElement = true;
  196. break;
  197. case "sibling":
  198. array_push(
  199. $xpath,
  200. "/following-sibling::*[1]/self::"
  201. );
  202. $hasElement = false;
  203. break;
  204. case "attribute":
  205. if(!$hasElement) {
  206. array_push($xpath, "*");
  207. $hasElement = true;
  208. }
  209. if($this->htmlMode) {
  210. $currentThreadItem['content'] = strtolower($currentThreadItem['content']);
  211. }
  212. /** @var null|array<int, array<string, string>> $detail */
  213. $detail = $currentThreadItem["detail"] ?? null;
  214. $detailType = $detail[0] ?? null;
  215. $detailValue = $detail[1] ?? null;
  216. if(!$detailType
  217. || $detailType["type"] !== "attribute_equals") {
  218. array_push(
  219. $xpath,
  220. "[@{$currentThreadItem['content']}]"
  221. );
  222. continue 2;
  223. }
  224. $valueString = trim(
  225. $detailValue["content"],
  226. " '\""
  227. );
  228. $equalsType = $detailType["content"];
  229. switch ($equalsType) {
  230. case self::EQUALS_EXACT:
  231. array_push(
  232. $xpath,
  233. "[@{$currentThreadItem['content']}=\"{$valueString}\"]"
  234. );
  235. break;
  236. case self::EQUALS_CONTAINS:
  237. array_push(
  238. $xpath,
  239. "[contains(@{$currentThreadItem['content']},\"{$valueString}\")]"
  240. );
  241. break;
  242. case self::EQUALS_CONTAINS_WORD:
  243. array_push(
  244. $xpath,
  245. "["
  246. . "contains("
  247. . "concat(\" \",@{$currentThreadItem['content']},\" \"),"
  248. . "concat(\" \",\"{$valueString}\",\" \")"
  249. . ")"
  250. . "]"
  251. );
  252. break;
  253. case self::EQUALS_OR_STARTS_WITH_HYPHENATED:
  254. array_push(
  255. $xpath,
  256. "["
  257. . "@{$currentThreadItem['content']}=\"{$valueString}\" or "
  258. . "starts-with(@{$currentThreadItem['content']}, \"{$valueString}-\")"
  259. . "]"
  260. );
  261. break;
  262. case self::EQUALS_STARTS_WITH:
  263. array_push(
  264. $xpath,
  265. "[starts-with("
  266. . "@{$currentThreadItem['content']}, \"{$valueString}\""
  267. . ")]"
  268. );
  269. break;
  270. case self::EQUALS_ENDS_WITH:
  271. array_push(
  272. $xpath,
  273. "["
  274. . "substring("
  275. . "@{$currentThreadItem['content']},"
  276. . "string-length(@{$currentThreadItem['content']}) - "
  277. . "string-length(\"{$valueString}\") + 1)"
  278. . "=\"{$valueString}\""
  279. . "]"
  280. );
  281. break;
  282. }
  283. break;
  284. case "descendant":
  285. array_push($xpath, "//");
  286. $hasElement = false;
  287. break;
  288. }
  289. }
  290. return implode("", $xpath);
  291. }
  292. /** @return array<int, array<string, string>> */
  293. protected function preg_match_collated(
  294. string $regex,
  295. string $string,
  296. ?callable $transform = null
  297. ):array {
  298. preg_match_all(
  299. $regex,
  300. $string,
  301. $matches,
  302. PREG_PATTERN_ORDER
  303. );
  304. $set = [];
  305. foreach($matches[0] as $k => $v) {
  306. if(!empty($v)) {
  307. $set[$k] = null;
  308. }
  309. }
  310. foreach($matches as $k => $m) {
  311. if(is_numeric($k)) {
  312. continue;
  313. }
  314. foreach($m as $i => $match) {
  315. if($match === "") {
  316. continue;
  317. }
  318. $toSet = null;
  319. if($transform) {
  320. $toSet = $transform($k, $match);
  321. }
  322. else {
  323. $toSet = ["type" => $k, "content" => $match];
  324. }
  325. if(!isset($set[$i])) {
  326. $set[$i] = $toSet;
  327. }
  328. else {
  329. if(!isset($set[$i]["detail"])) {
  330. $set[$i]["detail"] = [];
  331. }
  332. array_push($set[$i]["detail"], $toSet);
  333. }
  334. }
  335. }
  336. return $set;
  337. }
  338. }