Translator.php 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. <?php /** @noinspection HtmlDeprecatedTag */
  2. namespace Gt\CssXPath;
  3. class Translator {
  4. const cssRegex =
  5. '/'
  6. . '(?P<star>\*)'
  7. . '|(:(?P<pseudo>[\w-]*))'
  8. . '|\(*(?P<pseudospecifier>["\']*[\w\h-]*["\']*)\)'
  9. . '|(?P<element>[\w-]*)'
  10. . '|(?P<child>\s*>\s*)'
  11. . '|(#(?P<id>[\w-]*))'
  12. . '|(\.(?P<class>[\w-]*))'
  13. . '|(?P<sibling>\s*\+\s*)'
  14. . '|(?P<subsequentsibling>\s*~\s*)'
  15. . "|(\[(?P<attribute>[\w-]*)((?P<attribute_equals>[=~$|^*]+)(?P<attribute_value>(.+\[\]'?)|[^\]]+))*\])+"
  16. . '|(?P<descendant>\s+)'
  17. . '/';
  18. const EQUALS_EXACT = "=";
  19. const EQUALS_CONTAINS_WORD = "~=";
  20. const EQUALS_ENDS_WITH = "$=";
  21. const EQUALS_CONTAINS = "*=";
  22. const EQUALS_OR_STARTS_WITH_HYPHENATED = "|=";
  23. const EQUALS_STARTS_WITH = "^=";
  24. public function __construct(
  25. protected string $cssSelector,
  26. protected string $prefix = ".//",
  27. protected bool $htmlMode = true
  28. ) {
  29. }
  30. public function __toString():string {
  31. return $this->asXPath();
  32. }
  33. public function asXPath():string {
  34. return $this->convert($this->cssSelector);
  35. }
  36. protected function convert(string $css):string {
  37. $cssArray = preg_split(
  38. '/(["\']).*?\1(*SKIP)(*F)|,/',
  39. $css
  40. );
  41. $xPathArray = [];
  42. foreach($cssArray as $input) {
  43. $output = $this->convertSingleSelector(trim($input));
  44. $xPathArray []= $output;
  45. }
  46. return implode(" | ", $xPathArray);
  47. }
  48. protected function convertSingleSelector(string $css):string {
  49. $thread = $this->preg_match_collated(self::cssRegex, $css);
  50. $thread = array_values($thread);
  51. $xpath = [$this->prefix];
  52. $hasElement = false;
  53. foreach($thread as $threadKey => $currentThreadItem) {
  54. $next = isset($thread[$threadKey + 1])
  55. ? $thread[$threadKey + 1]
  56. : false;
  57. switch ($currentThreadItem["type"]) {
  58. case "star":
  59. case "element":
  60. if($this->htmlMode) {
  61. $xpath []= strtolower($currentThreadItem['content']);
  62. } else {
  63. $xpath []= $currentThreadItem['content'];
  64. }
  65. $hasElement = true;
  66. break;
  67. case "pseudo":
  68. $specifier = "";
  69. if ($next && $next["type"] == "pseudospecifier") {
  70. $specifier = "{$next['content']}";
  71. }
  72. switch ($currentThreadItem["content"]) {
  73. case "disabled":
  74. case "checked":
  75. case "selected":
  76. array_push(
  77. $xpath,
  78. "[@{$currentThreadItem['content']}]"
  79. );
  80. break;
  81. case "text":
  82. array_push(
  83. $xpath,
  84. '[@type="text"]'
  85. );
  86. break;
  87. case "contains":
  88. if(empty($specifier)) {
  89. continue 3;
  90. }
  91. array_push(
  92. $xpath,
  93. "[contains(text(),$specifier)]"
  94. );
  95. break;
  96. case "first-child":
  97. $prev = count($xpath) - 1;
  98. $xpath[$prev] = '*[1]/self::' . $xpath[$prev];
  99. break;
  100. case "nth-child":
  101. if (empty($specifier)) {
  102. continue 3;
  103. }
  104. $prev = count($xpath) - 1;
  105. $previous = $xpath[$prev];
  106. if (substr($previous, -1, 1) === "]") {
  107. $xpath[$prev] = str_replace(
  108. "]",
  109. " and position() = $specifier]",
  110. $xpath[$prev]
  111. );
  112. }
  113. else {
  114. array_push(
  115. $xpath,
  116. "[$specifier]"
  117. );
  118. }
  119. break;
  120. case "last-child":
  121. $prev = count($xpath) - 1;
  122. $xpath[$prev] = '*[last()]/self::' . $xpath[$prev];
  123. break;
  124. case 'first-of-type':
  125. $prev = count($xpath) - 1;
  126. $previous = $xpath[$prev];
  127. if(substr($previous, -1, 1) === "]") {
  128. array_push(
  129. $xpath,
  130. "[1]"
  131. );
  132. }
  133. else {
  134. array_push(
  135. $xpath,
  136. "[1]"
  137. );
  138. }
  139. break;
  140. case "nth-of-type":
  141. if (empty($specifier)) {
  142. continue 3;
  143. }
  144. $prev = count($xpath) - 1;
  145. $previous = $xpath[$prev];
  146. if(substr($previous, -1, 1) === "]") {
  147. array_push(
  148. $xpath,
  149. "[$specifier]"
  150. );
  151. }
  152. else {
  153. array_push(
  154. $xpath,
  155. "[$specifier]"
  156. );
  157. }
  158. break;
  159. case "last-of-type":
  160. $prev = count($xpath) - 1;
  161. $previous = $xpath[$prev];
  162. if(substr($previous, -1, 1) === "]") {
  163. array_push(
  164. $xpath,
  165. "[last()]"
  166. );
  167. }
  168. else {
  169. array_push(
  170. $xpath,
  171. "[last()]"
  172. );
  173. }
  174. break;
  175. }
  176. break;
  177. case "child":
  178. array_push($xpath, "/");
  179. $hasElement = false;
  180. break;
  181. case "id":
  182. array_push(
  183. $xpath,
  184. ($hasElement ? '' : '*')
  185. . "[@id='{$currentThreadItem['content']}']"
  186. );
  187. $hasElement = true;
  188. break;
  189. case "class":
  190. // https://devhints.io/xpath#class-check
  191. array_push(
  192. $xpath,
  193. ($hasElement ? '' : '*')
  194. . "[contains(concat(' ',normalize-space(@class),' '),' {$currentThreadItem['content']} ')]"
  195. );
  196. $hasElement = true;
  197. break;
  198. case "sibling":
  199. array_push(
  200. $xpath,
  201. "/following-sibling::*[1]/self::"
  202. );
  203. $hasElement = false;
  204. break;
  205. case "subsequentsibling":
  206. array_push(
  207. $xpath,
  208. "/following-sibling::"
  209. );
  210. $hasElement = false;
  211. break;
  212. case "attribute":
  213. if(!$hasElement) {
  214. array_push($xpath, "*");
  215. $hasElement = true;
  216. }
  217. if($this->htmlMode) {
  218. $currentThreadItem['content'] = strtolower($currentThreadItem['content']);
  219. }
  220. /** @var null|array<int, array<string, string>> $detail */
  221. $detail = $currentThreadItem["detail"] ?? null;
  222. $detailType = $detail[0] ?? null;
  223. $detailValue = $detail[1] ?? null;
  224. if(!$detailType
  225. || $detailType["type"] !== "attribute_equals") {
  226. array_push(
  227. $xpath,
  228. "[@{$currentThreadItem['content']}]"
  229. );
  230. continue 2;
  231. }
  232. $valueString = trim(
  233. $detailValue["content"],
  234. " '\""
  235. );
  236. $equalsType = $detailType["content"];
  237. switch ($equalsType) {
  238. case self::EQUALS_EXACT:
  239. array_push(
  240. $xpath,
  241. "[@{$currentThreadItem['content']}=\"{$valueString}\"]"
  242. );
  243. break;
  244. case self::EQUALS_CONTAINS:
  245. array_push(
  246. $xpath,
  247. "[contains(@{$currentThreadItem['content']},\"{$valueString}\")]"
  248. );
  249. break;
  250. case self::EQUALS_CONTAINS_WORD:
  251. array_push(
  252. $xpath,
  253. "["
  254. . "contains("
  255. . "concat(\" \",@{$currentThreadItem['content']},\" \"),"
  256. . "concat(\" \",\"{$valueString}\",\" \")"
  257. . ")"
  258. . "]"
  259. );
  260. break;
  261. case self::EQUALS_OR_STARTS_WITH_HYPHENATED:
  262. array_push(
  263. $xpath,
  264. "["
  265. . "@{$currentThreadItem['content']}=\"{$valueString}\" or "
  266. . "starts-with(@{$currentThreadItem['content']}, \"{$valueString}-\")"
  267. . "]"
  268. );
  269. break;
  270. case self::EQUALS_STARTS_WITH:
  271. array_push(
  272. $xpath,
  273. "[starts-with("
  274. . "@{$currentThreadItem['content']}, \"{$valueString}\""
  275. . ")]"
  276. );
  277. break;
  278. case self::EQUALS_ENDS_WITH:
  279. array_push(
  280. $xpath,
  281. "["
  282. . "substring("
  283. . "@{$currentThreadItem['content']},"
  284. . "string-length(@{$currentThreadItem['content']}) - "
  285. . "string-length(\"{$valueString}\") + 1)"
  286. . "=\"{$valueString}\""
  287. . "]"
  288. );
  289. break;
  290. }
  291. break;
  292. case "descendant":
  293. array_push($xpath, "//");
  294. $hasElement = false;
  295. break;
  296. }
  297. }
  298. return implode("", $xpath);
  299. }
  300. /** @return array<int, array<string, string>> */
  301. protected function preg_match_collated(
  302. string $regex,
  303. string $string,
  304. ?callable $transform = null
  305. ):array {
  306. preg_match_all(
  307. $regex,
  308. $string,
  309. $matches,
  310. PREG_PATTERN_ORDER
  311. );
  312. $set = [];
  313. foreach($matches[0] as $k => $v) {
  314. if(!empty($v)) {
  315. $set[$k] = null;
  316. }
  317. }
  318. foreach($matches as $k => $m) {
  319. if(is_numeric($k)) {
  320. continue;
  321. }
  322. foreach($m as $i => $match) {
  323. if($match === "") {
  324. continue;
  325. }
  326. $toSet = null;
  327. if($transform) {
  328. $toSet = $transform($k, $match);
  329. }
  330. else {
  331. $toSet = ["type" => $k, "content" => $match];
  332. }
  333. if(!isset($set[$i])) {
  334. $set[$i] = $toSet;
  335. }
  336. else {
  337. if(!isset($set[$i]["detail"])) {
  338. $set[$i]["detail"] = [];
  339. }
  340. array_push($set[$i]["detail"], $toSet);
  341. }
  342. }
  343. }
  344. return $set;
  345. }
  346. }