Translator.php 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. <?php /** @noinspection HtmlDeprecatedTag */
  2. namespace Gt\CssXPath;
  3. class Translator {
  4. const cssRegex =
  5. '/'
  6. . '(?P<star>\*)'
  7. . '|(:(?P<pseudo>[\w-]*))'
  8. . '|\(*(?P<pseudospecifier>["\']*[\w\h-]*["\']*)\)'
  9. . '|(?P<element>[\w-]*)'
  10. . '|(?P<child>\s*>\s*)'
  11. . '|(#(?P<id>[\w-]*))'
  12. . '|(\.(?P<class>[\w-]*))'
  13. . '|(?P<sibling>\s*\+\s*)'
  14. . "|(\[(?P<attribute>[\w-]*)((?P<attribute_equals>[=~$]+)(?P<attribute_value>(.+\[\]'?)|[^\]]+))*\])+"
  15. . '|(?P<descendant>\s+)'
  16. . '/';
  17. const EQUALS_EXACT = "=";
  18. const EQUALS_CONTAINS_WORD = "~=";
  19. const EQUALS_ENDS_WITH = "$=";
  20. const EQUALS_CONTAINS = "*=";
  21. const EQUALS_STARTS_WITH_OR_STARTS_WITH_HYPHENATED = "|=";
  22. const EQUALS_STARTS_WITH = "^=";
  23. /** @var string */
  24. protected $cssSelector;
  25. /** @var string */
  26. protected $prefix;
  27. public function __construct(string $cssSelector, string $prefix = ".//") {
  28. $this->cssSelector = $cssSelector;
  29. $this->prefix = $prefix;
  30. }
  31. public function __toString():string {
  32. return $this->asXPath();
  33. }
  34. public function asXPath():string {
  35. return $this->convert($this->cssSelector);
  36. }
  37. protected function convert(string $css):string {
  38. $cssArray = preg_split(
  39. '/(["\']).*?\1(*SKIP)(*F)|,/',
  40. $css
  41. );
  42. $xPathArray = [];
  43. foreach($cssArray as $input) {
  44. $output = $this->convertSingleSelector(trim($input));
  45. $xPathArray []= $output;
  46. }
  47. return implode(" | ", $xPathArray);
  48. }
  49. protected function convertSingleSelector(string $css):string {
  50. $thread = $this->preg_match_collated(self::cssRegex, $css);
  51. $thread = array_values($thread);
  52. $xpath = [$this->prefix];
  53. $prevType = "";
  54. foreach($thread as $threadKey => $currentThreadItem) {
  55. $next = isset($thread[$threadKey + 1])
  56. ? $thread[$threadKey + 1]
  57. : false;
  58. switch ($currentThreadItem["type"]) {
  59. case "star":
  60. case "element":
  61. $xpath []= $currentThreadItem['content'];
  62. break;
  63. case "pseudo":
  64. $specifier = "";
  65. if ($next && $next["type"] == "pseudospecifier") {
  66. $specifier = "{$next['content']}";
  67. }
  68. switch ($currentThreadItem["content"]) {
  69. case "disabled":
  70. case "checked":
  71. case "selected":
  72. array_push(
  73. $xpath,
  74. "[@{$currentThreadItem['content']}]"
  75. );
  76. break;
  77. case "text":
  78. array_push(
  79. $xpath,
  80. '[@type="text"]'
  81. );
  82. break;
  83. case "contains":
  84. if(empty($specifier)) {
  85. continue 3;
  86. }
  87. array_push(
  88. $xpath,
  89. "[contains(text(),$specifier)]"
  90. );
  91. break;
  92. case "first-child":
  93. $prev = count($xpath) - 1;
  94. $xpath[$prev] = '*[1]/self::' . $xpath[$prev];
  95. break;
  96. case "nth-child":
  97. if (empty($specifier)) {
  98. continue 3;
  99. }
  100. $prev = count($xpath) - 1;
  101. $previous = $xpath[$prev];
  102. if (substr($previous, -1, 1) === "]") {
  103. $xpath[$prev] = str_replace(
  104. "]",
  105. " and position() = $specifier]",
  106. $xpath[$prev]
  107. );
  108. }
  109. else {
  110. array_push(
  111. $xpath,
  112. "[$specifier]"
  113. );
  114. }
  115. break;
  116. case "nth-of-type":
  117. if (empty($specifier)) {
  118. continue 3;
  119. }
  120. $prev = count($xpath) - 1;
  121. $previous = $xpath[$prev];
  122. if(substr($previous, -1, 1) === "]") {
  123. array_push(
  124. $xpath,
  125. "[$specifier]"
  126. );
  127. }
  128. else {
  129. array_push(
  130. $xpath,
  131. "[$specifier]"
  132. );
  133. }
  134. break;
  135. }
  136. break;
  137. case "child":
  138. array_push($xpath, "/");
  139. break;
  140. case "id":
  141. array_push(
  142. $xpath,
  143. ($prevType != "element" ? '*' : '')
  144. . "[@id='{$currentThreadItem['content']}']"
  145. );
  146. break;
  147. case "class":
  148. // https://devhints.io/xpath#class-check
  149. array_push(
  150. $xpath,
  151. (($prevType != "element" && $prevType != "class") ? '*' : '')
  152. . "[contains(concat(' ',normalize-space(@class),' '),' {$currentThreadItem['content']} ')]"
  153. );
  154. break;
  155. case "sibling":
  156. array_push(
  157. $xpath,
  158. "/following-sibling::*[1]/self::"
  159. );
  160. break;
  161. case "attribute":
  162. if(!$prevType) {
  163. array_push($xpath, "*");
  164. }
  165. /** @var null|array<int, array<string, string>> $detail */
  166. $detail = $currentThreadItem["detail"] ?? null;
  167. $detailType = $detail[0] ?? null;
  168. $detailValue = $detail[1] ?? null;
  169. if(!$detailType
  170. || $detailType["type"] !== "attribute_equals") {
  171. array_push(
  172. $xpath,
  173. "[@{$currentThreadItem['content']}]"
  174. );
  175. continue 2;
  176. }
  177. $valueString = trim(
  178. $detailValue["content"],
  179. " '\""
  180. );
  181. $equalsType = $detailType["content"];
  182. switch ($equalsType) {
  183. case self::EQUALS_EXACT:
  184. array_push(
  185. $xpath,
  186. "[@{$currentThreadItem['content']}=\"{$valueString}\"]"
  187. );
  188. break;
  189. case self::EQUALS_CONTAINS:
  190. throw new NotYetImplementedException();
  191. case self::EQUALS_CONTAINS_WORD:
  192. array_push(
  193. $xpath,
  194. "["
  195. . "contains("
  196. . "concat(\" \",@{$currentThreadItem['content']},\" \"),"
  197. . "concat(\" \",\"{$valueString}\",\" \")"
  198. . ")"
  199. . "]"
  200. );
  201. break;
  202. case self::EQUALS_STARTS_WITH_OR_STARTS_WITH_HYPHENATED:
  203. throw new NotYetImplementedException();
  204. case self::EQUALS_STARTS_WITH:
  205. throw new NotYetImplementedException();
  206. case self::EQUALS_ENDS_WITH:
  207. array_push(
  208. $xpath,
  209. "["
  210. . "substring("
  211. . "@{$currentThreadItem['content']},"
  212. . "string-length(@{$currentThreadItem['content']}) - "
  213. . "string-length(\"{$valueString}\") + 1)"
  214. . "=\"{$valueString}\""
  215. . "]"
  216. );
  217. break;
  218. }
  219. break;
  220. case "descendant":
  221. array_push($xpath, "//");
  222. break;
  223. }
  224. $prevType = $currentThreadItem["type"];
  225. }
  226. return implode("", $xpath);
  227. }
  228. /** @return array<int, array<string, string>> */
  229. protected function preg_match_collated(
  230. string $regex,
  231. string $string,
  232. callable $transform = null
  233. ):array {
  234. preg_match_all(
  235. $regex,
  236. $string,
  237. $matches,
  238. PREG_PATTERN_ORDER
  239. );
  240. $set = [];
  241. foreach($matches[0] as $k => $v) {
  242. if(!empty($v)) {
  243. $set[$k] = null;
  244. }
  245. }
  246. foreach($matches as $k => $m) {
  247. if(is_numeric($k)) {
  248. continue;
  249. }
  250. foreach($m as $i => $match) {
  251. if($match === "") {
  252. continue;
  253. }
  254. $toSet = null;
  255. if($transform) {
  256. $toSet = $transform($k, $match);
  257. }
  258. else {
  259. $toSet = ["type" => $k, "content" => $match];
  260. }
  261. if(!isset($set[$i])) {
  262. $set[$i] = $toSet;
  263. }
  264. else {
  265. if(!isset($set[$i]["detail"])) {
  266. $set[$i]["detail"] = [];
  267. }
  268. array_push($set[$i]["detail"], $toSet);
  269. }
  270. }
  271. }
  272. return $set;
  273. }
  274. }