IRI.php 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257
  1. <?php
  2. /**
  3. * SimplePie
  4. *
  5. * A PHP-Based RSS and Atom Feed Framework.
  6. * Takes the hard work out of managing a complete RSS/Atom solution.
  7. *
  8. * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
  9. * All rights reserved.
  10. *
  11. * Redistribution and use in source and binary forms, with or without modification, are
  12. * permitted provided that the following conditions are met:
  13. *
  14. * * Redistributions of source code must retain the above copyright notice, this list of
  15. * conditions and the following disclaimer.
  16. *
  17. * * Redistributions in binary form must reproduce the above copyright notice, this list
  18. * of conditions and the following disclaimer in the documentation and/or other materials
  19. * provided with the distribution.
  20. *
  21. * * Neither the name of the SimplePie Team nor the names of its contributors may be used
  22. * to endorse or promote products derived from this software without specific prior
  23. * written permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  26. * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  27. * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  28. * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  32. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33. * POSSIBILITY OF SUCH DAMAGE.
  34. *
  35. * @package SimplePie
  36. * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
  37. * @author Ryan Parman
  38. * @author Geoffrey Sneddon
  39. * @author Ryan McCue
  40. * @link http://simplepie.org/ SimplePie
  41. * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  42. */
  43. /**
  44. * IRI parser/serialiser/normaliser
  45. *
  46. * @package SimplePie
  47. * @subpackage HTTP
  48. * @author Geoffrey Sneddon
  49. * @author Steve Minutillo
  50. * @author Ryan McCue
  51. * @copyright 2007-2012 Geoffrey Sneddon, Steve Minutillo, Ryan McCue
  52. * @license http://www.opensource.org/licenses/bsd-license.php
  53. */
  54. class SimplePie_IRI
  55. {
  /**
   * Scheme, stored lowercased by set_scheme(); null when absent
   *
   * @var string|null
   */
  protected $scheme = null;

  /**
   * User information part of the authority; null when absent
   *
   * @var string|null
   */
  protected $iuserinfo = null;

  /**
   * Host part of the authority; null when absent
   *
   * @var string|null
   */
  protected $ihost = null;

  /**
   * Port, stored as an integer by set_port(); null when absent
   *
   * @var int|null
   */
  protected $port = null;

  /**
   * Path; the empty string when absent
   *
   * @var string
   */
  protected $ipath = '';

  /**
   * Query; null when absent
   *
   * @var string|null
   */
  protected $iquery = null;

  /**
   * Fragment; null when absent
   *
   * @var string|null
   */
  protected $ifragment = null;

  /**
   * Normalization database
   *
   * Keyed by scheme. Each entry maps an IRI part (named after the property
   * holding it) to that scheme's default value; scheme_normalization()
   * blanks a part that equals its default and __get() reports the default
   * for a part that is null.
   *
   * @var array
   */
  protected $normalization = array(
      'acap' => array('port' => 674),
      'dict' => array('port' => 2628),
      'file' => array('ihost' => 'localhost'),
      'http' => array('port' => 80, 'ipath' => '/'),
      'https' => array('port' => 443, 'ipath' => '/'),
  );
  /**
   * Return the entire IRI when the object is used as a string
   *
   * PHP requires __toString() to return a string. get_iri() is defined
   * outside this excerpt, so its result is cast defensively in case it can
   * yield a non-string value for an IRI it cannot serialise.
   *
   * @return string
   */
  public function __toString()
  {
      return (string) $this->get_iri();
  }
  /**
   * Overload __set() so that parts can be assigned as properties
   *
   * A name with a matching set_<name>() method is routed to that method.
   * Otherwise the "i"-prefixed part names are routed to the setter named
   * without the prefix (e.g. ihost -> set_host()). Any other name is
   * silently ignored.
   *
   * @param string $name Property name
   * @param mixed $value Property value
   */
  public function __set($name, $value)
  {
      $setter = 'set_' . $name;
      if (method_exists($this, $setter))
      {
          $this->$setter($value);
          return;
      }

      $prefixed_parts = array('iauthority', 'iuserinfo', 'ihost', 'ipath', 'iquery', 'ifragment');
      if (in_array($name, $prefixed_parts, true))
      {
          $setter = 'set_' . substr($name, 1);
          $this->$setter($value);
      }
  }
  /**
   * Overload __get() so that parts can be read as properties
   *
   * Lookup order: the computed values (iri, uri, iauthority, authority) via
   * their get_*() methods, then a property with exactly that name, then the
   * name with an "i" prepended (host -> ihost), then the name with its first
   * character removed (ischeme -> scheme). An unknown name raises an
   * E_USER_NOTICE and reads as null.
   *
   * When the value found is null and the normalization table has a default
   * for the current scheme and the resolved name, that default is returned.
   *
   * @param string $name Property name
   * @return mixed
   */
  public function __get($name)
  {
      // array_key_exists() is used throughout because isset() reports false
      // for properties whose value is null, and null is a legitimate value.
      $props = get_object_vars($this);
      $computed = array('iri', 'uri', 'iauthority', 'authority');
      $with_prefix = 'i' . $name;
      $without_prefix = substr($name, 1);

      if (in_array($name, $computed, true))
      {
          $getter = 'get_' . $name;
          $return = $this->$getter();
      }
      elseif (array_key_exists($name, $props))
      {
          $return = $this->$name;
      }
      elseif (array_key_exists($with_prefix, $props))
      {
          // host -> ihost
          $name = $with_prefix;
          $return = $this->$name;
      }
      elseif ($without_prefix && array_key_exists($without_prefix, $props))
      {
          // ischeme -> scheme
          $name = $without_prefix;
          $return = $this->$name;
      }
      else
      {
          trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
          $return = null;
      }

      if ($return !== null)
      {
          return $return;
      }
      if (isset($this->normalization[$this->scheme][$name]))
      {
          return $this->normalization[$this->scheme][$name];
      }
      return $return;
  }
  /**
   * Overload __isset() so that isset() works on the virtual properties
   *
   * A name counts as set when a get_<name>() method exists or when the
   * property of that name is set (and not null).
   *
   * @param string $name Property name
   * @return bool
   */
  public function __isset($name)
  {
      return method_exists($this, 'get_' . $name) || isset($this->$name);
  }
  /**
   * Overload __unset() so that unset() works on the virtual properties
   *
   * Calls set_<name>('') when such a setter exists; other names are ignored.
   *
   * @param string $name Property name
   */
  public function __unset($name)
  {
      $setter = 'set_' . $name;
      if (!method_exists($this, $setter))
      {
          return;
      }
      $this->$setter('');
  }
  /**
   * Build an IRI object from a string
   *
   * The value is handed to set_iri(); its success flag is not inspected
   * here. With null (the default) every part keeps its initial value.
   *
   * @param string|null $iri
   */
  public function __construct($iri = null)
  {
      $this->set_iri($iri);
  }
  /**
   * Clean up
   *
   * Invokes each caching setter in its clear-cache mode, which empties the
   * static lookup cache that setter keeps.
   */
  public function __destruct()
  {
      foreach (array('set_iri', 'set_path', 'set_authority') as $caching_setter)
      {
          $this->$caching_setter(null, true);
      }
  }
  /**
   * Create a new IRI object by resolving a relative IRI against a base
   *
   * Returns false when $relative is not valid, or when $base has no scheme
   * or is not valid. A $relative that already has a scheme is returned as a
   * clone without looking at $base.
   *
   * @param SimplePie_IRI|string $base (Absolute) Base IRI
   * @param SimplePie_IRI|string $relative Relative IRI
   * @return SimplePie_IRI|false
   */
  public static function absolutize($base, $relative)
  {
      if (!($relative instanceof SimplePie_IRI))
      {
          $relative = new SimplePie_IRI($relative);
      }
      if (!$relative->is_valid())
      {
          return false;
      }
      if ($relative->scheme !== null)
      {
          // Already absolute: nothing to resolve.
          return clone $relative;
      }

      if (!($base instanceof SimplePie_IRI))
      {
          $base = new SimplePie_IRI($base);
      }
      if ($base->scheme === null || !$base->is_valid())
      {
          return false;
      }

      // Properties below are assigned directly (this is class scope), so
      // the set_*() validators are intentionally not involved.
      if ($relative->get_iri() === '')
      {
          // Empty reference: the base itself, minus its fragment.
          $target = clone $base;
          $target->ifragment = null;
      }
      elseif ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null)
      {
          // Network-path reference: only the scheme comes from the base.
          $target = clone $relative;
          $target->scheme = $base->scheme;
      }
      else
      {
          $target = new SimplePie_IRI;
          $target->scheme = $base->scheme;
          $target->iuserinfo = $base->iuserinfo;
          $target->ihost = $base->ihost;
          $target->port = $base->port;

          if ($relative->ipath === '')
          {
              // No path given: keep the base path, and the base query too
              // unless the reference supplies its own.
              $target->ipath = $base->ipath;
              if ($relative->iquery !== null)
              {
                  $target->iquery = $relative->iquery;
              }
              elseif ($base->iquery !== null)
              {
                  $target->iquery = $base->iquery;
              }
          }
          else
          {
              if ($relative->ipath[0] === '/')
              {
                  // Absolute path replaces the base path outright.
                  $merged = $relative->ipath;
              }
              elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '')
              {
                  // Base has an authority but no path: root the reference.
                  $merged = '/' . $relative->ipath;
              }
              elseif (($last_slash = strrpos($base->ipath, '/')) !== false)
              {
                  // Replace everything after the last "/" of the base path.
                  $merged = substr($base->ipath, 0, $last_slash + 1) . $relative->ipath;
              }
              else
              {
                  $merged = $relative->ipath;
              }
              $target->ipath = $target->remove_dot_segments($merged);
              $target->iquery = $relative->iquery;
          }
          $target->ifragment = $relative->ifragment;
      }

      $target->scheme_normalization();
      return $target;
  }
  /**
   * Split an IRI into scheme/authority/path/query/fragment segments
   *
   * Leading and trailing space, tab, LF, FF and CR are trimmed first. The
   * result is the preg_match() array (numbered and named groups) in which
   * the named entries for absent components are normalised: scheme,
   * authority, query and fragment become null, path becomes ''.
   *
   * @param string $iri
   * @return array|false The match array, or false when the pattern does not match
   */
  protected function parse_iri($iri)
  {
      $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
      $matched = preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/', $iri, $match);
      if (!$matched)
      {
          // This can occur when a paragraph is accidentally parsed as a URI
          return false;
      }

      // The numbered wrapper groups include the delimiter (":", "//", "?",
      // "#"), so an empty wrapper means the component is absent, whereas
      // an empty named group only means the component is present but empty.
      if ($match[1] === '')
      {
          $match['scheme'] = null;
      }
      if (!isset($match[3]) || $match[3] === '')
      {
          $match['authority'] = null;
      }
      if (!isset($match[5]))
      {
          $match['path'] = '';
      }
      foreach (array(6 => 'query', 8 => 'fragment') as $wrapper => $component)
      {
          if (!isset($match[$wrapper]) || $match[$wrapper] === '')
          {
              $match[$component] = null;
          }
      }
      return $match;
  }
  /**
   * Remove "." and ".." segments from a path
   *
   * Works through the input buffer from the left, moving segments to an
   * output buffer. The loop stops as soon as the remaining input holds no
   * "./" or "/." and is not "." or ".."; whatever is left is appended
   * unchanged.
   *
   * @param string $input
   * @return string
   */
  protected function remove_dot_segments($input)
  {
      $output = '';
      while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
      {
          if (strncmp($input, '../', 3) === 0)
          {
              // A: a leading "../" is dropped ...
              $input = substr($input, 3);
          }
          elseif (strncmp($input, './', 2) === 0)
          {
              // ... and so is a leading "./".
              $input = substr($input, 2);
          }
          elseif (strncmp($input, '/./', 3) === 0)
          {
              // B: a leading "/./" collapses to "/" ...
              $input = substr($input, 2);
          }
          elseif ($input === '/.')
          {
              // ... as does an input that is exactly "/.".
              $input = '/';
          }
          elseif (strncmp($input, '/../', 4) === 0 || $input === '/..')
          {
              // C: a leading "/../" (or an input of exactly "/..") collapses
              // to "/" and the last segment already emitted is removed,
              // together with its preceding "/". With no "/" in the output
              // strrpos() yields false, i.e. offset 0, which empties it.
              $input = ($input === '/..') ? '/' : substr($input, 3);
              $output = substr_replace($output, '', (int) strrpos($output, '/'));
          }
          elseif ($input === '.' || $input === '..')
          {
              // D: a lone "." or ".." disappears.
              $input = '';
          }
          elseif (($next_slash = strpos($input, '/', 1)) !== false)
          {
              // E: move the first segment (with its leading "/", if any, up
              // to but excluding the next "/") to the output.
              $output .= substr($input, 0, $next_slash);
              $input = substr($input, $next_slash);
          }
          else
          {
              // E, last segment: everything that is left moves over.
              $output .= $input;
              $input = '';
          }
      }
      return $output . $input;
  }
  /**
   * Percent-encode every byte that is not allowed in the component
   *
   * Steps: (1) runs of percent escapes are passed through
   * remove_iunreserved_percent_encoded(); (2) any "%" not followed by two
   * hex digits becomes "%25"; (3) the string is scanned and each byte
   * sequence that is neither in the allowed ASCII set nor a well-formed
   * UTF-8 character in the permitted code point ranges is replaced by
   * uppercase percent escapes, one per byte.
   *
   * @param string $string Input string
   * @param string $extra_chars Valid characters not in iunreserved or
   *                            iprivate (this is ASCII-only)
   * @param bool $iprivate Allow iprivate
   * @return string
   */
  protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
  {
      // Normalize as many pct-encoded sections as possible
      $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);

      // Escape stray percent signs
      $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

      // Bytes that can be skipped untouched: the caller's extras, the ASCII
      // unreserved set, and "%" (safe, as every "%" left starts a valid escape).
      $allowed = $extra_chars . 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

      $position = 0;
      $strlen = strlen($string);
      // strspn() jumps over allowed bytes; each iteration starts on the
      // first byte that is not in the allowed set.
      while (($position += strspn($string, $allowed, $position)) < $strlen)
      {
          $lead = ord($string[$position]);
          $start = $position;
          $valid = true;

          // Classify the lead byte. A one-byte (ASCII) character can never
          // be valid here, because allowed ASCII was skipped above.
          if (($lead & 0xE0) === 0xC0)
          {
              $character = ($lead & 0x1F) << 6;
              $length = 2;
          }
          elseif (($lead & 0xF0) === 0xE0)
          {
              $character = ($lead & 0x0F) << 12;
              $length = 3;
          }
          elseif (($lead & 0xF8) === 0xF0)
          {
              $character = ($lead & 0x07) << 18;
              $length = 4;
          }
          else
          {
              $valid = false;
              $length = 1;
          }
          $remaining = $length - 1;

          if ($remaining)
          {
              if ($position + $length <= $strlen)
              {
                  // On normal exit $position is one past the sequence.
                  for ($position++; $remaining; $position++)
                  {
                      $value = ord($string[$position]);
                      if (($value & 0xC0) === 0x80)
                      {
                          $character |= ($value & 0x3F) << (--$remaining * 6);
                      }
                      else
                      {
                          // Bad continuation byte: the sequence ends on the
                          // byte before it, and the bad byte is re-examined
                          // by the next iteration of the outer loop.
                          $valid = false;
                          $position--;
                          break;
                      }
                  }
              }
              else
              {
                  // Sequence is cut off by the end of the string.
                  $position = $strlen - 1;
                  $valid = false;
              }
          }

          // Percent encode anything invalid or not in ucschar. $character is
          // only read when $valid is true (short-circuit on !$valid).
          if (
              !$valid
              // Non-shortest form sequences are invalid
              || ($length > 1 && $character <= 0x7F)
              || ($length > 2 && $character <= 0x7FF)
              || ($length > 3 && $character <= 0xFFFF)
              // Noncharacters
              || ($character & 0xFFFE) === 0xFFFE
              || ($character >= 0xFDD0 && $character <= 0xFDEF)
              || (
                  (
                      // Everything else not in ucschar
                      ($character > 0xD7FF && $character < 0xF900)
                      || $character < 0xA0
                      || $character > 0xEFFFD
                  )
                  && (
                      // Everything not in iprivate, if it applies
                      !$iprivate
                      || $character < 0xE000
                      || $character > 0x10FFFD
                  )
              )
          )
          {
              // A fully decoded character left $position one past its last
              // byte; step back so that it marks the last byte to encode.
              if ($valid)
              {
                  $position--;
              }
              // Each replaced byte grows the string by two, so the cursor,
              // the end marker and the length all shift along with it.
              for ($j = $start; $j <= $position; $j += 3)
              {
                  $string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
                  $position += 2;
                  $strlen += 2;
              }
          }
      }
      return $string;
  }
  /**
   * Callback function for preg_replace_callback.
   *
   * Receives a run of consecutive percent escapes and decodes those that
   * form well-formed UTF-8 characters in iunreserved. Every other escape is
   * kept, with its hex digits uppercased.
   *
   * @param array $match PCRE match
   * @return string Replacement
   */
  protected function remove_iunreserved_percent_encoded($match)
  {
      // The run consists solely of valid escapes, so splitting on "%" gives
      // an empty first element followed by one two-digit hex string per byte.
      $bytes = explode('%', $match[0]);
      $len = count($bytes);

      $string = '';
      // Continuation bytes still expected for the character in progress.
      $remaining = 0;

      for ($i = 1; $i < $len; $i++)
      {
          $value = hexdec($bytes[$i]);

          if ($remaining)
          {
              // Inside a multi-byte character.
              if (($value & 0xC0) === 0x80)
              {
                  $remaining--;
                  $character |= ($value & 0x3F) << ($remaining * 6);
              }
              else
              {
                  // Not a continuation byte: close the sequence as invalid
                  // and step back so this byte is re-read as a lead byte.
                  $valid = false;
                  $remaining = 0;
                  $i--;
              }
          }
          else
          {
              // First byte of a new character.
              $start = $i;
              $valid = true;
              if ($value <= 0x7F)
              {
                  $character = $value;
                  $length = 1;
              }
              elseif (($value & 0xE0) === 0xC0)
              {
                  $character = ($value & 0x1F) << 6;
                  $length = 2;
                  $remaining = 1;
              }
              elseif (($value & 0xF0) === 0xE0)
              {
                  $character = ($value & 0x0F) << 12;
                  $length = 3;
                  $remaining = 2;
              }
              elseif (($value & 0xF8) === 0xF0)
              {
                  $character = ($value & 0x07) << 18;
                  $length = 4;
                  $remaining = 3;
              }
              else
              {
                  // Not a possible lead byte.
                  $valid = false;
                  $remaining = 0;
              }
          }

          if ($remaining)
          {
              // Character not complete yet.
              continue;
          }

          // Keep the escapes for anything invalid or not in iunreserved.
          // $length and $character are only read when $valid is true.
          $keep_encoded =
              !$valid
              // Non-shortest form sequences are invalid
              || ($length > 1 && $character <= 0x7F)
              || ($length > 2 && $character <= 0x7FF)
              || ($length > 3 && $character <= 0xFFFF)
              // Outside of range of iunreserved codepoints
              || $character < 0x2D
              || $character > 0xEFFFD
              // Noncharacters
              || ($character & 0xFFFE) === 0xFFFE
              || ($character >= 0xFDD0 && $character <= 0xFDEF)
              // Everything else not in iunreserved (this is all BMP)
              || $character === 0x2F
              || ($character > 0x39 && $character < 0x41)
              || ($character > 0x5A && $character < 0x61)
              || ($character > 0x7A && $character < 0x7E)
              || ($character > 0x7E && $character < 0xA0)
              || ($character > 0xD7FF && $character < 0xF900);

          for ($j = $start; $j <= $i; $j++)
          {
              $string .= $keep_encoded
                  ? '%' . strtoupper($bytes[$j])
                  : chr(hexdec($bytes[$j]));
          }
      }

      // A character still in progress at the end of the run is incomplete,
      // hence invalid: keep its escapes.
      if ($remaining)
      {
          for ($j = $start; $j < $len; $j++)
          {
              $string .= '%' . strtoupper($bytes[$j]);
          }
      }
      return $string;
  }
  /**
   * Blank every part that merely repeats the current scheme's default
   *
   * For each part that has a default for the current scheme in the
   * normalization table and is strictly equal to that default, the part is
   * reset to its "absent" value: '' for ipath, null for the others.
   */
  protected function scheme_normalization()
  {
      if (!isset($this->normalization[$this->scheme]))
      {
          return;
      }
      $defaults = $this->normalization[$this->scheme];

      // Part name => value that represents "absent" for that part.
      $absent = array(
          'iuserinfo' => null,
          'ihost' => null,
          'port' => null,
          'ipath' => '',
          'iquery' => null,
          'ifragment' => null,
      );
      foreach ($absent as $part => $blank)
      {
          if (isset($defaults[$part]) && $this->$part === $defaults[$part])
          {
              $this->$part = $blank;
          }
      }
  }
  /**
   * Check if the object represents a valid IRI. This has to be evaluated on
   * every call because the answer depends on how the parts combine.
   *
   * An empty path is always accepted, and so is any path when an authority
   * part (userinfo, host or port) is present. Without an authority, a path
   * starting with "//" is rejected; without scheme and authority, a path
   * whose first ":" comes before the first "/" found from offset 1 is
   * rejected.
   *
   * NOTE(review): with an authority present, a path that does not start
   * with "/" is accepted as well - confirm that this is intended.
   *
   * @return bool
   */
  public function is_valid()
  {
      $path = $this->ipath;
      if ($path === '')
      {
          return true;
      }

      $has_authority = $this->iuserinfo !== null
          || $this->ihost !== null
          || $this->port !== null;
      if ($has_authority)
      {
          return true;
      }

      if (substr($path, 0, 2) === '//')
      {
          return false;
      }

      if (!$this->scheme)
      {
          // Relative urls cannot have a colon in the first path segment
          // (the slash search skips the first character).
          $colon = strpos($path, ':');
          $slash = strpos($path, '/', 1);
          if ($colon !== false && $slash !== false && $colon < $slash)
          {
              return false;
          }
      }
      return true;
  }
  /**
   * Set the entire IRI. Returns true on success, false on failure (if there
   * are any invalid characters).
   *
   * Results are memoised in a static cache keyed by the IRI string, shared
   * by all instances. A null $iri leaves the object untouched and returns
   * true. With $clear_cache set, the cache is emptied, nothing else
   * happens and nothing (null) is returned.
   *
   * @param string|null $iri
   * @param bool $clear_cache Empty the static cache instead of setting anything
   * @return bool|null
   */
  public function set_iri($iri, $clear_cache = false)
  {
      static $cache;
      if ($clear_cache)
      {
          $cache = null;
          return;
      }
      if (!$cache)
      {
          $cache = array();
      }

      if ($iri === null)
      {
          return true;
      }

      // Normalise to a string before the value is used as a cache key.
      // Parsing always worked on the string form, but a non-string (for
      // instance an object with __toString()) is not a legal array offset.
      $iri = (string) $iri;

      if (isset($cache[$iri]))
      {
          list($this->scheme,
               $this->iuserinfo,
               $this->ihost,
               $this->port,
               $this->ipath,
               $this->iquery,
               $this->ifragment,
               $return) = $cache[$iri];
          return $return;
      }

      $parsed = $this->parse_iri($iri);
      if (!$parsed)
      {
          return false;
      }

      // The setters run in this order and stop at the first failure.
      $return = $this->set_scheme($parsed['scheme'])
          && $this->set_authority($parsed['authority'])
          && $this->set_path($parsed['path'])
          && $this->set_query($parsed['query'])
          && $this->set_fragment($parsed['fragment']);

      $cache[$iri] = array($this->scheme,
                           $this->iuserinfo,
                           $this->ihost,
                           $this->port,
                           $this->ipath,
                           $this->iquery,
                           $this->ifragment,
                           $return);
      return $return;
  }
  /**
   * Set the scheme. Returns true on success, false on failure (if there are
   * any invalid characters).
   *
   * A valid scheme is a letter followed by letters, digits, "+", "-" or
   * "."; it is stored lowercased. null clears the scheme. An invalid value
   * also clears the scheme before false is returned.
   *
   * @param string|null $scheme
   * @return bool
   */
  public function set_scheme($scheme)
  {
      if ($scheme === null)
      {
          $this->scheme = null;
          return true;
      }

      // The D modifier makes "$" match only at the very end of the subject.
      // Without it "$" also matches before a final newline, so a value such
      // as "http\n" was accepted and stored with the newline.
      if (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/D', $scheme))
      {
          $this->scheme = null;
          return false;
      }

      $this->scheme = strtolower($scheme);
      return true;
  }
  /**
   * Set the authority. Returns true on success, false on failure (if there are
   * any invalid characters).
   *
   * The authority is split into userinfo (everything before the last "@"),
   * host and port (everything after the first ":" that follows a closing
   * "]", if there is one). Results are memoised in a static cache shared by
   * all instances. null clears all three parts. With $clear_cache set, the
   * cache is emptied, nothing else happens and nothing (null) is returned.
   *
   * @param string|null $authority
   * @param bool $clear_cache Empty the static cache instead of setting anything
   * @return bool|null
   */
  public function set_authority($authority, $clear_cache = false)
  {
      static $cache;
      if ($clear_cache)
      {
          $cache = null;
          return;
      }
      if (!$cache)
      {
          $cache = array();
      }

      if ($authority === null)
      {
          $this->iuserinfo = null;
          $this->ihost = null;
          $this->port = null;
          return true;
      }

      // The stored parts depend on the current scheme as well, because the
      // part setters apply scheme_normalization() (e.g. port 80 is blanked
      // for http only). The scheme is therefore part of the cache key; a
      // valid scheme cannot contain ":", so the key is unambiguous.
      $cache_key = $this->scheme . ':' . $authority;
      if (isset($cache[$cache_key]))
      {
          list($this->iuserinfo,
               $this->ihost,
               $this->port,
               $return) = $cache[$cache_key];
          return $return;
      }

      $remaining = $authority;

      $iuserinfo = null;
      if (($iuserinfo_end = strrpos($remaining, '@')) !== false)
      {
          $iuserinfo = substr($remaining, 0, $iuserinfo_end);
          $remaining = substr($remaining, $iuserinfo_end + 1);
      }

      // Start looking for the port delimiter after a closing "]" so that
      // the colons inside an IP literal are skipped; without a "]" the
      // search starts at offset 0.
      $port = null;
      $port_start = strpos($remaining, ':', (int) strpos($remaining, ']'));
      if ($port_start !== false)
      {
          $port = substr($remaining, $port_start + 1);
          // A bare trailing ":" means "no port". substr() signals that
          // with false on older PHP versions and with '' on PHP 8, where
          // the empty string would otherwise end up as port 0.
          if ($port === false || $port === '')
          {
              $port = null;
          }
          $remaining = substr($remaining, 0, $port_start);
      }

      // The setters run in this order and stop at the first failure.
      $return = $this->set_userinfo($iuserinfo)
          && $this->set_host($remaining)
          && $this->set_port($port);

      $cache[$cache_key] = array($this->iuserinfo,
                                 $this->ihost,
                                 $this->port,
                                 $return);
      return $return;
  }
  /**
   * Set the iuserinfo.
   *
   * null clears the part. Any other value is stored after disallowed bytes
   * have been percent-encoded, and scheme normalization is applied. Always
   * returns true.
   *
   * @param string|null $iuserinfo
   * @return bool
   */
  public function set_userinfo($iuserinfo)
  {
      if ($iuserinfo !== null)
      {
          $this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
          $this->scheme_normalization();
          return true;
      }

      $this->iuserinfo = null;
      return true;
  }
  /**
   * Set the ihost. Returns true on success, false on failure (if there are
   * any invalid characters).
   *
   * null clears the host. A value wrapped in "[" and "]" is checked and
   * compressed through SimplePie_Net_IPv6; if the check fails the host is
   * cleared and false is returned. Any other value has disallowed bytes
   * percent-encoded and its ASCII uppercase letters lowercased.
   *
   * @param string|null $ihost
   * @return bool
   */
  public function set_host($ihost)
  {
      if ($ihost === null)
      {
          $this->ihost = null;
          return true;
      }

      if (substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']')
      {
          $literal = substr($ihost, 1, -1);
          if (!SimplePie_Net_IPv6::check_ipv6($literal))
          {
              $this->ihost = null;
              return false;
          }
          $this->ihost = '[' . SimplePie_Net_IPv6::compress($literal) . ']';
      }
      else
      {
          $host = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');

          // Lowercase, but leave pct-encoded sections alone (their hex
          // digits stay uppercase). This has to follow the encoding step,
          // which can itself introduce unescaped characters.
          $length = strlen($host);
          $offset = 0;
          while (($offset += strcspn($host, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $offset)) < $length)
          {
              if ($host[$offset] === '%')
              {
                  // Skip the "%" and its two hex digits.
                  $offset += 3;
                  continue;
              }
              $host[$offset] = strtolower($host[$offset]);
              $offset++;
          }
          $this->ihost = $host;
      }

      $this->scheme_normalization();
      return true;
  }
  /**
   * Set the port. Returns true on success, false on failure (if there are
   * any invalid characters).
   *
   * A null or empty-string port clears the stored port. The empty string
   * is what an authority such as "example.com:" yields on PHP >= 8, where
   * substr() returns '' instead of false; without this guard it would pass
   * the all-digits check below (0 === 0) and be stored as port 0. RFC 3986
   * treats an empty port as "no port" and asks normalisers to drop it.
   *
   * @param string|int|null $port Decimal digits, or null/'' for "no port"
   * @return bool True when the port was accepted (or cleared), false when
   *              it contains anything other than ASCII digits
   */
  public function set_port($port)
  {
      if ($port === null || $port === '')
      {
          $this->port = null;
          return true;
      }

      if (strspn($port, '0123456789') !== strlen($port))
      {
          // Reject anything that is not made up purely of ASCII digits.
          $this->port = null;
          return false;
      }

      $this->port = (int) $port;
      $this->scheme_normalization();
      return true;
  }
  /**
   * Store the ipath component.
   *
   * Results are memoised in a function-level static array keyed by the
   * string form of $ipath. Each entry holds two variants: index 0 is the
   * output of replace_invalid_with_pct_encoding(), index 1 is that output
   * after remove_dot_segments(). Index 1 is used when a scheme is set,
   * index 0 otherwise.
   *
   * @param string $ipath
   * @param bool $clear_cache When true, the memo is discarded and the
   *                          method returns immediately without touching
   *                          the path (the return value is then null)
   * @return bool|null True after storing a path; null after a cache reset
   */
  public function set_path($ipath, $clear_cache = false)
  {
      static $cache;

      if ($clear_cache)
      {
          $cache = null;
          return;
      }

      if (empty($cache))
      {
          $cache = array();
      }

      $ipath = (string) $ipath;

      if (!isset($cache[$ipath]))
      {
          $encoded = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
          $without_dots = $this->remove_dot_segments($encoded);
          $cache[$ipath] = array($encoded, $without_dots);
      }

      // false -> 0 (encoded only), true -> 1 (dot segments removed).
      $variant = (int) ($this->scheme !== null);
      $this->ipath = $cache[$ipath][$variant];

      $this->scheme_normalization();
      return true;
  }
  /**
   * Store the iquery component.
   *
   * Null clears the query. Any other value is run through
   * replace_invalid_with_pct_encoding() with the character list
   * "!$&'()*+,;=:@/?" and a third argument of true, followed by
   * scheme_normalization().
   * NOTE(review): the third argument presumably enables the IRI "iprivate"
   * ranges for queries; confirm against replace_invalid_with_pct_encoding().
   *
   * @param string|null $iquery
   * @return bool Always true
   */
  public function set_query($iquery)
  {
      if ($iquery !== null)
      {
          $extra_chars = '!$&\'()*+,;=:@/?';
          $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, $extra_chars, true);
          $this->scheme_normalization();
          return true;
      }

      $this->iquery = null;
      return true;
  }
  /**
   * Store the ifragment component.
   *
   * Null clears the fragment. Any other value is run through
   * replace_invalid_with_pct_encoding() with the character list
   * "!$&'()*+,;=:@/?", followed by scheme_normalization().
   *
   * @param string|null $ifragment
   * @return bool Always true
   */
  public function set_fragment($ifragment)
  {
      if ($ifragment !== null)
      {
          $extra_chars = '!$&\'()*+,;=:@/?';
          $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, $extra_chars);
          $this->scheme_normalization();
          return true;
      }

      $this->ifragment = null;
      return true;
  }
  /**
   * Convert an IRI (or a part of one) to a URI by percent-encoding every
   * byte in the range 0x80-0xFF as an upper-case "%XX" triplet. Bytes
   * below 0x80 are left exactly as they are.
   *
   * @param string $string IRI text to convert
   * @return string The input with all high bytes percent-encoded
   */
  public function to_uri($string)
  {
      // Mask of all bytes 0x80..0xFF, built once and reused across calls.
      static $high_bytes;
      if (!$high_bytes)
      {
          $high_bytes = implode('', range("\x80", "\xFF"));
      }

      $length = strlen($string);
      for ($offset = strcspn($string, $high_bytes, 0); $offset < $length; $offset += strcspn($string, $high_bytes, $offset))
      {
          $escaped = sprintf('%%%02X', ord($string[$offset]));
          $string = substr_replace($string, $escaped, $offset, 1);

          // One byte became three: step past the triplet and account for
          // the two extra bytes in the total length.
          $offset += 3;
          $length += 2;
      }

      return $string;
  }
  /**
   * Assemble the complete IRI from the stored components.
   *
   * Components are emitted in the order scheme ":", "//" authority, path,
   * "?" query, "#" fragment; each is skipped when unset. When the stored
   * path is empty, the scheme's entry in the normalization table supplies
   * a path instead, provided that entry is non-empty and the authority is
   * a non-empty string.
   *
   * @return string|false The IRI, or false when is_valid() reports failure
   */
  public function get_iri()
  {
      if (!$this->is_valid())
      {
          return false;
      }

      $iri = ($this->scheme !== null) ? $this->scheme . ':' : '';

      $iauthority = $this->get_iauthority();
      if ($iauthority !== null)
      {
          $iri .= '//' . $iauthority;
      }

      if ($this->ipath !== '')
      {
          $iri .= $this->ipath;
      }
      elseif (!empty($this->normalization[$this->scheme]['ipath']) && $iauthority !== null && $iauthority !== '')
      {
          // No explicit path: use the one from the scheme's normalization entry.
          $iri .= $this->normalization[$this->scheme]['ipath'];
      }

      if ($this->iquery !== null)
      {
          $iri .= '?' . $this->iquery;
      }

      if ($this->ifragment !== null)
      {
          $iri .= '#' . $this->ifragment;
      }

      return $iri;
  }
  /**
   * Get the complete URI: the result of get_iri() passed through to_uri().
   *
   * @return string|false Whatever to_uri() returns for the value produced
   *                      by get_iri()
   */
  public function get_uri()
  {
      $iri = $this->get_iri();

      return $this->to_uri($iri);
  }
  /**
   * Build the iauthority from the stored userinfo, host and port.
   *
   * The result has the shape [userinfo "@"] [host] [":" port], where each
   * piece is included only when the corresponding property is not null.
   *
   * @return string|null The assembled authority, or null when userinfo,
   *                     host and port are all null
   */
  protected function get_iauthority()
  {
      if ($this->iuserinfo === null && $this->ihost === null && $this->port === null)
      {
          return null;
      }

      $userinfo_part = ($this->iuserinfo !== null) ? $this->iuserinfo . '@' : '';
      $host_part = ($this->ihost !== null) ? $this->ihost : '';
      $port_part = ($this->port !== null) ? ':' . $this->port : '';

      return $userinfo_part . $host_part . $port_part;
  }
  /**
   * Get the authority in URI form.
   *
   * A string returned by get_iauthority() is converted with to_uri(); any
   * non-string result (null when there is no authority) is returned as-is.
   *
   * @return string|null
   */
  protected function get_authority()
  {
      $iauthority = $this->get_iauthority();

      return is_string($iauthority) ? $this->to_uri($iauthority) : $iauthority;
  }
  1175. }