LibOpml.php 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805
  1. <?php
  2. namespace marienfressinaud\LibOpml;
  3. /**
  4. * The LibOpml class provides the methods to read and write OPML files and
  5. * strings. It transforms OPML files or strings to PHP arrays (or the reverse).
  6. *
  7. * How to read this file?
  8. *
  9. * The first methods are dedicated to the parsing, and the next ones to the
  10. * rendering. The last methods are helper methods, but you don't have to
  11. * worry too much about them.
  12. *
  13. * The main methods are the public ones: parseFile, parseString and render.
  14. * They call the other parse* and render* methods internally.
  15. *
  16. * These three main methods are available as functions (see the src/functions.php
  17. * file).
  18. *
  19. * What's the array format?
  20. *
  21. * As said before, LibOpml transforms OPML to PHP arrays, or the reverse. The
  22. * format is pretty simple. It contains four keys:
  23. *
  24. * - version: the version of the OPML;
  25. * - namespaces: an array of namespaces used in the OPML, if any;
  26. * - head: an array of OPML head elements, where keys are the names of the
  27. * elements;
  28. * - body: an array of arrays representing OPML outlines, where keys are the
  29. * name of the attributes (the special @outlines key contains the sub-outlines).
  30. *
  31. * When rendering, only the body key is required (version will default to 2.0).
  32. *
  33. * Example:
  34. *
  35. * [
  36. * version => '2.0',
  37. * namespaces => [],
  38. * head => [
  39. * title => 'An OPML file'
  40. * ],
  41. * body => [
  42. * [
  43. * text => 'Newspapers',
  44. * @outlines => [
  45. * [text => 'El País'],
  46. * [text => 'Le Monde'],
  47. * [text => 'The Guardian'],
  48. * [text => 'The New York Times'],
  49. * ]
  50. * ]
  51. * ]
  52. * ]
  53. *
  54. * @see http://opml.org/spec2.opml
  55. *
  56. * @author Marien Fressinaud <dev@marienfressinaud.fr>
  57. * @link https://framagit.org/marienfressinaud/lib_opml
  58. * @license MIT
  59. */
  60. class LibOpml
  61. {
  62. /**
  63. * The list of valid head elements.
  64. */
  65. public const HEAD_ELEMENTS = [
  66. 'title', 'dateCreated', 'dateModified', 'ownerName', 'ownerEmail',
  67. 'ownerId', 'docs', 'expansionState', 'vertScrollState', 'windowTop',
  68. 'windowLeft', 'windowBottom', 'windowRight'
  69. ];
  70. /**
  71. * The list of numeric head elements.
  72. */
  73. public const NUMERIC_HEAD_ELEMENTS = [
  74. 'vertScrollState',
  75. 'windowTop',
  76. 'windowLeft',
  77. 'windowBottom',
  78. 'windowRight',
  79. ];
  80. /** @var boolean */
  81. private $strict = true;
  82. /** @var string */
  83. private $version = '2.0';
  84. /** @var string[] */
  85. private $namespaces = [];
  86. /**
  87. * @param bool $strict
  88. * Set to true (default) to check for violations of the specification,
  89. * false otherwise.
  90. */
  91. public function __construct($strict = true)
  92. {
  93. $this->strict = $strict;
  94. }
  95. /**
  96. * Parse a XML file and return the corresponding array.
  97. *
  98. * @param string $filename
  99. * The XML file to parse.
  100. *
  101. * @throws \marienfressinaud\LibOpml\Exception
  102. * Raised if the file cannot be read. See also exceptions raised by the
  103. * parseString method.
  104. *
  105. * @return array
  106. * An array reflecting the OPML (the structure is described above).
  107. */
  108. public function parseFile($filename)
  109. {
  110. $file_content = @file_get_contents($filename);
  111. if ($file_content === false) {
  112. throw new Exception("OPML file {$filename} cannot be found or read");
  113. }
  114. return $this->parseString($file_content);
  115. }
  116. /**
  117. * Parse a XML string and return the corresponding array.
  118. *
  119. * @param string $xml
  120. * The XML string to parse.
  121. *
  122. * @throws \marienfressinaud\LibOpml\Exception
  123. * Raised if the XML cannot be parsed, if version is missing or
  124. * invalid, if head is missing or contains invalid (or not parsable)
  125. * elements, or if body is missing, empty or contain non outline
  126. * elements. The exceptions (except XML parsing errors) are not raised
  127. * if strict is false. See also exceptions raised by the parseOutline
  128. * method.
  129. *
  130. * @return array
  131. * An array reflecting the OPML (the structure is described above).
  132. */
  133. public function parseString($xml)
  134. {
  135. $xml = trim($xml);
  136. if (!$xml) {
  137. throw new Exception('OPML string cannot be empty');
  138. }
  139. $dom = new \DOMDocument();
  140. $dom->recover = true;
  141. $dom->encoding = 'UTF-8';
  142. libxml_use_internal_errors(true);
  143. try {
  144. $result = $dom->loadXML($xml, LIBXML_NONET | LIBXML_NOWARNING);
  145. $error = $this->getLibxmlError();
  146. } catch (\Exception | \Error $e) {
  147. $result = false;
  148. $error = $e->getMessage();
  149. }
  150. libxml_use_internal_errors(false);
  151. if ($error) {
  152. throw new Exception($error);
  153. }
  154. $opml_element = $dom->documentElement;
  155. // Load the custom namespaces of the document
  156. $xpath = new \DOMXPath($dom);
  157. $this->namespaces = [];
  158. foreach ($xpath->query('//namespace::*') as $node) {
  159. if ($node->prefix === 'xml') {
  160. // This is the base namespace, we don't need to store it
  161. continue;
  162. }
  163. $this->namespaces[$node->prefix] = $node->namespaceURI;
  164. }
  165. // Get the version of the document
  166. $version = $opml_element->getAttribute('version');
  167. if (!$version) {
  168. $this->throwExceptionIfStrict('OPML version attribute is required');
  169. }
  170. $version = trim($version);
  171. if ($version === '1.1') {
  172. $version = '1.0';
  173. }
  174. if ($version !== '1.0' && $version !== '2.0') {
  175. $this->throwExceptionIfStrict('OPML supported versions are 1.0 and 2.0');
  176. }
  177. $this->version = $version;
  178. // Get head and body child elements
  179. $head_elements = $opml_element->getElementsByTagName('head');
  180. $child_head_elements = [];
  181. if (count($head_elements) === 1) {
  182. $child_head_elements = $head_elements[0]->childNodes;
  183. } else {
  184. $this->throwExceptionIfStrict('OPML must contain one and only one head element');
  185. }
  186. $body_elements = $opml_element->getElementsByTagName('body');
  187. $child_body_elements = [];
  188. if (count($body_elements) === 1) {
  189. $child_body_elements = $body_elements[0]->childNodes;
  190. } else {
  191. $this->throwExceptionIfStrict('OPML must contain one and only one body element');
  192. }
  193. $array = [
  194. 'version' => $this->version,
  195. 'namespaces' => $this->namespaces,
  196. 'head' => [],
  197. 'body' => [],
  198. ];
  199. // Load the child head elements in the head array
  200. foreach ($child_head_elements as $child_head_element) {
  201. if ($child_head_element->nodeType !== XML_ELEMENT_NODE) {
  202. continue;
  203. }
  204. $name = $child_head_element->nodeName;
  205. $value = $child_head_element->nodeValue;
  206. $namespaced = $child_head_element->namespaceURI !== null;
  207. if (!in_array($name, self::HEAD_ELEMENTS) && !$namespaced) {
  208. $this->throwExceptionIfStrict(
  209. "OPML head {$name} element is not part of the specification"
  210. );
  211. }
  212. if ($name === 'dateCreated' || $name === 'dateModified') {
  213. try {
  214. $value = $this->parseDate($value);
  215. } catch (\DomainException $e) {
  216. $this->throwExceptionIfStrict(
  217. "OPML head {$name} element must be a valid RFC822 or RFC1123 date"
  218. );
  219. }
  220. } elseif ($name === 'ownerEmail') {
  221. // Testing email validity is hard. PHP filter_var() function is
  222. // too strict compared to the RFC 822, so we can't use it.
  223. if (strpos($value, '@') === false) {
  224. $this->throwExceptionIfStrict(
  225. 'OPML head ownerEmail element must be an email address'
  226. );
  227. }
  228. } elseif ($name === 'ownerId' || $name === 'docs') {
  229. if (!$this->checkHttpAddress($value)) {
  230. $this->throwExceptionIfStrict(
  231. "OPML head {$name} element must be a HTTP address"
  232. );
  233. }
  234. } elseif ($name === 'expansionState') {
  235. $numbers = explode(',', $value);
  236. $value = array_map(function ($str_number) {
  237. if (is_numeric($str_number)) {
  238. return intval($str_number);
  239. } else {
  240. $this->throwExceptionIfStrict(
  241. 'OPML head expansionState element must be a list of numbers'
  242. );
  243. return $str_number;
  244. }
  245. }, $numbers);
  246. } elseif (in_array($name, self::NUMERIC_HEAD_ELEMENTS)) {
  247. if (is_numeric($value)) {
  248. $value = intval($value);
  249. } else {
  250. $this->throwExceptionIfStrict("OPML head {$name} element must be a number");
  251. }
  252. }
  253. $array['head'][$name] = $value;
  254. }
  255. // Load the child body elements in the body array
  256. foreach ($child_body_elements as $child_body_element) {
  257. if ($child_body_element->nodeType !== XML_ELEMENT_NODE) {
  258. continue;
  259. }
  260. if ($child_body_element->nodeName === 'outline') {
  261. $array['body'][] = $this->parseOutline($child_body_element);
  262. } else {
  263. $this->throwExceptionIfStrict(
  264. 'OPML body element can only contain outline elements'
  265. );
  266. }
  267. }
  268. if (empty($array['body'])) {
  269. $this->throwExceptionIfStrict(
  270. 'OPML body element must contain at least one outline element'
  271. );
  272. }
  273. return $array;
  274. }
  275. /**
  276. * Parse a XML element as an outline element and return the corresponding array.
  277. *
  278. * @param \DOMElement $outline_element
  279. * The element to parse.
  280. *
  281. * @throws \marienfressinaud\LibOpml\Exception
  282. * Raised if the outline contains non-outline elements, if it doesn't
  283. * contain a text attribute (or if empty), if a special attribute is
  284. * not parsable, or if type attribute requirements are not met. The
  285. * exceptions are not raised if strict is false. The exception about
  286. * missing text attribute is not raised if version is 1.0.
  287. *
  288. * @return array
  289. * An array reflecting the OPML outline (the structure is described above).
  290. */
  291. private function parseOutline($outline_element)
  292. {
  293. $outline = [];
  294. // Load the element attributes in the outline array
  295. foreach ($outline_element->attributes as $outline_attribute) {
  296. $name = $outline_attribute->nodeName;
  297. $value = $outline_attribute->nodeValue;
  298. if ($name === 'created') {
  299. try {
  300. $value = $this->parseDate($value);
  301. } catch (\DomainException $e) {
  302. $this->throwExceptionIfStrict(
  303. 'OPML outline created attribute must be a valid RFC822 or RFC1123 date'
  304. );
  305. }
  306. } elseif ($name === 'category') {
  307. $categories = explode(',', $value);
  308. $categories = array_map(function ($category) {
  309. return trim($category);
  310. }, $categories);
  311. $value = $categories;
  312. } elseif ($name === 'isComment' || $name === 'isBreakpoint') {
  313. if ($value === 'true' || $value === 'false') {
  314. $value = $value === 'true';
  315. } else {
  316. $this->throwExceptionIfStrict(
  317. "OPML outline {$name} attribute must be a boolean (true or false)"
  318. );
  319. }
  320. } elseif ($name === 'type') {
  321. // type attribute is case-insensitive
  322. $value = strtolower($value);
  323. }
  324. $outline[$name] = $value;
  325. }
  326. if (empty($outline['text']) && $this->version !== '1.0') {
  327. $this->throwExceptionIfStrict(
  328. 'OPML outline text attribute is required'
  329. );
  330. }
  331. // Perform additional check based on the type of the outline
  332. $type = $outline['type'] ?? '';
  333. if ($type === 'rss') {
  334. if (empty($outline['xmlUrl'])) {
  335. $this->throwExceptionIfStrict(
  336. 'OPML outline xmlUrl attribute is required when type is "rss"'
  337. );
  338. } elseif (!$this->checkHttpAddress($outline['xmlUrl'])) {
  339. $this->throwExceptionIfStrict(
  340. 'OPML outline xmlUrl attribute must be a HTTP address when type is "rss"'
  341. );
  342. }
  343. } elseif ($type === 'link' || $type === 'include') {
  344. if (empty($outline['url'])) {
  345. $this->throwExceptionIfStrict(
  346. "OPML outline url attribute is required when type is \"{$type}\""
  347. );
  348. } elseif (!$this->checkHttpAddress($outline['url'])) {
  349. $this->throwExceptionIfStrict(
  350. "OPML outline url attribute must be a HTTP address when type is \"{$type}\""
  351. );
  352. }
  353. }
  354. // Load the sub-outlines in a @outlines array
  355. foreach ($outline_element->childNodes as $child_outline_element) {
  356. if ($child_outline_element->nodeType !== XML_ELEMENT_NODE) {
  357. continue;
  358. }
  359. if ($child_outline_element->nodeName === 'outline') {
  360. $outline['@outlines'][] = $this->parseOutline($child_outline_element);
  361. } else {
  362. $this->throwExceptionIfStrict(
  363. 'OPML body element can only contain outline elements'
  364. );
  365. }
  366. }
  367. return $outline;
  368. }
  369. /**
  370. * Parse a value as a date.
  371. *
  372. * @param string $value
  373. *
  374. * @throws \DomainException
  375. * Raised if the value cannot be parsed.
  376. *
  377. * @return \DateTime
  378. */
  379. private function parseDate($value)
  380. {
  381. $formats = [
  382. \DateTimeInterface::RFC822,
  383. \DateTimeInterface::RFC1123,
  384. ];
  385. foreach ($formats as $format) {
  386. $date = date_create_from_format($format, $value);
  387. if ($date !== false) {
  388. return $date;
  389. }
  390. }
  391. throw new \DomainException('The argument cannot be parsed as a date');
  392. }
  393. /**
  394. * Render an OPML array as a string or a \DOMDocument.
  395. *
  396. * @param array $array
  397. * The array to render, it must follow the structure defined above.
  398. * @param bool $as_dom_document
  399. * Set to false (default) to return the array as a string, true to
  400. * return as a \DOMDocument.
  401. *
  402. * @throws \marienfressinaud\LibOpml\Exception
  403. * Raised if the `head` array contains unknown or invalid elements
  404. * (i.e. not of correct type), or if the `body` array is missing or
  405. * empty. The exceptions are not raised if strict is false. See also
  406. * exceptions raised by the renderOutline method.
  407. *
  408. * @return string|\DOMDocument
  409. * The XML string or DOM document corresponding to the given array.
  410. */
  411. public function render($array, $as_dom_document = false)
  412. {
  413. $dom = new \DOMDocument('1.0', 'UTF-8');
  414. $opml_element = new \DOMElement('opml');
  415. $dom->appendChild($opml_element);
  416. // Set the version attribute of the OPML document
  417. $version = $array['version'] ?? '2.0';
  418. if ($version === '1.1') {
  419. $version = '1.0';
  420. }
  421. if ($version !== '1.0' && $version !== '2.0') {
  422. $this->throwExceptionIfStrict('OPML supported versions are 1.0 and 2.0');
  423. }
  424. $this->version = $version;
  425. $opml_element->setAttribute('version', $this->version);
  426. // Declare the namespace on the opml element
  427. $this->namespaces = $array['namespaces'] ?? [];
  428. foreach ($this->namespaces as $prefix => $namespace) {
  429. $opml_element->setAttributeNS(
  430. 'http://www.w3.org/2000/xmlns/',
  431. "xmlns:{$prefix}",
  432. $namespace
  433. );
  434. }
  435. // Add the head element to the OPML document. $array['head'] is
  436. // optional but head tag will always exist in the final XML.
  437. $head_element = new \DOMElement('head');
  438. $opml_element->appendChild($head_element);
  439. if (isset($array['head'])) {
  440. foreach ($array['head'] as $name => $value) {
  441. $namespace = $this->getNamespace($name);
  442. if (!in_array($name, self::HEAD_ELEMENTS, true) && !$namespace) {
  443. $this->throwExceptionIfStrict(
  444. "OPML head {$name} element is not part of the specification"
  445. );
  446. }
  447. if ($name === 'dateCreated' || $name === 'dateModified') {
  448. if ($value instanceof \DateTimeInterface) {
  449. $value = $value->format(\DateTimeInterface::RFC1123);
  450. } else {
  451. $this->throwExceptionIfStrict(
  452. "OPML head {$name} element must be a DateTime"
  453. );
  454. }
  455. } elseif ($name === 'ownerEmail') {
  456. // Testing email validity is hard. PHP filter_var() function is
  457. // too strict compared to the RFC 822, so we can't use it.
  458. if (strpos($value, '@') === false) {
  459. $this->throwExceptionIfStrict(
  460. 'OPML head ownerEmail element must be an email address'
  461. );
  462. }
  463. } elseif ($name === 'ownerId' || $name === 'docs') {
  464. if (!$this->checkHttpAddress($value)) {
  465. $this->throwExceptionIfStrict(
  466. "OPML head {$name} element must be a HTTP address"
  467. );
  468. }
  469. } elseif ($name === 'expansionState') {
  470. if (is_array($value)) {
  471. foreach ($value as $number) {
  472. if (!is_int($number)) {
  473. $this->throwExceptionIfStrict(
  474. 'OPML head expansionState element must be an array of integers'
  475. );
  476. }
  477. }
  478. $value = implode(', ', $value);
  479. } else {
  480. $this->throwExceptionIfStrict(
  481. 'OPML head expansionState element must be an array of integers'
  482. );
  483. }
  484. } elseif (in_array($name, self::NUMERIC_HEAD_ELEMENTS)) {
  485. if (!is_int($value)) {
  486. $this->throwExceptionIfStrict(
  487. "OPML head {$name} element must be an integer"
  488. );
  489. }
  490. }
  491. $child_head_element = new \DOMElement($name, $value, $namespace);
  492. $head_element->appendChild($child_head_element);
  493. }
  494. }
  495. // Check body is set and contains at least one element
  496. if (!isset($array['body'])) {
  497. $this->throwExceptionIfStrict('OPML array must contain a body key');
  498. }
  499. $array_body = $array['body'] ?? [];
  500. if (count($array_body) <= 0) {
  501. $this->throwExceptionIfStrict(
  502. 'OPML body element must contain at least one outline array'
  503. );
  504. }
  505. // Create outline elements in the body element
  506. $body_element = new \DOMElement('body');
  507. $opml_element->appendChild($body_element);
  508. foreach ($array_body as $outline) {
  509. $this->renderOutline($body_element, $outline);
  510. }
  511. // And return the final result
  512. if ($as_dom_document) {
  513. return $dom;
  514. } else {
  515. $dom->formatOutput = true;
  516. return $dom->saveXML();
  517. }
  518. }
  519. /**
  520. * Transform an outline array to a \DOMElement and add it to a parent element.
  521. *
  522. * @param \DOMElement $parent_element
  523. * The DOM parent element of the current outline.
  524. * @param array $outline
  525. * The outline array to transform in a \DOMElement, it must follow the
  526. * structure defined above.
  527. *
  528. * @throws \marienfressinaud\LibOpml\Exception
  529. * Raised if the outline is not an array, if it doesn't contain a text
  530. * attribute (or if empty), if the `@outlines` key is not an array, if
  531. * a special attribute does not match its corresponding type, or if
  532. * `type` key requirements are not met. The exceptions (except errors
  533. * about outline or suboutlines not being arrays) are not raised if
  534. * strict is false. The exception about missing text attribute is not
  535. * raised if version is 1.0.
  536. */
  537. private function renderOutline($parent_element, $outline)
  538. {
  539. // Perform initial checks to verify the outline is correctly declared
  540. if (!is_array($outline)) {
  541. throw new Exception(
  542. 'OPML outline element must be defined as an array'
  543. );
  544. }
  545. if (empty($outline['text']) && $this->version !== '1.0') {
  546. $this->throwExceptionIfStrict(
  547. 'OPML outline text attribute is required'
  548. );
  549. }
  550. if (isset($outline['type'])) {
  551. $type = strtolower($outline['type']);
  552. if ($type === 'rss') {
  553. if (empty($outline['xmlUrl'])) {
  554. $this->throwExceptionIfStrict(
  555. 'OPML outline xmlUrl attribute is required when type is "rss"'
  556. );
  557. } elseif (!$this->checkHttpAddress($outline['xmlUrl'])) {
  558. $this->throwExceptionIfStrict(
  559. 'OPML outline xmlUrl attribute must be a HTTP address when type is "rss"'
  560. );
  561. }
  562. } elseif ($type === 'link' || $type === 'include') {
  563. if (empty($outline['url'])) {
  564. $this->throwExceptionIfStrict(
  565. "OPML outline url attribute is required when type is \"{$type}\""
  566. );
  567. } elseif (!$this->checkHttpAddress($outline['url'])) {
  568. $this->throwExceptionIfStrict(
  569. "OPML outline url attribute must be a HTTP address when type is \"{$type}\""
  570. );
  571. }
  572. }
  573. }
  574. // Create the outline element and add it to the parent
  575. $outline_element = new \DOMElement('outline');
  576. $parent_element->appendChild($outline_element);
  577. // Load the sub-outlines as child elements
  578. if (isset($outline['@outlines'])) {
  579. $outline_children = $outline['@outlines'];
  580. if (!is_array($outline_children)) {
  581. throw new Exception(
  582. 'OPML outline element must be defined as an array'
  583. );
  584. }
  585. foreach ($outline_children as $outline_child) {
  586. $this->renderOutline($outline_element, $outline_child);
  587. }
  588. // We don't want the sub-outlines to be loaded as attributes, so we
  589. // remove the key from the array.
  590. unset($outline['@outlines']);
  591. }
  592. // Load the other elements of the array as attributes
  593. foreach ($outline as $name => $value) {
  594. $namespace = $this->getNamespace($name);
  595. if ($name === 'created') {
  596. if ($value instanceof \DateTimeInterface) {
  597. $value = $value->format(\DateTimeInterface::RFC1123);
  598. } else {
  599. $this->throwExceptionIfStrict(
  600. 'OPML outline created attribute must be a DateTime'
  601. );
  602. }
  603. } elseif ($name === 'isComment' || $name === 'isBreakpoint') {
  604. if (is_bool($value)) {
  605. $value = $value ? 'true' : 'false';
  606. } else {
  607. $this->throwExceptionIfStrict(
  608. "OPML outline {$name} attribute must be a boolean"
  609. );
  610. }
  611. } elseif (is_array($value)) {
  612. $value = implode(', ', $value);
  613. }
  614. $outline_element->setAttributeNS($namespace, $name, $value);
  615. }
  616. }
  617. /**
  618. * Return wether a value is a valid HTTP address or not.
  619. *
  620. * HTTP address is not strictly defined by the OPML spec, so it is assumed:
  621. *
  622. * - it can be parsed by parse_url
  623. * - it has a host part
  624. * - scheme is http or https
  625. *
  626. * filter_var is not used because it would reject internationalized URLs
  627. * (i.e. with non ASCII chars). An alternative would be to punycode such
  628. * URLs, but it's more work to do it properly, and lib_opml needs to stay
  629. * simple.
  630. *
  631. * @param string $value
  632. *
  633. * @return boolean
  634. * Return true if the value is a valid HTTP address, false otherwise.
  635. */
  636. public function checkHttpAddress($value)
  637. {
  638. $value = trim($value);
  639. $parsed_url = parse_url($value);
  640. if (!$parsed_url) {
  641. return false;
  642. }
  643. if (
  644. !isset($parsed_url['scheme']) ||
  645. !isset($parsed_url['host'])
  646. ) {
  647. return false;
  648. }
  649. if (
  650. $parsed_url['scheme'] !== 'http' &&
  651. $parsed_url['scheme'] !== 'https'
  652. ) {
  653. return false;
  654. }
  655. return true;
  656. }
  657. /**
  658. * Return the namespace of a qualified name. An empty string is returned if
  659. * the name is not namespaced.
  660. *
  661. * @param string $qualified_name
  662. *
  663. * @throws \marienfressinaud\LibOpml\Exception
  664. * Raised if the namespace prefix isn't declared.
  665. *
  666. * @return string
  667. */
  668. private function getNamespace($qualified_name)
  669. {
  670. $split_name = explode(':', $qualified_name, 2);
  671. // count will always be 1 or 2.
  672. if (count($split_name) === 1) {
  673. // If 1, there's no prefix, thus no namespace
  674. return '';
  675. } else {
  676. // If 2, it means it has a namespace prefix, so we get the
  677. // namespace from the declared ones.
  678. $namespace_prefix = $split_name[0];
  679. if (!isset($this->namespaces[$namespace_prefix])) {
  680. throw new Exception(
  681. "OPML namespace {$namespace_prefix} is not declared"
  682. );
  683. }
  684. return $this->namespaces[$namespace_prefix];
  685. }
  686. }
  687. /**
  688. * Raise an exception only if strict is true.
  689. *
  690. * @param string $message
  691. *
  692. * @throws \marienfressinaud\LibOpml\Exception
  693. */
  694. private function throwExceptionIfStrict($message)
  695. {
  696. if ($this->strict) {
  697. throw new Exception($message);
  698. }
  699. }
  700. /**
  701. * Return a formatted error if any libxml error is returned by
  702. * libxml_get_errors(). In non-strict mode, only fatal errors are reported.
  703. */
  704. private function getLibxmlError(): string
  705. {
  706. $libxml_error = '';
  707. $errors = libxml_get_errors();
  708. foreach ($errors as $error) {
  709. if (!$this->strict && $error->level < LIBXML_ERR_FATAL) {
  710. continue;
  711. }
  712. $message = trim($error->message);
  713. $message .= " (line {$error->line}, column {$error->column}, code {$error->code})";
  714. $libxml_error .= $message . "\n";
  715. }
  716. return trim($libxml_error);
  717. }
  718. }