| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805 |
- <?php
- namespace marienfressinaud\LibOpml;
/**
 * The LibOpml class provides the methods to read and write OPML files and
 * strings. It transforms OPML files or strings to PHP arrays (or the reverse).
 *
 * How to read this file?
 *
 * The first methods are dedicated to the parsing, and the next ones to the
 * rendering. The last methods are small helpers (validation, namespace
 * lookup, error reporting), you don't have to worry too much about them.
 *
 * The main methods are the public ones: parseFile, parseString and render.
 * They call the other parse* and render* methods internally.
 *
 * These three main methods are available as functions (see the src/functions.php
 * file).
 *
 * What's the array format?
 *
 * As said before, LibOpml transforms OPML to PHP arrays, or the reverse. The
 * format is pretty simple. It contains four keys:
 *
 * - version: the version of the OPML;
 * - namespaces: an array of namespaces used in the OPML, if any;
 * - head: an array of OPML head elements, where keys are the names of the
 *   elements;
 * - body: an array of arrays representing OPML outlines, where keys are the
 *   name of the attributes (the special @outlines key contains the sub-outlines).
 *
 * When rendering, only the body key is required (version will default to 2.0).
 *
 * Example:
 *
 * [
 *     version => '2.0',
 *     namespaces => [],
 *     head => [
 *         title => 'An OPML file'
 *     ],
 *     body => [
 *         [
 *             text => 'Newspapers',
 *             @outlines => [
 *                 [text => 'El País'],
 *                 [text => 'Le Monde'],
 *                 [text => 'The Guardian'],
 *                 [text => 'The New York Times'],
 *             ]
 *         ]
 *     ]
 * ]
 *
 * @see http://opml.org/spec2.opml
 *
 * @author Marien Fressinaud <dev@marienfressinaud.fr>
 * @link https://framagit.org/marienfressinaud/lib_opml
 * @license MIT
 */
class LibOpml
{
    /**
     * The list of valid head elements.
     */
    public const HEAD_ELEMENTS = [
        'title', 'dateCreated', 'dateModified', 'ownerName', 'ownerEmail',
        'ownerId', 'docs', 'expansionState', 'vertScrollState', 'windowTop',
        'windowLeft', 'windowBottom', 'windowRight'
    ];

    /**
     * The list of numeric head elements.
     */
    public const NUMERIC_HEAD_ELEMENTS = [
        'vertScrollState',
        'windowTop',
        'windowLeft',
        'windowBottom',
        'windowRight',
    ];

    /** @var bool Whether violations of the specification raise exceptions. */
    private $strict = true;

    /** @var string Version of the document being parsed or rendered. */
    private $version = '2.0';

    /** @var string[] Namespaces of the current document, indexed by prefix. */
    private $namespaces = [];

    /**
     * @param bool $strict
     *     Set to true (default) to check for violations of the specification,
     *     false otherwise.
     */
    public function __construct($strict = true)
    {
        $this->strict = $strict;
    }

    /**
     * Parse a XML file and return the corresponding array.
     *
     * @param string $filename
     *     The XML file to parse.
     *
     * @throws \marienfressinaud\LibOpml\Exception
     *     Raised if the file cannot be read. See also exceptions raised by the
     *     parseString method.
     *
     * @return array
     *     An array reflecting the OPML (the structure is described above).
     */
    public function parseFile($filename)
    {
        // The warning is silenced on purpose: the failure is reported with an
        // exception just below.
        $file_content = @file_get_contents($filename);
        if ($file_content === false) {
            throw new Exception("OPML file {$filename} cannot be found or read");
        }

        return $this->parseString($file_content);
    }

    /**
     * Parse a XML string and return the corresponding array.
     *
     * @param string $xml
     *     The XML string to parse.
     *
     * @throws \marienfressinaud\LibOpml\Exception
     *     Raised if the XML cannot be parsed, if version is missing or
     *     invalid, if head is missing or contains invalid (or not parsable)
     *     elements, or if body is missing, empty or contains non-outline
     *     elements. The exceptions (except XML parsing errors) are not raised
     *     if strict is false. See also exceptions raised by the parseOutline
     *     method.
     *
     * @return array
     *     An array reflecting the OPML (the structure is described above).
     */
    public function parseString($xml)
    {
        $xml = trim($xml);
        if (!$xml) {
            throw new Exception('OPML string cannot be empty');
        }

        $dom = new \DOMDocument();
        $dom->recover = true;
        $dom->encoding = 'UTF-8';

        // libxml error handling is a process-wide setting: remember the
        // caller's mode so that it can be restored, and start from an empty
        // buffer so that errors left by unrelated code are not reported as
        // errors of this document.
        $previous_use_errors = libxml_use_internal_errors(true);
        libxml_clear_errors();
        try {
            $result = $dom->loadXML($xml, LIBXML_NONET | LIBXML_NOWARNING);
            $error = $this->getLibxmlError();
        } catch (\Exception | \Error $e) {
            $result = false;
            $error = $e->getMessage();
        } finally {
            // Don't leave our own errors in the caller's buffer.
            libxml_clear_errors();
            libxml_use_internal_errors($previous_use_errors);
        }

        if ($error) {
            throw new Exception($error);
        }

        $opml_element = $dom->documentElement;
        if ($result === false || $opml_element === null) {
            // Without a root element there is nothing to read; fail with the
            // library exception instead of a "member function on null" error.
            throw new Exception('OPML string cannot be parsed as XML');
        }

        // Load the custom namespaces of the document
        $xpath = new \DOMXPath($dom);
        $this->namespaces = [];
        foreach ($xpath->query('//namespace::*') as $node) {
            if ($node->prefix === 'xml') {
                // This is the base namespace, we don't need to store it
                continue;
            }

            $this->namespaces[$node->prefix] = $node->namespaceURI;
        }

        // Get the version of the document
        $version = $opml_element->getAttribute('version');
        if (!$version) {
            $this->throwExceptionIfStrict('OPML version attribute is required');
        }

        $version = trim($version);
        if ($version === '1.1') {
            // 1.1 is handled as 1.0
            $version = '1.0';
        }

        if ($version !== '1.0' && $version !== '2.0') {
            $this->throwExceptionIfStrict('OPML supported versions are 1.0 and 2.0');
        }

        $this->version = $version;

        // Get head and body child elements
        $head_elements = $opml_element->getElementsByTagName('head');
        $child_head_elements = [];
        if ($head_elements->length === 1) {
            $child_head_elements = $head_elements->item(0)->childNodes;
        } else {
            $this->throwExceptionIfStrict('OPML must contain one and only one head element');
        }

        $body_elements = $opml_element->getElementsByTagName('body');
        $child_body_elements = [];
        if ($body_elements->length === 1) {
            $child_body_elements = $body_elements->item(0)->childNodes;
        } else {
            $this->throwExceptionIfStrict('OPML must contain one and only one body element');
        }

        $array = [
            'version' => $this->version,
            'namespaces' => $this->namespaces,
            'head' => [],
            'body' => [],
        ];

        // Load the child head elements in the head array
        foreach ($child_head_elements as $child_head_element) {
            if ($child_head_element->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            $name = $child_head_element->nodeName;
            $namespaced = $child_head_element->namespaceURI !== null;

            if (!in_array($name, self::HEAD_ELEMENTS, true) && !$namespaced) {
                $this->throwExceptionIfStrict(
                    "OPML head {$name} element is not part of the specification"
                );
            }

            $array['head'][$name] = $this->parseHeadValue(
                $name,
                $child_head_element->nodeValue
            );
        }

        // Load the child body elements in the body array
        foreach ($child_body_elements as $child_body_element) {
            if ($child_body_element->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            if ($child_body_element->nodeName === 'outline') {
                $array['body'][] = $this->parseOutline($child_body_element);
            } else {
                $this->throwExceptionIfStrict(
                    'OPML body element can only contain outline elements'
                );
            }
        }

        if (empty($array['body'])) {
            $this->throwExceptionIfStrict(
                'OPML body element must contain at least one outline element'
            );
        }

        return $array;
    }

    /**
     * Convert the raw text of a head element to its PHP representation.
     *
     * Dates become \DateTime objects, expansionState becomes an array of
     * integers and the numeric elements become integers. Other values are
     * returned unchanged. When a value is invalid and strict is false, the
     * raw value is kept as is.
     *
     * @param string $name
     *     The name of the head element.
     * @param string $value
     *     The text content of the head element.
     *
     * @throws \marienfressinaud\LibOpml\Exception
     *     Raised if the value is not valid for the element and strict is true.
     *
     * @return mixed
     */
    private function parseHeadValue($name, $value)
    {
        if ($name === 'dateCreated' || $name === 'dateModified') {
            try {
                return $this->parseDate($value);
            } catch (\DomainException $e) {
                $this->throwExceptionIfStrict(
                    "OPML head {$name} element must be a valid RFC822 or RFC1123 date"
                );
                return $value;
            }
        }

        if ($name === 'ownerEmail') {
            // Testing email validity is hard. PHP filter_var() function is
            // too strict compared to the RFC 822, so we can't use it.
            if (strpos($value, '@') === false) {
                $this->throwExceptionIfStrict(
                    'OPML head ownerEmail element must be an email address'
                );
            }
            return $value;
        }

        if ($name === 'ownerId' || $name === 'docs') {
            if (!$this->checkHttpAddress($value)) {
                $this->throwExceptionIfStrict(
                    "OPML head {$name} element must be a HTTP address"
                );
            }
            return $value;
        }

        if ($name === 'expansionState') {
            // A blank value is an empty list. This is also what render()
            // outputs for an empty array, so it must be readable back.
            if (trim($value) === '') {
                return [];
            }

            $numbers = [];
            foreach (explode(',', $value) as $str_number) {
                // render() separates the numbers with ", ", so spaces around
                // the numbers are expected.
                $trimmed_number = trim($str_number);
                if (is_numeric($trimmed_number)) {
                    $numbers[] = intval($trimmed_number);
                } else {
                    $this->throwExceptionIfStrict(
                        'OPML head expansionState element must be a list of numbers'
                    );
                    $numbers[] = $str_number;
                }
            }
            return $numbers;
        }

        if (in_array($name, self::NUMERIC_HEAD_ELEMENTS, true)) {
            if (is_numeric($value)) {
                return intval($value);
            }

            $this->throwExceptionIfStrict("OPML head {$name} element must be a number");
        }

        return $value;
    }

    /**
     * Parse a XML element as an outline element and return the corresponding array.
     *
     * @param \DOMElement $outline_element
     *     The element to parse.
     *
     * @throws \marienfressinaud\LibOpml\Exception
     *     Raised if the outline contains non-outline elements, if it doesn't
     *     contain a text attribute (or if empty), if a special attribute is
     *     not parsable, or if type attribute requirements are not met. The
     *     exceptions are not raised if strict is false. The exception about
     *     missing text attribute is not raised if version is 1.0.
     *
     * @return array
     *     An array reflecting the OPML outline (the structure is described above).
     */
    private function parseOutline($outline_element)
    {
        $outline = [];

        // Load the element attributes in the outline array
        foreach ($outline_element->attributes as $outline_attribute) {
            $name = $outline_attribute->nodeName;
            $value = $outline_attribute->nodeValue;

            if ($name === 'created') {
                try {
                    $value = $this->parseDate($value);
                } catch (\DomainException $e) {
                    $this->throwExceptionIfStrict(
                        'OPML outline created attribute must be a valid RFC822 or RFC1123 date'
                    );
                }
            } elseif ($name === 'category') {
                // Categories are a comma-separated list
                $value = array_map('trim', explode(',', $value));
            } elseif ($name === 'isComment' || $name === 'isBreakpoint') {
                if ($value === 'true' || $value === 'false') {
                    $value = $value === 'true';
                } else {
                    $this->throwExceptionIfStrict(
                        "OPML outline {$name} attribute must be a boolean (true or false)"
                    );
                }
            } elseif ($name === 'type') {
                // type attribute is case-insensitive
                $value = strtolower($value);
            }

            $outline[$name] = $value;
        }

        // Check the text attribute and the requirements of the type
        $this->checkOutline($outline);

        // Load the sub-outlines in a @outlines array
        foreach ($outline_element->childNodes as $child_outline_element) {
            if ($child_outline_element->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            if ($child_outline_element->nodeName === 'outline') {
                $outline['@outlines'][] = $this->parseOutline($child_outline_element);
            } else {
                $this->throwExceptionIfStrict(
                    'OPML body element can only contain outline elements'
                );
            }
        }

        return $outline;
    }

    /**
     * Parse a value as a date.
     *
     * The RFC822 format is tried first, then the RFC1123 format.
     *
     * @param string $value
     *
     * @throws \DomainException
     *     Raised if the value cannot be parsed.
     *
     * @return \DateTime
     */
    private function parseDate($value)
    {
        $formats = [
            \DateTimeInterface::RFC822,
            \DateTimeInterface::RFC1123,
        ];

        foreach ($formats as $format) {
            $date = date_create_from_format($format, $value);
            if ($date !== false) {
                return $date;
            }
        }

        throw new \DomainException('The argument cannot be parsed as a date');
    }

    /**
     * Render an OPML array as a string or a \DOMDocument.
     *
     * @param array $array
     *     The array to render, it must follow the structure defined above.
     * @param bool $as_dom_document
     *     Set to false (default) to return the array as a string, true to
     *     return as a \DOMDocument.
     *
     * @throws \marienfressinaud\LibOpml\Exception
     *     Raised if the `head` array contains unknown or invalid elements
     *     (i.e. not of correct type), or if the `body` array is missing or
     *     empty. The exceptions are not raised if strict is false. See also
     *     exceptions raised by the renderOutline and getNamespace methods.
     *
     * @return string|\DOMDocument
     *     The XML string or DOM document corresponding to the given array.
     */
    public function render($array, $as_dom_document = false)
    {
        $dom = new \DOMDocument('1.0', 'UTF-8');
        $opml_element = new \DOMElement('opml');
        $dom->appendChild($opml_element);

        // Set the version attribute of the OPML document
        $version = $array['version'] ?? '2.0';
        if ($version === '1.1') {
            // 1.1 is handled as 1.0
            $version = '1.0';
        }

        if ($version !== '1.0' && $version !== '2.0') {
            $this->throwExceptionIfStrict('OPML supported versions are 1.0 and 2.0');
        }

        $this->version = $version;
        $opml_element->setAttribute('version', $this->version);

        // Declare the namespace on the opml element
        $this->namespaces = $array['namespaces'] ?? [];
        foreach ($this->namespaces as $prefix => $namespace) {
            $opml_element->setAttributeNS(
                'http://www.w3.org/2000/xmlns/',
                "xmlns:{$prefix}",
                $namespace
            );
        }

        // Add the head element to the OPML document. $array['head'] is
        // optional but head tag will always exist in the final XML.
        $head_element = new \DOMElement('head');
        $opml_element->appendChild($head_element);

        if (isset($array['head'])) {
            foreach ($array['head'] as $name => $value) {
                $namespace = $this->getNamespace($name);

                if (!in_array($name, self::HEAD_ELEMENTS, true) && !$namespace) {
                    $this->throwExceptionIfStrict(
                        "OPML head {$name} element is not part of the specification"
                    );
                }

                $text = (string) $this->renderHeadValue($name, $value);

                // The value is not given to the DOMElement constructor because
                // it would be parsed as XML content (a bare "&" starts an
                // entity reference and truncates the text). A text node is
                // escaped by the DOM on output. The node can only be appended
                // once the element is attached to the document.
                $child_head_element = new \DOMElement($name, '', $namespace);
                $head_element->appendChild($child_head_element);
                if ($text !== '') {
                    $child_head_element->appendChild($dom->createTextNode($text));
                }
            }
        }

        // Check body is set and contains at least one element
        if (!isset($array['body'])) {
            $this->throwExceptionIfStrict('OPML array must contain a body key');
        }

        $array_body = $array['body'] ?? [];
        if (count($array_body) <= 0) {
            $this->throwExceptionIfStrict(
                'OPML body element must contain at least one outline array'
            );
        }

        // Create outline elements in the body element
        $body_element = new \DOMElement('body');
        $opml_element->appendChild($body_element);
        foreach ($array_body as $outline) {
            $this->renderOutline($body_element, $outline);
        }

        // And return the final result
        if ($as_dom_document) {
            return $dom;
        }

        $dom->formatOutput = true;
        return $dom->saveXML();
    }

    /**
     * Check a head value given to render and convert it to its OPML form.
     *
     * DateTime objects are formatted as RFC1123 dates and the expansionState
     * array is joined with ", ". Other values are returned unchanged. When a
     * value is invalid and strict is false, it is returned as is.
     *
     * @param string $name
     *     The name of the head element.
     * @param mixed $value
     *     The value declared in the `head` array.
     *
     * @throws \marienfressinaud\LibOpml\Exception
     *     Raised if the value is not of the expected type and strict is true.
     *
     * @return mixed
     */
    private function renderHeadValue($name, $value)
    {
        if ($name === 'dateCreated' || $name === 'dateModified') {
            if ($value instanceof \DateTimeInterface) {
                return $value->format(\DateTimeInterface::RFC1123);
            }

            $this->throwExceptionIfStrict(
                "OPML head {$name} element must be a DateTime"
            );
            return $value;
        }

        if ($name === 'ownerEmail') {
            // Testing email validity is hard. PHP filter_var() function is
            // too strict compared to the RFC 822, so we can't use it.
            if (strpos($value, '@') === false) {
                $this->throwExceptionIfStrict(
                    'OPML head ownerEmail element must be an email address'
                );
            }
            return $value;
        }

        if ($name === 'ownerId' || $name === 'docs') {
            if (!$this->checkHttpAddress($value)) {
                $this->throwExceptionIfStrict(
                    "OPML head {$name} element must be a HTTP address"
                );
            }
            return $value;
        }

        if ($name === 'expansionState') {
            if (!is_array($value)) {
                $this->throwExceptionIfStrict(
                    'OPML head expansionState element must be an array of integers'
                );
                return $value;
            }

            foreach ($value as $number) {
                if (!is_int($number)) {
                    $this->throwExceptionIfStrict(
                        'OPML head expansionState element must be an array of integers'
                    );
                }
            }
            return implode(', ', $value);
        }

        if (in_array($name, self::NUMERIC_HEAD_ELEMENTS, true) && !is_int($value)) {
            $this->throwExceptionIfStrict(
                "OPML head {$name} element must be an integer"
            );
        }

        return $value;
    }

    /**
     * Transform an outline array to a \DOMElement and add it to a parent element.
     *
     * @param \DOMElement $parent_element
     *     The DOM parent element of the current outline.
     * @param array $outline
     *     The outline array to transform in a \DOMElement, it must follow the
     *     structure defined above.
     *
     * @throws \marienfressinaud\LibOpml\Exception
     *     Raised if the outline is not an array, if it doesn't contain a text
     *     attribute (or if empty), if the `@outlines` key is not an array, if
     *     a special attribute does not match its corresponding type, or if
     *     `type` key requirements are not met. The exceptions (except errors
     *     about outline or suboutlines not being arrays) are not raised if
     *     strict is false. The exception about missing text attribute is not
     *     raised if version is 1.0.
     */
    private function renderOutline($parent_element, $outline)
    {
        // Perform initial checks to verify the outline is correctly declared
        if (!is_array($outline)) {
            throw new Exception(
                'OPML outline element must be defined as an array'
            );
        }

        $this->checkOutline($outline);

        // Create the outline element and add it to the parent
        $outline_element = new \DOMElement('outline');
        $parent_element->appendChild($outline_element);

        // Load the sub-outlines as child elements
        if (isset($outline['@outlines'])) {
            $outline_children = $outline['@outlines'];
            if (!is_array($outline_children)) {
                throw new Exception(
                    'OPML outline element must be defined as an array'
                );
            }

            foreach ($outline_children as $outline_child) {
                $this->renderOutline($outline_element, $outline_child);
            }

            // We don't want the sub-outlines to be loaded as attributes, so we
            // remove the key from the array.
            unset($outline['@outlines']);
        }

        // Load the other elements of the array as attributes
        foreach ($outline as $name => $value) {
            $namespace = $this->getNamespace($name);

            if ($name === 'created') {
                if ($value instanceof \DateTimeInterface) {
                    $value = $value->format(\DateTimeInterface::RFC1123);
                } else {
                    $this->throwExceptionIfStrict(
                        'OPML outline created attribute must be a DateTime'
                    );
                }
            } elseif ($name === 'isComment' || $name === 'isBreakpoint') {
                if (is_bool($value)) {
                    $value = $value ? 'true' : 'false';
                } else {
                    $this->throwExceptionIfStrict(
                        "OPML outline {$name} attribute must be a boolean"
                    );
                }
            } elseif (is_array($value)) {
                // Lists (e.g. category) are rendered comma-separated
                $value = implode(', ', $value);
            }

            $outline_element->setAttributeNS($namespace, $name, $value);
        }
    }

    /**
     * Check the requirements shared by parsed and rendered outlines: the text
     * attribute must be present (except in version 1.0), and the attribute
     * required by the type (xmlUrl for "rss", url for "link" and "include")
     * must be a HTTP address.
     *
     * @param array $outline
     *     The outline attributes, indexed by name.
     *
     * @throws \marienfressinaud\LibOpml\Exception
     *     Raised if a requirement is not met and strict is true.
     */
    private function checkOutline(array $outline)
    {
        if ($this->version !== '1.0' && $this->isMissing($outline['text'] ?? null)) {
            $this->throwExceptionIfStrict(
                'OPML outline text attribute is required'
            );
        }

        if (!isset($outline['type'])) {
            return;
        }

        // type attribute is case-insensitive
        $type = strtolower($outline['type']);
        if ($type === 'rss') {
            $attribute = 'xmlUrl';
        } elseif ($type === 'link' || $type === 'include') {
            $attribute = 'url';
        } else {
            // No requirement for the other types
            return;
        }

        if ($this->isMissing($outline[$attribute] ?? null)) {
            $this->throwExceptionIfStrict(
                "OPML outline {$attribute} attribute is required when type is \"{$type}\""
            );
        } elseif (!$this->checkHttpAddress($outline[$attribute])) {
            $this->throwExceptionIfStrict(
                "OPML outline {$attribute} attribute must be a HTTP address when type is \"{$type}\""
            );
        }
    }

    /**
     * Return whether a value must be considered as not provided.
     *
     * Unlike empty(), zero-like values (e.g. the "0" string) are considered
     * as provided: an outline can legitimately be named "0".
     *
     * @param mixed $value
     *
     * @return bool
     */
    private function isMissing($value)
    {
        return $value === null || $value === '' || $value === false || $value === [];
    }

    /**
     * Return whether a value is a valid HTTP address or not.
     *
     * HTTP address is not strictly defined by the OPML spec, so it is assumed:
     *
     * - it can be parsed by parse_url
     * - it has a host part
     * - scheme is http or https (case-insensitive)
     *
     * filter_var is not used because it would reject internationalized URLs
     * (i.e. with non ASCII chars). An alternative would be to punycode such
     * URLs, but it's more work to do it properly, and lib_opml needs to stay
     * simple.
     *
     * @param string $value
     *
     * @return bool
     *     Return true if the value is a valid HTTP address, false otherwise.
     */
    public function checkHttpAddress($value)
    {
        $parsed_url = parse_url(trim($value));
        if (!$parsed_url) {
            return false;
        }

        if (!isset($parsed_url['scheme'], $parsed_url['host'])) {
            return false;
        }

        // URI schemes are case-insensitive, so "HTTP://…" is a HTTP address.
        $scheme = strtolower($parsed_url['scheme']);

        return $scheme === 'http' || $scheme === 'https';
    }

    /**
     * Return the namespace of a qualified name. An empty string is returned if
     * the name is not namespaced.
     *
     * @param string $qualified_name
     *
     * @throws \marienfressinaud\LibOpml\Exception
     *     Raised if the namespace prefix isn't declared.
     *
     * @return string
     */
    private function getNamespace($qualified_name)
    {
        $split_name = explode(':', $qualified_name, 2);
        // count will always be 1 or 2.
        if (count($split_name) === 1) {
            // If 1, there's no prefix, thus no namespace
            return '';
        }

        // If 2, it means it has a namespace prefix, so we get the
        // namespace from the declared ones.
        $namespace_prefix = $split_name[0];
        if (!isset($this->namespaces[$namespace_prefix])) {
            throw new Exception(
                "OPML namespace {$namespace_prefix} is not declared"
            );
        }

        return $this->namespaces[$namespace_prefix];
    }

    /**
     * Raise an exception only if strict is true.
     *
     * @param string $message
     *
     * @throws \marienfressinaud\LibOpml\Exception
     */
    private function throwExceptionIfStrict($message)
    {
        if ($this->strict) {
            throw new Exception($message);
        }
    }

    /**
     * Return a formatted error if any libxml error is returned by
     * libxml_get_errors(). In non-strict mode, only fatal errors are reported.
     *
     * @return string
     *     One line per error, or an empty string if there is no error.
     */
    private function getLibxmlError(): string
    {
        $libxml_error = '';
        foreach (libxml_get_errors() as $error) {
            if (!$this->strict && $error->level < LIBXML_ERR_FATAL) {
                continue;
            }

            $message = trim($error->message);
            $message .= " (line {$error->line}, column {$error->column}, code {$error->code})";
            $libxml_error .= $message . "\n";
        }

        return trim($libxml_error);
    }
}
|