lib_opml.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. <?php
  2. /**
  3. * lib_opml is a free library to manage OPML format in PHP.
  4. *
  5. * By default, it targets OPML version 2.0 but can also handle OPML 1.0.
  6. * More information is available at http://dev.opml.org.
  7. * The difference is that the "text" attribute is optional in version 1.0.
  8. * It is highly recommended to always set this attribute.
  9. *
  10. * lib_opml requires SimpleXML (php.net/simplexml) and DOMDocument (php.net/domdocument)
  11. *
  12. * @author Marien Fressinaud <dev@marienfressinaud.fr>
  13. * @link https://github.com/marienfressinaud/lib_opml
  14. * @version 0.2-FreshRSS~1.20.0
  15. * @license public domain
  16. *
  17. * Usages:
  18. * > include('lib_opml.php');
  19. * > $filename = 'my_opml_file.xml';
  20. * > $opml_array = libopml_parse_file($filename);
  21. * > print_r($opml_array);
  22. *
  23. * > $opml_string = [...];
  24. * > $opml_array = libopml_parse_string($opml_string);
  25. * > print_r($opml_array);
  26. *
  27. * > $opml_array = [...];
  28. * > $opml_string = libopml_render($opml_array);
  29. * > $opml_object = libopml_render($opml_array, true);
  30. * > echo $opml_string;
  31. * > print_r($opml_object);
  32. *
  33. * You can set $strict argument to false if you want to bypass "text" attribute
  34. * requirement.
  35. *
  36. * If parsing fails for any reason (e.g. not an XML string, does not match with
  37. * the specifications), a LibOPML_Exception is raised.
  38. *
  39. * lib_opml array format is described here:
  40. * $array = array(
  41. * 'head' => array( // 'head' element is optional (but recommended)
  42. * 'key' => 'value', // key must be a part of available OPML head elements
  43. * ),
  44. * 'body' => array( // body is required
  45. * array( // this array represents an outline (at least one)
  46. * 'text' => 'value', // 'text' element is required if $strict is true
  47. * 'key' => 'value', // key and value are what you want (optional)
  48. * '@outlines' => array( // @outlines is a special value and represents sub-outlines
  49. * array(
  50. * [...] // where [...] is a valid outline definition
  51. * ),
  52. * ),
  53. * ),
  54. * array( // other outline definitions
  55. * [...]
  56. * ),
  57. * [...],
  58. * )
  59. * )
  60. *
  61. */
  62. /**
  63. * A simple Exception class which represents any kind of OPML problem.
  64. * Message should precise the current problem.
  65. */
  66. class LibOPML_Exception extends Exception {}
  67. // Define the list of available head attributes. All of them are optional.
  68. define('HEAD_ELEMENTS', serialize(array(
  69. 'title', 'dateCreated', 'dateModified', 'ownerName', 'ownerEmail',
  70. 'ownerId', 'docs', 'expansionState', 'vertScrollState', 'windowTop',
  71. 'windowLeft', 'windowBottom', 'windowRight'
  72. )));
  73. /**
  74. * Parse an XML object as an outline object and return corresponding array
  75. *
  76. * @param SimpleXMLElement $outline_xml the XML object we want to parse
  77. * @param bool $strict true if "text" attribute is required, false else
  78. * @return array corresponding to an outline and following format described above
  79. * @throws LibOPML_Exception
  80. * @access private
  81. */
  82. function libopml_parse_outline($outline_xml, $strict = true) {
  83. $outline = array();
  84. // An outline may contain any kind of attributes but "text" attribute is
  85. // required !
  86. $text_is_present = false;
  87. $elem = dom_import_simplexml($outline_xml);
  88. /** @var DOMAttr $attr */
  89. foreach ($elem->attributes as $attr) {
  90. $key = $attr->localName;
  91. if ($attr->namespaceURI == '') {
  92. $outline[$key] = $attr->value;
  93. } else {
  94. $outline[$key] = [
  95. 'namespace' => $attr->namespaceURI,
  96. 'value' => $attr->value,
  97. ];
  98. }
  99. if ($key === 'text') {
  100. $text_is_present = true;
  101. }
  102. }
  103. if (!$text_is_present && $strict) {
  104. throw new LibOPML_Exception(
  105. 'Outline does not contain any text attribute'
  106. );
  107. }
  108. if (empty($outline['text']) && isset($outline['title'])) {
  109. $outline['text'] = $outline['title'];
  110. }
  111. foreach ($outline_xml->children() as $key => $value) {
  112. // An outline may contain any number of outline children
  113. if ($key === 'outline') {
  114. $outline['@outlines'][] = libopml_parse_outline($value, $strict);
  115. }
  116. }
  117. return $outline;
  118. }
  119. /**
  120. * Reformat the XML document as a hierarchy when
  121. * the OPML 2.0 category attribute is used
  122. */
  123. function preprocessing_categories($doc) {
  124. $outline_categories = array();
  125. $body = $doc->getElementsByTagName('body')->item(0);
  126. $xpath = new DOMXpath($doc);
  127. $outlines = $xpath->query('/opml/body/outline[@category]');
  128. foreach ($outlines as $outline) {
  129. $category = trim($outline->getAttribute('category'));
  130. if ($category != '') {
  131. $outline_category = null;
  132. if (!isset($outline_categories[$category])) {
  133. $outline_category = $doc->createElement('outline');
  134. $outline_category->setAttribute('text', $category);
  135. $body->insertBefore($outline_category, $body->firstChild);
  136. $outline_categories[$category] = $outline_category;
  137. } else {
  138. $outline_category = $outline_categories[$category];
  139. }
  140. $outline->parentNode->removeChild($outline);
  141. $outline_category->appendChild($outline);
  142. }
  143. }
  144. }
  145. /**
  146. * Parse a string as a XML one and returns the corresponding array
  147. *
  148. * @param string $xml is the string we want to parse
  149. * @param bool $strict true to perform some validation (e.g. require "text" attribute), false to relax
  150. * @return array corresponding to the XML string and following format described above
  151. * @throws LibOPML_Exception
  152. * @access public
  153. */
  154. function libopml_parse_string($xml, $strict = true) {
  155. $dom = new DOMDocument();
  156. $dom->recover = true;
  157. $dom->strictErrorChecking = false;
  158. $dom->loadXML($xml);
  159. $dom->encoding = 'UTF-8';
  160. //Partial compatibility with the category attribute of OPML 2.0
  161. preprocessing_categories($dom);
  162. $opml = simplexml_import_dom($dom);
  163. if (!$opml) {
  164. throw new LibOPML_Exception();
  165. }
  166. $array = array(
  167. 'version' => (string)$opml['version'],
  168. 'head' => array(),
  169. 'body' => array()
  170. );
  171. // First, we get all "head" elements. Head is required but its sub-elements
  172. // are optional.
  173. foreach ($opml->head->children() as $key => $value) {
  174. if (in_array($key, unserialize(HEAD_ELEMENTS), true)) {
  175. $array['head'][$key] = (string)$value;
  176. } elseif ($strict) {
  177. throw new LibOPML_Exception(
  178. $key . ' is not part of the OPML 2.0 specification'
  179. );
  180. }
  181. }
  182. // Then, we get body oulines. Body must contain at least one outline
  183. // element.
  184. $at_least_one_outline = false;
  185. foreach ($opml->body->children() as $key => $value) {
  186. if ($key === 'outline') {
  187. $at_least_one_outline = true;
  188. $array['body'][] = libopml_parse_outline($value, $strict);
  189. }
  190. }
  191. if (!$at_least_one_outline) {
  192. throw new LibOPML_Exception(
  193. 'OPML body must contain at least one outline element'
  194. );
  195. }
  196. return $array;
  197. }
  198. /**
  199. * Parse a string contained into a file as a XML string and returns the corresponding array
  200. *
  201. * @param string $filename should indicates a valid XML file
  202. * @param bool $strict true if "text" attribute is required, false else
  203. * @return array corresponding to the file content and following format described above
  204. * @throws LibOPML_Exception
  205. * @access public
  206. */
  207. function libopml_parse_file($filename, $strict = true) {
  208. $file_content = file_get_contents($filename);
  209. if ($file_content === false) {
  210. throw new LibOPML_Exception(
  211. $filename . ' cannot be found'
  212. );
  213. }
  214. return libopml_parse_string($file_content, $strict);
  215. }
  216. /**
  217. * Create a XML outline object in a parent object.
  218. *
  219. * @param SimpleXMLElement $parent_elt is the parent object of current outline
  220. * @param array $outline array representing an outline object
  221. * @param bool $strict true if "text" attribute is required, false else
  222. * @throws LibOPML_Exception
  223. * @access private
  224. */
  225. function libopml_render_outline($parent_elt, $outline, $strict) {
  226. // Outline MUST be an array!
  227. if (!is_array($outline)) {
  228. throw new LibOPML_Exception(
  229. 'Outline element must be defined as array'
  230. );
  231. }
  232. $outline_elt = $parent_elt->addChild('outline');
  233. $text_is_present = false;
  234. foreach ($outline as $key => $value) {
  235. // Only outlines can be an array and so we consider children are also
  236. // outline elements.
  237. if ($key === '@outlines' && is_array($value)) {
  238. foreach ($value as $outline_child) {
  239. libopml_render_outline($outline_elt, $outline_child, $strict);
  240. }
  241. } elseif (is_array($value) && !isset($value['namespace'])) {
  242. throw new LibOPML_Exception(
  243. 'Type of outline elements cannot be array (except for providing a namespace): ' . $key
  244. );
  245. } else {
  246. // Detect text attribute is present, that's good :)
  247. if ($key === 'text') {
  248. $text_is_present = true;
  249. }
  250. if (is_array($value)) {
  251. if (!empty($value['namespace']) && !empty($value['value'])) {
  252. $outline_elt->addAttribute($key, $value['value'], $value['namespace']);
  253. }
  254. } else {
  255. $outline_elt->addAttribute($key, $value);
  256. }
  257. }
  258. }
  259. if (!$text_is_present && $strict) {
  260. throw new LibOPML_Exception(
  261. 'You must define at least a text element for all outlines'
  262. );
  263. }
  264. }
  265. /**
  266. * Render an array as an OPML string or a XML object.
  267. *
  268. * @param array $array is the array we want to render and must follow structure defined above
  269. * @param bool $as_xml_object false if function must return a string, true for a XML object
  270. * @param bool $strict true if "text" attribute is required, false else
  271. * @return string|SimpleXMLElement XML string corresponding to $array or XML object
  272. * @throws LibOPML_Exception
  273. * @access public
  274. */
  275. function libopml_render($array, $as_xml_object = false, $strict = true) {
  276. $opml = new SimpleXMLElement('<opml></opml>');
  277. $opml->addAttribute('version', $strict ? '2.0' : '1.0');
  278. // Create head element. $array['head'] is optional but head element will
  279. // exist in the final XML object.
  280. $head = $opml->addChild('head');
  281. if (isset($array['head'])) {
  282. foreach ($array['head'] as $key => $value) {
  283. if (in_array($key, unserialize(HEAD_ELEMENTS), true)) {
  284. $head->addChild($key, $value);
  285. }
  286. }
  287. }
  288. // Check body is set and contains at least one element
  289. if (!isset($array['body'])) {
  290. throw new LibOPML_Exception(
  291. '$array must contain a body element'
  292. );
  293. }
  294. if (count($array['body']) <= 0) {
  295. throw new LibOPML_Exception(
  296. 'Body element must contain at least one element (array)'
  297. );
  298. }
  299. // Create outline elements
  300. $body = $opml->addChild('body');
  301. foreach ($array['body'] as $outline) {
  302. libopml_render_outline($body, $outline, $strict);
  303. }
  304. // And return the final result
  305. if ($as_xml_object) {
  306. return $opml;
  307. } else {
  308. $dom = dom_import_simplexml($opml)->ownerDocument;
  309. $dom->formatOutput = true;
  310. $dom->encoding = 'UTF-8';
  311. return $dom->saveXML();
  312. }
  313. }