lib_opml.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. <?php
  2. /**
  3. * lib_opml is a free library to manage OPML format in PHP.
  4. *
  5. * By default, it targets OPML version 2.0 but can also handle OPML 1.0
  6. * documents. More information on http://dev.opml.org.
  7. * The difference is that the "text" attribute is optional in version 1.0. It is
  8. * highly recommended to use this attribute.
  9. *
  10. * lib_opml requires SimpleXML (php.net/simplexml) and DOMDocument (php.net/domdocument)
  11. *
  12. * @author Marien Fressinaud <dev@marienfressinaud.fr>
  13. * @link https://github.com/marienfressinaud/lib_opml
  14. * @version 0.2-FreshRSS~1.5.1
  15. * @license public domain
  16. *
  17. * Usages:
  18. * > include('lib_opml.php');
  19. * > $filename = 'my_opml_file.xml';
  20. * > $opml_array = libopml_parse_file($filename);
  21. * > print_r($opml_array);
  22. *
  23. * > $opml_string = [...];
  24. * > $opml_array = libopml_parse_string($opml_string);
  25. * > print_r($opml_array);
  26. *
  27. * > $opml_array = [...];
  28. * > $opml_string = libopml_render($opml_array);
  29. * > $opml_object = libopml_render($opml_array, true);
  30. * > echo $opml_string;
  31. * > print_r($opml_object);
  32. *
  33. * You can set $strict argument to false if you want to bypass "text" attribute
  34. * requirement.
  35. *
  36. * If parsing fails for any reason (e.g. not an XML string, does not match with
  37. * the specifications), a LibOPML_Exception is raised.
  38. *
  39. * lib_opml array format is described here:
  40. * $array = array(
  41. * 'head' => array( // 'head' element is optional (but recommended)
  42. * 'key' => 'value', // key must be a part of available OPML head elements
  43. * ),
  44. * 'body' => array( // body is required
  45. * array( // this array represents an outline (at least one)
  46. * 'text' => 'value', // 'text' element is required if $strict is true
  47. * 'key' => 'value', // key and value are what you want (optional)
  48. * '@outlines' => array( // @outlines is a special value and represents sub-outlines
  49. * array(
  50. * [...] // where [...] is a valid outline definition
  51. * ),
  52. * ),
  53. * ),
  54. * array( // other outline definitions
  55. * [...]
  56. * ),
  57. * [...],
  58. * )
  59. * )
  60. *
  61. */
  /**
   * Exception type raised by this library for any OPML-related problem.
   *
   * The message is expected to describe the specific problem encountered.
   */
  class LibOPML_Exception extends Exception
  {
  }
  // Names of the elements accepted inside <head>; every one of them is optional.
  // The list is stored in serialized form, so readers must unserialize() it.
  define(
      'HEAD_ELEMENTS',
      serialize(
          array(
              'title',
              'dateCreated',
              'dateModified',
              'ownerName',
              'ownerEmail',
              'ownerId',
              'docs',
              'expansionState',
              'vertScrollState',
              'windowTop',
              'windowLeft',
              'windowBottom',
              'windowRight'
          )
      )
  );
  /**
   * Convert an <outline> XML element, and recursively its children, into an array.
   *
   * Every attribute is copied as a string. When the "text" attribute is missing or
   * is an empty string and a "title" attribute exists, the title is used as text.
   * Child <outline> elements are collected under the '@outlines' key.
   *
   * @param SimpleXMLElement $outline_xml the XML object we want to parse
   * @param bool $strict true if "text" attribute is required, false else
   * @return array corresponding to an outline and following format described above
   * @throws LibOPML_Exception if "text" is absent in strict mode, or if a child
   *         element is not an outline
   * @access private
   */
  function libopml_parse_outline($outline_xml, $strict = true) {
      $outline = array();

      // An outline may carry any kind of attribute; all of them are kept.
      foreach ($outline_xml->attributes() as $key => $value) {
          $outline[$key] = (string)$value;
      }

      // Values are always strings here, so isset() is a pure presence check.
      if ($strict && !isset($outline['text'])) {
          throw new LibOPML_Exception(
              'Outline does not contain any text attribute'
          );
      }

      // Fall back on the title only when text is really absent or blank.
      // empty() must not be used: it would also discard the valid text "0".
      $text_is_blank = !isset($outline['text']) || $outline['text'] === '';
      if ($text_is_blank && isset($outline['title'])) {
          $outline['text'] = $outline['title'];
      }

      foreach ($outline_xml->children() as $key => $value) {
          // An outline may contain any number of outline children, nothing else
          if ($key !== 'outline') {
              throw new LibOPML_Exception(
                  'Body can contain only outline elements'
              );
          }
          $outline['@outlines'][] = libopml_parse_outline($value, $strict);
      }

      return $outline;
  }
  /**
   * Regroup top-level outlines according to their OPML 2.0 "category" attribute.
   *
   * Each outline matched by /opml/body/outline[@category] whose trimmed category
   * is not empty is moved under a generated <outline text="category"> element.
   * One such element is created per distinct category and inserted as the first
   * child of the first <body> element found in the document. The document is
   * modified in place and nothing is returned.
   *
   * @param DOMDocument $doc the OPML document to reorganise
   */
  function preprocessing_categories($doc) {
      $groups = array();
      $body = $doc->getElementsByTagName('body')->item(0);
      $finder = new DOMXpath($doc);

      foreach ($finder->query('/opml/body/outline[@category]') as $node) {
          $name = trim($node->getAttribute('category'));
          if ($name == '') {
              // Blank category: the outline stays where it is
              continue;
          }

          if (!isset($groups[$name])) {
              // First occurrence of this category: create its container outline
              $folder = $doc->createElement('outline');
              $folder->setAttribute('text', $name);
              $body->insertBefore($folder, $body->firstChild);
              $groups[$name] = $folder;
          }

          // Move the outline from its current parent into the category container
          $node->parentNode->removeChild($node);
          $groups[$name]->appendChild($node);
      }
  }
  /**
   * Parse a string as an XML one and return the corresponding array.
   *
   * The string is loaded in recovery mode, top-level outlines carrying a
   * "category" attribute are regrouped (see preprocessing_categories()), then the
   * "version" attribute, the head elements and the body outlines are extracted.
   * Parsing problems are reported through LibOPML_Exception; libxml warnings are
   * not emitted while loading.
   *
   * @param string $xml is the string we want to parse
   * @param bool $strict true if "text" attribute is required, false else
   * @return array corresponding to the XML string and following format described above
   * @throws LibOPML_Exception if the string is empty or cannot be loaded as XML,
   *         if head contains an unknown element, if body contains something else
   *         than outlines or no outline at all
   * @access public
   */
  function libopml_parse_string($xml, $strict = true) {
      // DOMDocument::loadXML() refuses an empty source (ValueError on PHP 8),
      // so report that case with the exception type callers expect.
      if ($xml === null || $xml === false || $xml === '') {
          throw new LibOPML_Exception('OPML string is empty');
      }

      $dom = new DOMDocument();
      $dom->recover = true;
      $dom->strictErrorChecking = false;

      // Collect libxml errors internally instead of emitting PHP warnings, and
      // restore the caller's setting afterwards.
      $previous_setting = libxml_use_internal_errors(true);
      $loaded = $dom->loadXML($xml);
      if (!$previous_setting) {
          // Only drop the buffer when the caller was not collecting errors itself
          libxml_clear_errors();
      }
      libxml_use_internal_errors($previous_setting);

      // In recovery mode a document can be returned without any root element
      if (!$loaded || $dom->documentElement === null) {
          throw new LibOPML_Exception('OPML string is not valid XML');
      }
      $dom->encoding = 'UTF-8';

      // Partial compatibility with the category attribute of OPML 2.0
      preprocessing_categories($dom);

      $opml = simplexml_import_dom($dom);
      if (!($opml instanceof SimpleXMLElement)) {
          throw new LibOPML_Exception('OPML string is not valid XML');
      }

      $array = array(
          'version' => (string)$opml['version'],
          'head' => array(),
          'body' => array()
      );

      // First, we get all "head" elements. Its sub-elements are optional; a
      // missing head is treated as an empty one.
      if (isset($opml->head)) {
          $allowed_elements = unserialize(HEAD_ELEMENTS);
          foreach ($opml->head->children() as $key => $value) {
              if (!in_array($key, $allowed_elements, true)) {
                  throw new LibOPML_Exception(
                      $key . ' is not part of OPML format'
                  );
              }
              $array['head'][$key] = (string)$value;
          }
      }

      // Then, we get body outlines. Body must contain at least one outline
      // element.
      $at_least_one_outline = false;
      if (isset($opml->body)) {
          foreach ($opml->body->children() as $key => $value) {
              if ($key !== 'outline') {
                  throw new LibOPML_Exception(
                      'Body can contain only outline elements'
                  );
              }
              $at_least_one_outline = true;
              $array['body'][] = libopml_parse_outline($value, $strict);
          }
      }

      if (!$at_least_one_outline) {
          throw new LibOPML_Exception(
              'Body must contain at least one outline element'
          );
      }

      return $array;
  }
  /**
   * Read a file and parse its content as an OPML string.
   *
   * The content is obtained with file_get_contents() and handed over to
   * libopml_parse_string().
   *
   * @param string $filename should indicate a valid XML file
   * @param bool $strict true if "text" attribute is required, false else
   * @return array corresponding to the file content and following format described above
   * @throws LibOPML_Exception if the file cannot be read or its content cannot be parsed
   * @access public
   */
  function libopml_parse_file($filename, $strict = true) {
      $contents = file_get_contents($filename);

      if (false !== $contents) {
          return libopml_parse_string($contents, $strict);
      }

      throw new LibOPML_Exception($filename . ' cannot be found');
  }
  /**
   * Append an <outline> element describing $outline to a parent XML element.
   *
   * Scalar entries become attributes of the new element, while the entries of the
   * special '@outlines' array are rendered recursively as child outlines.
   *
   * @param SimpleXMLElement $parent_elt is the parent object of current outline
   * @param array $outline array representing an outline object
   * @param bool $strict true if "text" attribute is required, false else
   * @throws LibOPML_Exception if $outline is not an array, if an entry other than
   *         '@outlines' holds an array, or if "text" is missing in strict mode
   * @access private
   */
  function libopml_render_outline($parent_elt, $outline, $strict) {
      if (!is_array($outline)) {
          throw new LibOPML_Exception(
              'Outline element must be defined as array'
          );
      }

      $node = $parent_elt->addChild('outline');
      $has_text = false;

      foreach ($outline as $name => $content) {
          if (!is_array($content)) {
              // Plain value: stored as an attribute; remember whether "text" was seen
              if ('text' === $name) {
                  $has_text = true;
              }
              $node->addAttribute($name, $content);
              continue;
          }

          // Arrays are only accepted for the list of sub-outlines
          if ('@outlines' !== $name) {
              throw new LibOPML_Exception(
                  'Type of outline elements cannot be array: ' . $name
              );
          }

          foreach ($content as $child) {
              libopml_render_outline($node, $child, $strict);
          }
      }

      if ($strict && !$has_text) {
          throw new LibOPML_Exception(
              'You must define at least a text element for all outlines'
          );
      }
  }
  /**
   * Render an array as an OPML string or an XML object.
   *
   * The "version" attribute is set to 2.0 in strict mode and to 1.0 otherwise.
   * A <head> element is always created; only the keys listed in HEAD_ELEMENTS are
   * written into it, other keys are silently ignored.
   *
   * @param array $array is the array we want to render and must follow structure defined above
   * @param bool $as_xml_object false if function must return a string, true for a XML object
   * @param bool $strict true if "text" attribute is required, false else
   * @return string|SimpleXMLElement XML string corresponding to $array or XML object
   * @throws LibOPML_Exception if body is missing, is not a list of outlines or is
   *         empty, or if an outline is invalid
   * @access public
   */
  function libopml_render($array, $as_xml_object = false, $strict = true) {
      $opml = new SimpleXMLElement('<opml></opml>');
      $opml->addAttribute('version', $strict ? '2.0' : '1.0');

      // Create head element. $array['head'] is optional but head element will
      // exist in the final XML object.
      $head = $opml->addChild('head');
      if (isset($array['head']) && is_array($array['head'])) {
          $allowed_elements = unserialize(HEAD_ELEMENTS);
          foreach ($array['head'] as $key => $value) {
              if (!in_array($key, $allowed_elements, true)) {
                  continue;
              }
              // addChild() reads its value as XML content, so a bare "&" starts
              // an entity reference and the text is lost. Escape it first.
              if (is_string($value)) {
                  $value = str_replace('&', '&amp;', $value);
              }
              $head->addChild($key, $value);
          }
      }

      // Check body is set and contains at least one element
      if (!isset($array['body'])) {
          throw new LibOPML_Exception(
              '$array must contain a body element'
          );
      }
      $outlines = $array['body'];
      // count() fails on non-countable values, so reject them explicitly
      $is_countable = is_array($outlines) || $outlines instanceof Countable;
      if (!$is_countable || count($outlines) <= 0) {
          throw new LibOPML_Exception(
              'Body element must contain at least one element (array)'
          );
      }

      // Create outline elements
      $body = $opml->addChild('body');
      foreach ($outlines as $outline) {
          libopml_render_outline($body, $outline, $strict);
      }

      // And return the final result
      if ($as_xml_object) {
          return $opml;
      }

      // Go through DOM to get an indented, UTF-8 declared document
      $dom = dom_import_simplexml($opml)->ownerDocument;
      $dom->formatOutput = true;
      $dom->encoding = 'UTF-8';
      return $dom->saveXML();
  }