lib_opml.php 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. <?php
  2. /**
  3. * lib_opml is a free library to manage OPML format in PHP.
  4. *
  5. * By default, it takes into consideration version 2.0 but can be compatible with
  6. * OPML 1.0. More information on http://dev.opml.org.
  7. * The difference is that the "text" attribute is optional in version 1.0. It is highly
  8. * recommended to use this attribute.
  9. *
  10. * lib_opml requires SimpleXML (php.net/simplexml) and DOMDocument (php.net/domdocument)
  11. *
  12. * @author Marien Fressinaud <dev@marienfressinaud.fr>
  13. * @link https://github.com/marienfressinaud/lib_opml
  14. * @version 0.2-FreshRSS~1.5.1
  15. * @license public domain
  16. *
  17. * Usages:
  18. * > include('lib_opml.php');
  19. * > $filename = 'my_opml_file.xml';
  20. * > $opml_array = libopml_parse_file($filename);
  21. * > print_r($opml_array);
  22. *
  23. * > $opml_string = [...];
  24. * > $opml_array = libopml_parse_string($opml_string);
  25. * > print_r($opml_array);
  26. *
  27. * > $opml_array = [...];
  28. * > $opml_string = libopml_render($opml_array);
  29. * > $opml_object = libopml_render($opml_array, true);
  30. * > echo $opml_string;
  31. * > print_r($opml_object);
  32. *
  33. * You can set $strict argument to false if you want to bypass "text" attribute
  34. * requirement.
  35. *
  36. * If parsing fails for any reason (e.g. not an XML string, does not match with
  37. * the specifications), a LibOPML_Exception is raised.
  38. *
  39. * lib_opml array format is described here:
  40. * $array = array(
  41. * 'head' => array( // 'head' element is optional (but recommended)
  42. * 'key' => 'value', // key must be a part of available OPML head elements
  43. * ),
  44. * 'body' => array( // body is required
  45. * array( // this array represents an outline (at least one)
  46. * 'text' => 'value', // 'text' element is required if $strict is true
  47. * 'key' => 'value', // key and value are what you want (optional)
  48. * '@outlines' => array( // @outlines is a special value and represents sub-outlines
  49. * array(
  50. * [...] // where [...] is a valid outline definition
  51. * ),
  52. * ),
  53. * ),
  54. * array( // other outline definitions
  55. * [...]
  56. * ),
  57. * [...],
  58. * )
  59. * )
  60. *
  61. */
  62. /**
  63. * A simple Exception class which represents any kind of OPML problem.
  64. * Message should precise the current problem.
  65. */
  66. class LibOPML_Exception extends Exception {}
  67. // Define the list of available head attributes. All of them are optional.
  68. define('HEAD_ELEMENTS', serialize(array(
  69. 'title', 'dateCreated', 'dateModified', 'ownerName', 'ownerEmail',
  70. 'ownerId', 'docs', 'expansionState', 'vertScrollState', 'windowTop',
  71. 'windowLeft', 'windowBottom', 'windowRight'
  72. )));
  73. /**
  74. * Parse an XML object as an outline object and return corresponding array
  75. *
  76. * @param SimpleXMLElement $outline_xml the XML object we want to parse
  77. * @param bool $strict true if "text" attribute is required, false else
  78. * @return array corresponding to an outline and following format described above
  79. * @throws LibOPML_Exception
  80. * @access private
  81. */
  82. function libopml_parse_outline($outline_xml, $strict = true) {
  83. $outline = array();
  84. // An outline may contain any kind of attributes but "text" attribute is
  85. // required !
  86. $text_is_present = false;
  87. foreach ($outline_xml->attributes() as $key => $value) {
  88. $outline[$key] = (string)$value;
  89. if ($key === 'text') {
  90. $text_is_present = true;
  91. }
  92. }
  93. if (!$text_is_present && $strict) {
  94. throw new LibOPML_Exception(
  95. 'Outline does not contain any text attribute'
  96. );
  97. }
  98. if (empty($outline['text']) && isset($outline['title'])) {
  99. $outline['text'] = $outline['title'];
  100. }
  101. foreach ($outline_xml->children() as $key => $value) {
  102. // An outline may contain any number of outline children
  103. if ($key === 'outline') {
  104. $outline['@outlines'][] = libopml_parse_outline($value, $strict);
  105. }
  106. }
  107. return $outline;
  108. }
  109. /**
  110. * Reformat the XML document as a hierarchy when
  111. * the OPML 2.0 category attribute is used
  112. */
  113. function preprocessing_categories($doc) {
  114. $outline_categories = array();
  115. $body = $doc->getElementsByTagName('body')->item(0);
  116. $xpath = new DOMXpath($doc);
  117. $outlines = $xpath->query('/opml/body/outline[@category]');
  118. foreach ($outlines as $outline) {
  119. $category = trim($outline->getAttribute('category'));
  120. if ($category != '') {
  121. $outline_categorie = null;
  122. if (!isset($outline_categories[$category])) {
  123. $outline_categorie = $doc->createElement('outline');
  124. $outline_categorie->setAttribute('text', $category);
  125. $body->insertBefore($outline_categorie, $body->firstChild);
  126. $outline_categories[$category] = $outline_categorie;
  127. } else {
  128. $outline_categorie = $outline_categories[$category];
  129. }
  130. $outline->parentNode->removeChild($outline);
  131. $outline_categorie->appendChild($outline);
  132. }
  133. }
  134. }
  135. /**
  136. * Parse a string as a XML one and returns the corresponding array
  137. *
  138. * @param string $xml is the string we want to parse
  139. * @param bool $strict true if "text" attribute is required, false else
  140. * @return array corresponding to the XML string and following format described above
  141. * @throws LibOPML_Exception
  142. * @access public
  143. */
  144. function libopml_parse_string($xml, $strict = true) {
  145. $dom = new DOMDocument();
  146. $dom->recover = true;
  147. $dom->strictErrorChecking = false;
  148. $dom->loadXML($xml);
  149. $dom->encoding = 'UTF-8';
  150. //Partial compatibility with the category attribute of OPML 2.0
  151. preprocessing_categories($dom);
  152. $opml = simplexml_import_dom($dom);
  153. if (!$opml) {
  154. throw new LibOPML_Exception();
  155. }
  156. $array = array(
  157. 'version' => (string)$opml['version'],
  158. 'head' => array(),
  159. 'body' => array()
  160. );
  161. // First, we get all "head" elements. Head is required but its sub-elements
  162. // are optional.
  163. foreach ($opml->head->children() as $key => $value) {
  164. if (in_array($key, unserialize(HEAD_ELEMENTS), true)) {
  165. $array['head'][$key] = (string)$value;
  166. } else {
  167. throw new LibOPML_Exception(
  168. $key . 'is not part of OPML format'
  169. );
  170. }
  171. }
  172. // Then, we get body oulines. Body must contain at least one outline
  173. // element.
  174. $at_least_one_outline = false;
  175. foreach ($opml->body->children() as $key => $value) {
  176. if ($key === 'outline') {
  177. $at_least_one_outline = true;
  178. $array['body'][] = libopml_parse_outline($value, $strict);
  179. }
  180. }
  181. if (!$at_least_one_outline) {
  182. throw new LibOPML_Exception(
  183. 'OPML body must contain at least one outline element'
  184. );
  185. }
  186. return $array;
  187. }
  188. /**
  189. * Parse a string contained into a file as a XML string and returns the corresponding array
  190. *
  191. * @param string $filename should indicates a valid XML file
  192. * @param bool $strict true if "text" attribute is required, false else
  193. * @return array corresponding to the file content and following format described above
  194. * @throws LibOPML_Exception
  195. * @access public
  196. */
  197. function libopml_parse_file($filename, $strict = true) {
  198. $file_content = file_get_contents($filename);
  199. if ($file_content === false) {
  200. throw new LibOPML_Exception(
  201. $filename . ' cannot be found'
  202. );
  203. }
  204. return libopml_parse_string($file_content, $strict);
  205. }
  206. /**
  207. * Create a XML outline object in a parent object.
  208. *
  209. * @param SimpleXMLElement $parent_elt is the parent object of current outline
  210. * @param array $outline array representing an outline object
  211. * @param bool $strict true if "text" attribute is required, false else
  212. * @throws LibOPML_Exception
  213. * @access private
  214. */
  215. function libopml_render_outline($parent_elt, $outline, $strict) {
  216. // Outline MUST be an array!
  217. if (!is_array($outline)) {
  218. throw new LibOPML_Exception(
  219. 'Outline element must be defined as array'
  220. );
  221. }
  222. $outline_elt = $parent_elt->addChild('outline');
  223. $text_is_present = false;
  224. foreach ($outline as $key => $value) {
  225. // Only outlines can be an array and so we consider children are also
  226. // outline elements.
  227. if ($key === '@outlines' && is_array($value)) {
  228. foreach ($value as $outline_child) {
  229. libopml_render_outline($outline_elt, $outline_child, $strict);
  230. }
  231. } elseif (is_array($value)) {
  232. throw new LibOPML_Exception(
  233. 'Type of outline elements cannot be array: ' . $key
  234. );
  235. } else {
  236. // Detect text attribute is present, that's good :)
  237. if ($key === 'text') {
  238. $text_is_present = true;
  239. }
  240. $outline_elt->addAttribute($key, $value);
  241. }
  242. }
  243. if (!$text_is_present && $strict) {
  244. throw new LibOPML_Exception(
  245. 'You must define at least a text element for all outlines'
  246. );
  247. }
  248. }
  249. /**
  250. * Render an array as an OPML string or a XML object.
  251. *
  252. * @param array $array is the array we want to render and must follow structure defined above
  253. * @param bool $as_xml_object false if function must return a string, true for a XML object
  254. * @param bool $strict true if "text" attribute is required, false else
  255. * @return string|SimpleXMLElement XML string corresponding to $array or XML object
  256. * @throws LibOPML_Exception
  257. * @access public
  258. */
  259. function libopml_render($array, $as_xml_object = false, $strict = true) {
  260. $opml = new SimpleXMLElement('<opml></opml>');
  261. $opml->addAttribute('version', $strict ? '2.0' : '1.0');
  262. // Create head element. $array['head'] is optional but head element will
  263. // exist in the final XML object.
  264. $head = $opml->addChild('head');
  265. if (isset($array['head'])) {
  266. foreach ($array['head'] as $key => $value) {
  267. if (in_array($key, unserialize(HEAD_ELEMENTS), true)) {
  268. $head->addChild($key, $value);
  269. }
  270. }
  271. }
  272. // Check body is set and contains at least one element
  273. if (!isset($array['body'])) {
  274. throw new LibOPML_Exception(
  275. '$array must contain a body element'
  276. );
  277. }
  278. if (count($array['body']) <= 0) {
  279. throw new LibOPML_Exception(
  280. 'Body element must contain at least one element (array)'
  281. );
  282. }
  283. // Create outline elements
  284. $body = $opml->addChild('body');
  285. foreach ($array['body'] as $outline) {
  286. libopml_render_outline($body, $outline, $strict);
  287. }
  288. // And return the final result
  289. if ($as_xml_object) {
  290. return $opml;
  291. } else {
  292. $dom = dom_import_simplexml($opml)->ownerDocument;
  293. $dom->formatOutput = true;
  294. $dom->encoding = 'UTF-8';
  295. return $dom->saveXML();
  296. }
  297. }