Răsfoiți Sursa

Better OPML import / export

- use a new OPML library (https://github.com/marienfressinaud/lib_opml)
- import has been completely rewritten (far better!)
- introduce addFeedObject and addCategoryObject (in DAO for the moment).
  They make it easy to add feeds and categories (they check whether these already exist in DB)
- introduce html_chars_utf8 (wrap htmlspecialchars for UTF-8)
Marien Fressinaud 12 ani în urmă
părinte
comite
9ea3819402

+ 77 - 47
app/Controllers/importExportController.php

@@ -129,71 +129,101 @@ class FreshRSS_importExport_Controller extends Minz_ActionController {
 	}
 
 	private function import_opml($opml_file) {
-		$categories = array();
-		$feeds = array();
+		$opml_array = array();
 		try {
-			list($categories, $feeds) = opml_import($opml_file);
-		} catch (FreshRSS_Opml_Exception $e) {
+			$opml_array = libopml_parse_string($opml_file);
+		} catch (LibOPML_Exception $e) {
 			Minz_Log::warning($e->getMessage());
 			return true;
 		}
 
 		$this->catDAO->checkDefault();
 
-		// on ajoute les catégories en masse dans une fonction à part
-		$this->addCategories($categories);
-
-		// on calcule la date des articles les plus anciens qu'on accepte
-		$nb_month_old = $this->view->conf->old_entries;
-		$date_min = time() - (3600 * 24 * 30 * $nb_month_old);
+		return $this->addOpmlElements($opml_array['body']);
+	}
 
-		// la variable $error permet de savoir si une erreur est survenue
-		// Le but est de ne pas arrêter l'import même en cas d'erreur
-		// L'utilisateur sera mis au courant s'il y a eu des erreurs, mais
-		// ne connaîtra pas les détails. Ceux-ci seront toutefois logguées
+	private function addOpmlElements($opml_elements, $parent_cat = null) {
 		$error = false;
-		foreach ($feeds as $feed) {
-			try {
-				$values = array(
-					'id' => $feed->id(),
-					'url' => $feed->url(),
-					'category' => $feed->category(),
-					'name' => $feed->name(),
-					'website' => $feed->website(),
-					'description' => $feed->description(),
-					'lastUpdate' => 0,
-					'httpAuth' => $feed->httpAuth()
-				);
+		foreach ($opml_elements as $elt) {
+			$res = false;
+			if (isset($elt['xmlUrl'])) {
+				$res = $this->addFeedOpml($elt, $parent_cat);
+			} else {
+				$res = $this->addCategoryOpml($elt, $parent_cat);
+			}
 
-				// ajout du flux que s'il n'est pas déjà en BDD
-				if (!$this->feedDAO->searchByUrl($values['url'])) {
-					$id = $this->feedDAO->addFeed($values);
-					if ($id) {
-						$feed->_id($id);
-						$feed->faviconPrepare();
-					} else {
-						$error = true;
-					}
-				}
-			} catch (FreshRSS_Feed_Exception $e) {
-				$error = true;
-				Minz_Log::record($e->getMessage(), Minz_Log::WARNING);
+			if (!$error && $res) {
+				// oops: there is at least one error!
+				$error = $res;
 			}
 		}
 
 		return $error;
 	}
 
-	private function addCategories($categories) {
-		foreach ($categories as $cat) {
-			if (!$this->catDAO->searchByName($cat->name())) {
-				$values = array(
-					'id' => $cat->id(),
-					'name' => $cat->name(),
-				);
-				$this->catDAO->addCategory($values);
+	/**
+	 * Add one OPML feed outline to the database.
+	 *
+	 * @param array $feed_elt parsed outline; 'xmlUrl' and 'text' are read
+	 *                        unconditionally, 'htmlUrl' and 'description'
+	 *                        only when present
+	 * @param string|null $parent_cat name of the category that will hold the
+	 *                        feed, or null to use the default category
+	 * @return boolean true if an error occurred (category not found, feed
+	 *                 exception, or addFeedObject() returned false),
+	 *                 false otherwise
+	 */
+	private function addFeedOpml($feed_elt, $parent_cat) {
+		if (is_null($parent_cat)) {
+			// This feed has no parent category so we get the default one.
+			// The DAO is a property of this controller, not a local variable.
+			$parent_cat = $this->catDAO->getDefault()->name();
+		}
+
+		$cat = $this->catDAO->searchByName($parent_cat);
+
+		if (!$cat) {
+			// Without a category there is nowhere to attach the feed
+			return true;
+		}
+
+		// We get different useful information
+		$url = html_chars_utf8($feed_elt['xmlUrl']);
+		$name = html_chars_utf8($feed_elt['text']);
+		$website = '';
+		if (isset($feed_elt['htmlUrl'])) {
+			$website = html_chars_utf8($feed_elt['htmlUrl']);
+		}
+		$description = '';
+		if (isset($feed_elt['description'])) {
+			$description = html_chars_utf8($feed_elt['description']);
+		}
+
+		$error = false;
+		try {
+			// Create a Feed object and add it in DB
+			$feed = new FreshRSS_Feed($url);
+			$feed->_category($cat->id());
+			$feed->_name($name);
+			$feed->_website($website);
+			$feed->_description($description);
+
+			// addFeedObject checks if feed is already in DB so nothing else to
+			// check here
+			$id = $this->feedDAO->addFeedObject($feed);
+			$error = ($id === false);
+		} catch (FreshRSS_Feed_Exception $e) {
+			// Log and flag the problem; the caller carries on with the import
+			Minz_Log::record($e->getMessage(), Minz_Log::WARNING);
+			$error = true;
+		}
+
+		return $error;
+	}
+
+	/**
+	 * Add one OPML category outline, then every outline nested inside it.
+	 *
+	 * @param array $cat_elt parsed outline; 'text' is used as the category
+	 *                       name, '@outlines' (optional) holds its children
+	 * @param string|null $parent_cat not used by this method
+	 * @return boolean true if addCategoryObject() returned false or if adding
+	 *                 any nested element reported an error, false otherwise
+	 */
+	private function addCategoryOpml($cat_elt, $parent_cat) {
+		// Create a new Category object
+		$cat = new FreshRSS_Category(html_chars_utf8($cat_elt['text']));
+
+		$id = $this->catDAO->addCategoryObject($cat);
+		$error = ($id === false);
+
+		if (isset($cat_elt['@outlines'])) {
+			// Our cat_elt contains more categories or more feeds, so we
+			// add them recursively, passing this category's name as parent.
+			// Note: FreshRSS does not yet support nested categories
+			$res = $this->addOpmlElements($cat_elt['@outlines'], $cat->name());
+			if (!$error && $res) {
+				$error = true;
 			}
 		}
+
+		return $error;
 	}
 
 	private function import_articles($article_file, $starred = false) {

+ 0 - 6
app/Exceptions/OpmlException.php

@@ -1,6 +0,0 @@
-<?php
-class FreshRSS_Opml_Exception extends FreshRSS_Feed_Exception {
-	public function __construct ($name_file) {
-		parent::__construct ('OPML file is invalid');
-	}
-}

+ 12 - 0
app/Models/CategoryDAO.php

@@ -18,6 +18,18 @@ class FreshRSS_CategoryDAO extends Minz_ModelPdo {
 		}
 	}
 
+	/**
+	 * Add a category to the database unless one with the same name exists.
+	 *
+	 * @param FreshRSS_Category $category category to store; only its name()
+	 *                                    is read
+	 * @return mixed id of the existing category when the name is already in
+	 *               DB, otherwise whatever addCategory() returns (presumably
+	 *               the new id, or false on failure; confirm in addCategory)
+	 */
+	public function addCategoryObject($category) {
+		$cat = $this->searchByName($category->name());
+		if (!$cat) {
+			// Category does not exist yet in DB so we add it before continuing
+			$values = array(
+				'name' => $category->name(),
+			);
+			return $this->addCategory($values);
+		}
+
+		// Already in DB: this is not a failure, so report the existing id
+		// rather than false, which callers interpret as an error.
+		return $cat->id();
+	}
+
 	public function updateCategory ($id, $valuesTmp) {
 		$sql = 'UPDATE `' . $this->prefix . 'category` SET name=? WHERE id=?';
 		$stm = $this->bd->prepare ($sql);

+ 29 - 0
app/Models/FeedDAO.php

@@ -24,6 +24,35 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo {
 		}
 	}
 
+	/**
+	 * Store a feed object in the database when its URL is not known yet.
+	 *
+	 * On a successful insertion the feed receives its new id and
+	 * faviconPrepare() is called on it.
+	 *
+	 * @param FreshRSS_Feed $feed feed to store
+	 * @return mixed the value returned by addFeed(), or false when a feed
+	 *               with the same URL is already in DB
+	 */
+	public function addFeedObject($feed) {
+		// TODO: not sure if we should write this method in DAO since DAO
+		// should not be aware about feed class
+
+		if ($this->searchByUrl($feed->url())) {
+			// Known URL: nothing is added.
+			// NOTE(review): false is also what callers test for to detect a
+			// failure, so "already exists" looks like an error to them.
+			return false;
+		}
+
+		$id = $this->addFeed(array(
+			'id' => $feed->id(),
+			'url' => $feed->url(),
+			'category' => $feed->category(),
+			'name' => $feed->name(),
+			'website' => $feed->website(),
+			'description' => $feed->description(),
+			'lastUpdate' => 0,
+			'httpAuth' => $feed->httpAuth()
+		));
+
+		if ($id) {
+			$feed->_id($id);
+			$feed->faviconPrepare();
+		}
+
+		return $id;
+	}
+
 	public function updateFeed ($id, $valuesTmp) {
 		$set = '';
 		foreach ($valuesTmp as $key => $v) {

+ 29 - 14
app/views/helpers/export/opml.phtml

@@ -1,15 +1,30 @@
 <?php
-require_once(LIB_PATH . '/lib_opml.php');
-
-echo '<?xml version="1.0" encoding="UTF-8" ?>';
-?>
-<!-- Generated by <?php echo Minz_Configuration::title (); ?> -->
-<opml version="2.0">
-	<head>
-		<title><?php echo Minz_Configuration::title (); ?> OPML Feed</title>
-		<dateCreated><?php echo date('D, d M Y H:i:s'); ?></dateCreated>
-	</head>
-	<body>
-<?php echo opml_export ($this->categories); ?>
-	</body>
-</opml>
+
+// Head section: document title and creation date.
+$opml_head = array(
+	'title' => Minz_Configuration::title(),
+	'dateCreated' => date('D, d M Y H:i:s')
+);
+
+// Body section: one outline per category, each one holding an outline for
+// every feed of that category.
+$opml_body = array();
+foreach ($this->categories as $key => $cat) {
+	$feed_outlines = array();
+	foreach ($cat['feeds'] as $feed) {
+		$feed_outlines[] = array(
+			'text' => $feed->name(),
+			'type' => 'rss',
+			'xmlUrl' => $feed->url(),
+			'htmlUrl' => $feed->website(),
+			'description' => htmlspecialchars(
+				$feed->description(), ENT_COMPAT, 'UTF-8'
+			)
+		);
+	}
+
+	$opml_body[$key] = array(
+		'text' => $cat['name'],
+		'@outlines' => $feed_outlines
+	);
+}
+
+// Assemble the structure expected by libopml_render() and print the result.
+$opml_array = array(
+	'head' => $opml_head,
+	'body' => $opml_body
+);
+
+echo libopml_render($opml_array);

+ 194 - 83
lib/lib_opml.php

@@ -1,23 +1,86 @@
 <?php
-function opml_export ($cats) {
-	$txt = '';
 
-	foreach ($cats as $cat) {
-		$txt .= '<outline text="' . $cat['name'] . '">' . "\n";
-
-		foreach ($cat['feeds'] as $feed) {
-			$txt .= "\t" . '<outline text="' . $feed->name () . '" type="rss" xmlUrl="' . $feed->url () . '" htmlUrl="' . $feed->website () . '" description="' . htmlspecialchars($feed->description(), ENT_COMPAT, 'UTF-8') . '" />' . "\n";
+/* *
+ * lib_opml is a free library to manage OPML format in PHP.
+ * It takes in consideration only version 2.0 (http://dev.opml.org/spec2.html).
+ * Basically it means "text" attribute for outline elements is required.
+ *
+ * lib_opml requires SimpleXML (http://php.net/manual/en/book.simplexml.php)
+ *
+ * Usages:
+ * > include('lib_opml.php');
+ * > $filename = 'my_opml_file.xml';
+ * > $opml_array = libopml_parse_file($filename);
+ * > print_r($opml_array);
+ *
+ * > $opml_string = [...];
+ * > $opml_array = libopml_parse_string($opml_string);
+ * > print_r($opml_array);
+ *
+ * > $opml_array = [...];
+ * > $opml_string = libopml_render($opml_array);
+ * > $opml_object = libopml_render($opml_array, true);
+ * > echo $opml_string;
+ * > print_r($opml_object);
+ *
+ * If parsing fails for any reason (e.g. not an XML string, does not match with
+ * the specifications), a LibOPML_Exception is raised.
+ *
+ * Author: Marien Fressinaud <dev@marienfressinaud.fr>
+ * Url: https://github.com/marienfressinaud/lib_opml
+ * Version: 0.1
+ * Date: 2014-03-29
+ * License: public domain
+ *
+ * */
+
+// Exception type thrown by every libopml_* function of this file.
+class LibOPML_Exception extends Exception {}
+
+
+// Names of the elements accepted inside <head>. These elements are optional.
+// The list is stored as a serialized string; code reading the constant must
+// pass it through unserialize() to get the array back.
+define('HEAD_ELEMENTS', serialize(array(
+	'title', 'dateCreated', 'dateModified', 'ownerName', 'ownerEmail',
+	'ownerId', 'docs', 'expansionState', 'vertScrollState', 'windowTop',
+	'windowLeft', 'windowBottom', 'windowRight'
+)));
+
+
+function libopml_parse_outline($outline_xml) {
+	$outline = array();
+
+	// An outline may contain any kind of attributes but "text" attribute is
+	// required !
+	$text_is_present = false;
+	foreach ($outline_xml->attributes() as $key => $value) {
+		$outline[$key] = (string)$value;
+
+		if ($key === 'text') {
+			$text_is_present = true;
 		}
+	}
 
-		$txt .= '</outline>' . "\n";
+	if (!$text_is_present) {
+		throw new LibOPML_Exception(
+			'Outline does not contain any text attribute'
+		);
 	}
 
-	return $txt;
+	foreach ($outline_xml->children() as $key => $value) {
+		// An outline may contain any number of outline children
+		if ($key === 'outline') {
+			$outline['@outlines'][] = libopml_parse_outline($value);
+		} else {
+			throw new LibOPML_Exception(
+				'Body can contain only outline elements'
+			);
+		}
+	}
+
+	return $outline;
 }
 
-function opml_import ($xml) {
-	$xml = html_only_entity_decode($xml);	//!\ Assume UTF-8
 
+function libopml_parse_string($xml) {
 	$dom = new DOMDocument();
 	$dom->recover = true;
 	$dom->strictErrorChecking = false;
@@ -27,94 +90,142 @@ function opml_import ($xml) {
 	$opml = simplexml_import_dom($dom);
 
 	if (!$opml) {
-		throw new FreshRSS_Opml_Exception ();
+		throw new LibOPML_Exception();
 	}
 
-	$catDAO = new FreshRSS_CategoryDAO();
-	$catDAO->checkDefault();
-	$defCat = $catDAO->getDefault();
+	$array = array(
+		'version' => (string)$opml['version'],
+		'head' => array(),
+		'body' => array()
+	);
+
+	// First, we get all "head" elements. Head is required but its sub-elements
+	// are optional.
+	foreach ($opml->head->children() as $key => $value) {
+		if (in_array($key, unserialize(HEAD_ELEMENTS), true)) {
+			$array['head'][$key] = (string)$value;
+		} else {
+			throw new LibOPML_Exception(
+				$key . 'is not part of OPML format'
+			);
+		}
+	}
 
-	$categories = array ();
-	$feeds = array ();
+	// Then, we get body oulines. Body must contain at least one outline
+	// element.
+	$at_least_one_outline = false;
+	foreach ($opml->body->children() as $key => $value) {
+		if ($key === 'outline') {
+			$at_least_one_outline = true;
+			$array['body'][] = libopml_parse_outline($value);
+		} else {
+			throw new LibOPML_Exception(
+				'Body can contain only outline elements'
+			);
+		}
+	}
+
+	if (!$at_least_one_outline) {
+		throw new LibOPML_Exception(
+			'Body must contain at least one outline element'
+		);
+	}
 
-	foreach ($opml->body->outline as $outline) {
-		if (!isset ($outline['xmlUrl'])) {
-			// Catégorie
-			$title = '';
+	return $array;
+}
 
-			if (isset ($outline['text'])) {
-				$title = (string) $outline['text'];
-			} elseif (isset ($outline['title'])) {
-				$title = (string) $outline['title'];
-			}
 
-			if ($title) {
-				// Permet d'éviter les soucis au niveau des id :
-				// ceux-ci sont générés en fonction de la date,
-				// un flux pourrait être dans une catégorie X avec l'id Y
-				// alors qu'il existe déjà la catégorie X mais avec l'id Z
-				// Y ne sera pas ajouté et le flux non plus vu que l'id
-				// de sa catégorie n'exisera pas
-				$title = htmlspecialchars($title, ENT_COMPAT, 'UTF-8');
-				$catDAO = new FreshRSS_CategoryDAO ();
-				$cat = $catDAO->searchByName ($title);
-				if ($cat == null) {
-					$cat = new FreshRSS_Category ($title);
-					$values = array (
-						'name' => $cat->name ()
-					);
-					$cat->_id ($catDAO->addCategory ($values));
-				}
-
-				$feeds = array_merge ($feeds, getFeedsOutline ($outline, $cat->id ()));
+/**
+ * Read a file and parse its content as OPML.
+ *
+ * @param string $filename path given to file_get_contents()
+ * @return array the value returned by libopml_parse_string()
+ * @throws LibOPML_Exception when the file cannot be read; exceptions raised
+ *                           by libopml_parse_string() are not caught here
+ */
+function libopml_parse_file($filename) {
+	$content = file_get_contents($filename);
+
+	if ($content !== false) {
+		return libopml_parse_string($content);
+	}
+
+	throw new LibOPML_Exception(
+		$filename . ' cannot be found'
+	);
+}
+
+
+function libopml_render_outline($parent_elt, $outline) {
+	// Outline MUST be an array!
+	if (!is_array($outline)) {
+		throw new LibOPML_Exception(
+			'Outline element must be defined as array'
+		);
+	}
+
+	$outline_elt = $parent_elt->addChild('outline');
+	$text_is_present = false;
+	foreach ($outline as $key => $value) {
+		// Only outlines can be an array and so we consider children are also
+		// outline elements.
+		if ($key === '@outlines' && is_array($value)) {
+			foreach ($value as $outline_child) {
+				libopml_render_outline($outline_elt, $outline_child);
 			}
+		} elseif (is_array($value)) {
+			throw new LibOPML_Exception(
+				'Type of outline elements cannot be array: ' . $key
+			);
 		} else {
-			// Flux rss sans catégorie, on récupère l'ajoute dans la catégorie par défaut
-			$feeds[] = getFeed ($outline, $defCat->id());
+			// Detect text attribute is present, that's good :)
+			if ($key === 'text') {
+				$text_is_present = true;
+			}
+
+			$outline_elt->addAttribute($key, $value);
 		}
 	}
 
-	return array ($categories, $feeds);
+	if (!$text_is_present) {
+		throw new LibOPML_Exception(
+			'You must define at least a text element for all outlines'
+		);
+	}
 }
 
-/**
- * import all feeds of a given outline tag
- */
-function getFeedsOutline ($outline, $cat_id) {
-	$feeds = array ();
 
-	foreach ($outline->children () as $child) {
-		if (isset ($child['xmlUrl'])) {
-			$feeds[] = getFeed ($child, $cat_id);
-		} else {
-			$feeds = array_merge(
-				$feeds,
-				getFeedsOutline ($child, $cat_id)
-			);
+function libopml_render($array, $as_xml_object = false) {
+	$opml = new SimpleXMLElement('<opml version="2.0"></opml>');
+
+	// Create head element. $array['head'] is optional but head element will
+	// exist in the final XML object.
+	$head = $opml->addChild('head');
+	if (isset($array['head'])) {
+		foreach ($array['head'] as $key => $value) {
+			if (in_array($key, unserialize(HEAD_ELEMENTS), true)) {
+				$head->addChild($key, $value);
+			}
 		}
 	}
 
-	return $feeds;
-}
+	// Check body is set and contains at least one element
+	if (!isset($array['body'])) {
+		throw new LibOPML_Exception(
+			'$array must contain a body element'
+		);
+	}
+	if (count($array['body']) <= 0) {
+		throw new LibOPML_Exception(
+			'Body element must contain at least one element (array)'
+		);
+	}
 
-function getFeed ($outline, $cat_id) {
-	$url = (string) $outline['xmlUrl'];
-	$url = htmlspecialchars($url, ENT_COMPAT, 'UTF-8');
-	$title = '';
-	if (isset ($outline['text'])) {
-		$title = (string) $outline['text'];
-	} elseif (isset ($outline['title'])) {
-		$title = (string) $outline['title'];
-	}
-	$title = htmlspecialchars($title, ENT_COMPAT, 'UTF-8');
-	$feed = new FreshRSS_Feed ($url);
-	$feed->_category ($cat_id);
-	$feed->_name ($title);
-	if (isset($outline['htmlUrl'])) {
-		$feed->_website(htmlspecialchars((string)$outline['htmlUrl'], ENT_COMPAT, 'UTF-8'));
-	}
-	if (isset($outline['description'])) {
-		$feed->_description(sanitizeHTML((string)$outline['description']));
-	}
-	return $feed;
+	// Create outline elements
+	$body = $opml->addChild('body');
+	foreach ($array['body'] as $outline) {
+		libopml_render_outline($body, $outline);
+	}
+
+	// And return the final result
+	if ($as_xml_object) {
+		return $opml;
+	} else {
+		$dom = dom_import_simplexml($opml)->ownerDocument;
+		$dom->formatOutput = true;
+		$dom->encoding = 'UTF-8';
+		return $dom->saveXML();
+	}
 }

+ 4 - 0
lib/lib_rss.php

@@ -244,3 +244,7 @@ function cryptAvailable() {
 	}
 	return false;
 }
+
+/**
+ * Wrap htmlspecialchars() with the ENT_COMPAT flag and the UTF-8 charset.
+ *
+ * @param string $str text to escape
+ * @return string the escaped text
+ */
+function html_chars_utf8($str) {
+	$escaped = htmlspecialchars($str, ENT_COMPAT, 'UTF-8');
+	return $escaped;
+}