Browse Source

Décode les entités HTML en conservant les entités XML

N'ayant pas trouvé comment régler SimplePie pour ne pas avoir d'entités
HTML comme `&eacute;`, voici un patch qui les décode en sortie de
SimplePie tout en conservant les entités XML comme `&amp;`.
Contribue à https://github.com/marienfressinaud/FreshRSS/issues/247
Alexandre Alapetite 12 years ago
parent
commit
a4fc7becb8
1 changed files with 15 additions and 4 deletions
  1. 15 4
      app/models/Feed.php

+ 15 - 4
app/models/Feed.php

@@ -241,12 +241,22 @@ class Feed extends Model {
 			}
 			}
 		}
 		}
 	}
 	}
+	static function html_only_entity_decode($text) {	// Returns $text with named HTML entities replaced by their UTF-8 characters, except the entities of the special characters (&, <, >), which stay encoded
+		static $htmlEntitiesOnly = null;	// Replacement map (entity => character); static, so it is built on the first call only and reused afterwards
+		if ($htmlEntitiesOnly === null) {
+			$htmlEntitiesOnly = array_flip(array_diff(	// array_diff drops the entries whose entity also appears in the second table; array_flip turns character => entity into entity => character
+				get_html_translation_table(HTML_ENTITIES, ENT_NOQUOTES, 'UTF-8'),	// Full table: these are the HTML entities to decode
+				get_html_translation_table(HTML_SPECIALCHARS, ENT_NOQUOTES, 'UTF-8')	// Special characters only: removed from the map to preserve XML entities (ENT_NOQUOTES keeps quotes out of both tables, so quote entities are left untouched too)
+			));
+		}
+		return strtr($text, $htmlEntitiesOnly);	// Replaces every entity found in the map by its character; anything not in the map is returned unchanged
+	}
 	private function loadEntries ($feed) {
 	private function loadEntries ($feed) {
 		$entries = array ();
 		$entries = array ();
 
 
 		foreach ($feed->get_items () as $item) {
 		foreach ($feed->get_items () as $item) {
-			$title = strip_tags($item->get_title ());
-			$author = $item->get_author ();
+			$title = self::html_only_entity_decode (strip_tags ($item->get_title ()));
+			$author = self::html_only_entity_decode ($item->get_author ());
 			$link = $item->get_permalink ();
 			$link = $item->get_permalink ();
 			$date = strtotime ($item->get_date ());
 			$date = strtotime ($item->get_date ());
 
 
@@ -255,11 +265,12 @@ class Feed extends Model {
 			$tags = array ();
 			$tags = array ();
 			if (!is_null ($tags_tmp)) {
 			if (!is_null ($tags_tmp)) {
 				foreach ($tags_tmp as $tag) {
 				foreach ($tags_tmp as $tag) {
-					$tags[] = $tag->get_label ();
+					$tags[] = self::html_only_entity_decode ($tag->get_label ());
 				}
 				}
 			}
 			}
 
 
-			$content = $item->get_content ();
+			$content = self::html_only_entity_decode ($item->get_content ());
+
 			$elinks = array();
 			$elinks = array();
 			foreach ($item->get_enclosures() as $enclosure) {
 			foreach ($item->get_enclosures() as $enclosure) {
 				$elink = $enclosure->get_link();
 				$elink = $enclosure->get_link();