Przeglądaj źródła

Use cURL for GET full content (#1913)

* Use cURL for GET full content

Fix https://github.com/FreshRSS/FreshRSS/issues/1870

* Changelog 1870

https://github.com/FreshRSS/FreshRSS/issues/1870
https://github.com/FreshRSS/FreshRSS/pull/1913
Alexandre Alapetite 7 lat temu
rodzic
commit
ccc62b0a2c
3 zmienione pliki z 42 dodaniami i 4 usunięciami
  1. 1 0
      CHANGELOG.md
  2. 2 1
      app/Models/Entry.php
  3. 39 3
      lib/lib_rss.php

+ 1 - 0
CHANGELOG.md

@@ -45,6 +45,7 @@
 	* Updated German [#1856](https://github.com/FreshRSS/FreshRSS/pull/1856)
 	* Updated German [#1856](https://github.com/FreshRSS/FreshRSS/pull/1856)
 	* Updated Dutch [#1903](https://github.com/FreshRSS/FreshRSS/pull/1903)
 	* Updated Dutch [#1903](https://github.com/FreshRSS/FreshRSS/pull/1903)
 * Misc.
 * Misc.
+	* Use cURL for fetching full article content [#1870](https://github.com/FreshRSS/FreshRSS/issues/1870)
 	* Add error log information when SQLite has not enough temp space [#1816](https://github.com/FreshRSS/FreshRSS/issues/1816)
 	* Add error log information when SQLite has not enough temp space [#1816](https://github.com/FreshRSS/FreshRSS/issues/1816)
 	* Allow extension dir to be a symlink [#1911](https://github.com/FreshRSS/FreshRSS/pull/1911)
 	* Allow extension dir to be a symlink [#1911](https://github.com/FreshRSS/FreshRSS/pull/1911)
 
 

+ 2 - 1
app/Models/Entry.php

@@ -193,7 +193,8 @@ class FreshRSS_Entry extends Minz_Model {
 				try {
 				try {
 					// l'article n'est pas en BDD, on va le chercher sur le site
 					// l'article n'est pas en BDD, on va le chercher sur le site
 					$this->content = get_content_by_parsing(
 					$this->content = get_content_by_parsing(
-						htmlspecialchars_decode($this->link(), ENT_QUOTES), $pathEntries
+						htmlspecialchars_decode($this->link(), ENT_QUOTES), $pathEntries,
+						$this->feed->attributes()
 					);
 					);
 				} catch (Exception $e) {
 				} catch (Exception $e) {
 					// rien à faire, on garde l'ancien contenu(requête a échoué)
 					// rien à faire, on garde l'ancien contenu(requête a échoué)

+ 39 - 3
lib/lib_rss.php

@@ -254,11 +254,47 @@ function sanitizeHTML($data, $base = '') {
 }
 }
 
 
 /* permet de récupérer le contenu d'un article pour un flux qui n'est pas complet */
 /* permet de récupérer le contenu d'un article pour un flux qui n'est pas complet */
-function get_content_by_parsing ($url, $path) {
+function get_content_by_parsing($url, $path, $attributes = array()) {
 	require_once(LIB_PATH . '/lib_phpQuery.php');
 	require_once(LIB_PATH . '/lib_phpQuery.php');
+	$system_conf = Minz_Configuration::get('system');
+	$limits = $system_conf->limits;
+	$feed_timeout = empty($attributes['timeout']) ? 0 : intval($attributes['timeout']);
+
+	if ($system_conf->simplepie_syslog_enabled) {
+		syslog(LOG_INFO, 'FreshRSS GET ' . SimplePie_Misc::url_remove_credentials($url));
+	}
+
+	$ch = curl_init();
+	curl_setopt_array($ch, array(
+		CURLOPT_URL => $url,
+		CURLOPT_REFERER => SimplePie_Misc::url_remove_credentials($url),
+		CURLOPT_HTTPHEADER => array('Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
+		CURLOPT_USERAGENT => FRESHRSS_USERAGENT,
+		CURLOPT_CONNECTTIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'],
+		CURLOPT_TIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'],
+		//CURLOPT_FAILONERROR => true;
+		CURLOPT_MAXREDIRS => 4,
+		CURLOPT_RETURNTRANSFER => true,
+	));
+	if (version_compare(PHP_VERSION, '5.6.0') >= 0 || ini_get('open_basedir') == '') {
+		curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);	//Keep option separated for open_basedir PHP bug 65646
+	}
+	if (defined('CURLOPT_ENCODING')) {
+		curl_setopt($ch, CURLOPT_ENCODING, '');	//Enable all encodings
+	}
+	curl_setopt_array($ch, $system_conf->curl_options);
+	if (isset($attributes['ssl_verify'])) {
+		curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, $attributes['ssl_verify'] ? 2 : 0);
+		curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, $attributes['ssl_verify'] ? true : false);
+	}
+	$html = curl_exec($ch);
+	$c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
+	$c_error = curl_error($ch);
+	curl_close($ch);
 
 
-	Minz_Log::notice('FreshRSS GET ' . SimplePie_Misc::url_remove_credentials($url));
-	$html = file_get_contents($url);
+	if ($c_status != 200 || $c_error != '') {
+		Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url);
+	}
 
 
 	if ($html) {
 	if ($html) {
 		$doc = phpQuery::newDocument($html);
 		$doc = phpQuery::newDocument($html);