Browse Source

Refactor some cURL options and use CURLOPT_USERPWD (#6177)

* Refactor some cURL options and use CURLOPT_USERPWD
fix https://github.com/FreshRSS/FreshRSS/issues/6176

* Fixes
Alexandre Alapetite 2 years ago
parent
commit
d0072b9fb7
5 changed files with 56 additions and 52 deletions
  1. 1 8
      app/Controllers/feedController.php
  2. 9 6
      app/Models/Category.php
  3. 16 13
      app/Models/Entry.php
  4. 22 22
      app/Models/Feed.php
  5. 8 3
      lib/lib_rss.php

+ 1 - 8
app/Controllers/feedController.php

@@ -1141,16 +1141,9 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController {
 			return;
 		}
 
-		$attributes = $feed->attributes();
-		$attributes['path_entries_filter'] = Minz_Request::paramString('selector_filter', true);
-
 		//Fetch & select content.
 		try {
-			$fullContent = FreshRSS_Entry::getContentByParsing(
-				htmlspecialchars_decode($entry->link(), ENT_QUOTES),
-				htmlspecialchars_decode($content_selector, ENT_QUOTES),
-				$attributes
-			);
+			$fullContent = $entry->getContentByParsing();
 
 			if ($fullContent != '') {
 				$this->view->selectorSuccess = true;

+ 9 - 6
app/Models/Category.php

@@ -94,6 +94,11 @@ class FreshRSS_Category extends Minz_Model {
 		return $this->nbNotRead;
 	}
 
+	/** @return array<int,mixed> Extra cURL options handed to customSimplePie() and httpGet() for this category; currently always empty */
+	public function curlOptions(): array {
+		return [];	// TODO: no per-category options yet (e.g., credentials for Dynamic OPML)
+	}
+
 	/**
 	 * @return array<int,FreshRSS_Feed>
 	 * @throws Minz_ConfigurationNamespaceException
@@ -158,11 +163,10 @@ class FreshRSS_Category extends Minz_Model {
 	}
 
 	/**
-	 * @param array<string> $attributes
 	 * @throws FreshRSS_Context_Exception
 	 */
-	public static function cacheFilename(string $url, array $attributes): string {
-		$simplePie = customSimplePie($attributes);
+	public function cacheFilename(string $url): string {
+		$simplePie = customSimplePie($this->attributes(), $this->curlOptions());	// only used below to derive the cache file name for $url
 		$filename = $simplePie->get_cache_filename($url);
 		return CACHE_PATH . '/' . $filename . '.opml.xml';
 	}
@@ -173,9 +177,8 @@ class FreshRSS_Category extends Minz_Model {
 			return false;
 		}
 		$ok = true;
-		$attributes = [];	//TODO
-		$cachePath = self::cacheFilename($url, $attributes);
-		$opml = httpGet($url, $cachePath, 'opml', $attributes);
+		$cachePath = $this->cacheFilename($url);
+		$opml = httpGet($url, $cachePath, 'opml', $this->attributes(), $this->curlOptions());
 		if ($opml == '') {
 			Minz_Log::warning('Error getting dynamic OPML for category ' . $this->id() . '! ' .
 				SimplePie_Misc::url_remove_credentials($url));

+ 16 - 13
app/Models/Entry.php

@@ -690,12 +690,18 @@ HTML;
 	}
 
 	/**
-	 * @param array<string,mixed> $attributes
+	 * @param string $url Overridden URL (e.g. a meta-refresh target). Will default to the entry URL. Returns '' when there is no URL, feed or selector.
 	 * @throws Minz_Exception
 	 */
-	public static function getContentByParsing(string $url, string $path, array $attributes = [], int $maxRedirs = 3): string {
-		$cachePath = FreshRSS_Feed::cacheFilename($url, $attributes, FreshRSS_Feed::KIND_HTML_XPATH);
-		$html = httpGet($url, $cachePath, 'html', $attributes);
+	public function getContentByParsing(string $url = '', int $maxRedirs = 3): string {
+		$url = $url ?: htmlspecialchars_decode($this->link(), ENT_QUOTES);
+		$feed = $this->feed();
+		if ($url === '' || $feed === null || $feed->pathEntries() === '') {
+			return '';
+		}
+
+		$cachePath = $feed->cacheFilename($url . '#' . $feed->pathEntries());
+		$html = httpGet($url, $cachePath, 'html', $feed->attributes());	// keep forwarding the per-feed attributes, as before the refactoring
 		if (strlen($html) > 0) {
 			$doc = new DOMDocument();
 			$doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING);
@@ -709,7 +715,7 @@ HTML;
 						$refresh = preg_replace('/^[0-9.; ]*\s*(url\s*=)?\s*/i', '', trim($meta->getAttribute('content')));
 						$refresh = SimplePie_Misc::absolutize_url($refresh, $url);
 						if ($refresh != false && $refresh !== $url) {
-							return self::getContentByParsing($refresh, $path, $attributes, $maxRedirs - 1);
+							return $this->getContentByParsing($refresh, $maxRedirs - 1);
 						}
 					}
 				}
@@ -724,11 +730,12 @@ HTML;
 			}
 
 			$content = '';
-			$nodes = $xpath->query((new Gt\CssXPath\Translator($path))->asXPath());
+			$nodes = $xpath->query((new Gt\CssXPath\Translator(htmlspecialchars_decode($feed->pathEntries(), ENT_QUOTES)))->asXPath());	// selector is HTML-decoded first, as the former callers did
 			if ($nodes != false) {
+				$path_entries_filter = $feed->attributeString('path_entries_filter');
 				foreach ($nodes as $node) {
-					if (!empty($attributes['path_entries_filter'])) {
-						$filterednodes = $xpath->query((new Gt\CssXPath\Translator($attributes['path_entries_filter']))->asXPath(), $node) ?: [];
+					if ($path_entries_filter != null) {
+						$filterednodes = $xpath->query((new Gt\CssXPath\Translator($path_entries_filter))->asXPath(), $node) ?: [];
 						foreach ($filterednodes as $filterednode) {
 							if ($filterednode->parentNode === null) {
 								continue;
@@ -760,11 +767,7 @@ HTML;
 			} else {
 				try {
 					// The article is not yet in the database, so let’s fetch it
-					$fullContent = self::getContentByParsing(
-						htmlspecialchars_decode($this->link(), ENT_QUOTES),
-						htmlspecialchars_decode($feed->pathEntries(), ENT_QUOTES),
-						$feed->attributes()
-					);
+					$fullContent = $this->getContentByParsing();
 					if ('' !== $fullContent) {
 						$fullContent = "<!-- FULLCONTENT start //-->{$fullContent}<!-- FULLCONTENT end //-->";
 						$originalContent = $this->originalContent();

+ 22 - 22
app/Models/Feed.php

@@ -174,6 +174,16 @@ class FreshRSS_Feed extends Minz_Model {
 				];
 		}
 	}
+
+	/** @return array<int,mixed> cURL options (CURLOPT_* constant => value) specific to this feed; empty when no HTTP credentials are set */
+	public function curlOptions(): array {
+		$curl_options = [];
+		if ($this->httpAuth !== '') {	// credentials are only added when some are configured
+			$curl_options[CURLOPT_USERPWD] = htmlspecialchars_decode($this->httpAuth, ENT_QUOTES);	// stored value is HTML-decoded before being given to cURL
+		}
+		return $curl_options;
+	}
+
 	public function inError(): bool {
 		return $this->error;
 	}
@@ -348,11 +358,8 @@ class FreshRSS_Feed extends Minz_Model {
 					Minz_Exception::ERROR
 				);
 			} else {
+				$simplePie = customSimplePie($this->attributes(), $this->curlOptions());
 				$url = htmlspecialchars_decode($this->url, ENT_QUOTES);
-				if ($this->httpAuth != '') {
-					$url = preg_replace('#((.+)://)(.+)#', '${1}' . $this->httpAuth . '@${3}', $url) ?? '';
-				}
-				$simplePie = customSimplePie($this->attributes());
 				if (substr($url, -11) === '#force_feed') {
 					$simplePie->force_feed(true);
 					$url = substr($url, 0, -11);
@@ -636,16 +643,12 @@ class FreshRSS_Feed extends Minz_Model {
 			return null;
 		}
 		$feedSourceUrl = htmlspecialchars_decode($this->url, ENT_QUOTES);
-		if ($this->httpAuth != '') {
-			$feedSourceUrl = preg_replace('#((.+)://)(.+)#', '${1}' . $this->httpAuth . '@${3}', $feedSourceUrl);
-		}
 		if ($feedSourceUrl == null) {
 			return null;
 		}
 
-		$cachePath = FreshRSS_Feed::cacheFilename($feedSourceUrl, $this->attributes(), $this->kind());
 		$httpAccept = 'json';
-		$json = httpGet($feedSourceUrl, $cachePath, $httpAccept, $this->attributes());
+		$json = httpGet($feedSourceUrl, $this->cacheFilename(), $httpAccept, $this->attributes(), $this->curlOptions());
 		if (strlen($json) <= 0) {
 			return null;
 		}
@@ -672,9 +675,6 @@ class FreshRSS_Feed extends Minz_Model {
 			return null;
 		}
 		$feedSourceUrl = htmlspecialchars_decode($this->url, ENT_QUOTES);
-		if ($this->httpAuth != '') {
-			$feedSourceUrl = preg_replace('#((.+)://)(.+)#', '${1}' . $this->httpAuth . '@${3}', $feedSourceUrl);
-		}
 		if ($feedSourceUrl == null) {
 			return null;
 		}
@@ -698,9 +698,8 @@ class FreshRSS_Feed extends Minz_Model {
 			return null;
 		}
 
-		$cachePath = FreshRSS_Feed::cacheFilename($feedSourceUrl, $this->attributes(), $this->kind());
-		$html = httpGet($feedSourceUrl, $cachePath,
-			$this->kind() === FreshRSS_Feed::KIND_XML_XPATH ? 'xml' : 'html', $this->attributes());
+		$httpAccept = $this->kind() === FreshRSS_Feed::KIND_XML_XPATH ? 'xml' : 'html';
+		$html = httpGet($feedSourceUrl, $this->cacheFilename(), $httpAccept, $this->attributes(), $this->curlOptions());
 		if (strlen($html) <= 0) {
 			return null;
 		}
@@ -892,15 +891,16 @@ class FreshRSS_Feed extends Minz_Model {
 	}
 
 	/**
-	 * @param array<string,mixed> $attributes
+	 * @param string $url Overridden URL. Will default to the HTML-decoded feed URL. The file extension depends on the feed kind.
 	 * @throws FreshRSS_Context_Exception
 	 */
-	public static function cacheFilename(string $url, array $attributes, int $kind = FreshRSS_Feed::KIND_RSS): string {
-		$simplePie = customSimplePie($attributes);
+	public function cacheFilename(string $url = ''): string {
+		$simplePie = customSimplePie($this->attributes(), $this->curlOptions());
+		$url = $url ?: htmlspecialchars_decode($this->url, ENT_QUOTES);	// ENT_QUOTES: same decoding as wherever else the feed URL is read (default flags differ before PHP 8.1)
 		$filename = $simplePie->get_cache_filename($url);
-		if ($kind === FreshRSS_Feed::KIND_HTML_XPATH) {
+		if ($this->kind === FreshRSS_Feed::KIND_HTML_XPATH) {
 			return CACHE_PATH . '/' . $filename . '.html';
-		} elseif ($kind === FreshRSS_Feed::KIND_XML_XPATH) {
+		} elseif ($this->kind === FreshRSS_Feed::KIND_XML_XPATH) {
 			return CACHE_PATH . '/' . $filename . '.xml';
 		} else {
 			return CACHE_PATH . '/' . $filename . '.spc';
@@ -908,12 +908,12 @@ class FreshRSS_Feed extends Minz_Model {
 	}
 
 	public function clearCache(): bool {
-		return @unlink(FreshRSS_Feed::cacheFilename($this->url, $this->attributes(), $this->kind));
+		return @unlink($this->cacheFilename());	// returns unlink()'s result; @ silences its warning on failure
 	}
 
 	/** @return int|false */
 	public function cacheModifiedTime() {
-		$filename = FreshRSS_Feed::cacheFilename($this->url, $this->attributes(), $this->kind);
+		$filename = $this->cacheFilename();	// no argument: cache file of the feed's own URL
 		clearstatcache(true, $filename);
 		return @filemtime($filename);
 	}

+ 8 - 3
lib/lib_rss.php

@@ -303,9 +303,10 @@ function sensitive_log($log) {
 
 /**
  * @param array<string,mixed> $attributes
+ * @param array<int,mixed> $curl_options
  * @throws FreshRSS_Context_Exception
  */
-function customSimplePie(array $attributes = array()): SimplePie {
+function customSimplePie(array $attributes = [], array $curl_options = []): SimplePie {
 	$limits = FreshRSS_Context::systemConf()->limits;
 	$simplePie = new SimplePie();
 	$simplePie->set_useragent(FRESHRSS_USERAGENT);
@@ -318,7 +319,7 @@ function customSimplePie(array $attributes = array()): SimplePie {
 	$feed_timeout = empty($attributes['timeout']) || !is_numeric($attributes['timeout']) ? 0 : (int)$attributes['timeout'];
 	$simplePie->set_timeout($feed_timeout > 0 ? $feed_timeout : $limits['timeout']);
 
-	$curl_options = FreshRSS_Context::systemConf()->curl_options;
+	$curl_options = array_replace(FreshRSS_Context::systemConf()->curl_options, $curl_options);
 	if (isset($attributes['ssl_verify'])) {
 		$curl_options[CURLOPT_SSL_VERIFYHOST] = $attributes['ssl_verify'] ? 2 : 0;
 		$curl_options[CURLOPT_SSL_VERIFYPEER] = (bool)$attributes['ssl_verify'];
@@ -482,8 +483,9 @@ function enforceHttpEncoding(string $html, string $contentType = ''): string {
 /**
  * @param string $type {html,json,opml,xml}
  * @param array<string,mixed> $attributes
+ * @param array<int,mixed> $curl_options
  */
-function httpGet(string $url, string $cachePath, string $type = 'html', array $attributes = []): string {
+function httpGet(string $url, string $cachePath, string $type = 'html', array $attributes = [], array $curl_options = []): string {
 	$limits = FreshRSS_Context::systemConf()->limits;
 	$feed_timeout = empty($attributes['timeout']) || !is_numeric($attributes['timeout']) ? 0 : intval($attributes['timeout']);
 
@@ -548,6 +550,9 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
 			curl_setopt($ch, CURLOPT_SSL_CIPHER_LIST, 'DEFAULT@SECLEVEL=1');
 		}
 	}
+
+	curl_setopt_array($ch, $curl_options);
+
 	$body = curl_exec($ch);
 	$c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
 	$c_content_type = '' . curl_getinfo($ch, CURLINFO_CONTENT_TYPE);