Przeglądaj źródła

Create separate `Retry-After` files for proxies (#8029)

* Create separate `Retry-After` files for proxies
Bad proxies are able to send a false `Retry-After` header and affect the availability of feeds (domain-wide) for other users.
This PR starts including the address of the proxy if present in filenames for `Retry-After` to mitigate the issue.

* Reduce code changes

* Sync SimplePie fork
https://github.com/FreshRSS/simplepie/pull/62

---------

Co-authored-by: Alexandre Alapetite <alexandre@alapetite.fr>
Inverle 5 miesięcy temu
rodzic
commit
7d4854a0a4

+ 1 - 1
app/Models/Feed.php

@@ -552,7 +552,7 @@ class FreshRSS_Feed extends Minz_Model {
 					Minz_Exception::ERROR
 				);
 			} else {
-				if (($retryAfter = FreshRSS_http_Util::getRetryAfter($this->url)) > 0) {
+				if (($retryAfter = FreshRSS_http_Util::getRetryAfter($this->url, $this->proxyParam())) > 0) {
 					throw new FreshRSS_Feed_Exception('For that domain, will first retry after ' . date('c', $retryAfter) .
 						'. ' . $this->url(includeCredentials: false), code: 503);
 				}

+ 3 - 2
app/Models/SimplePieResponse.php

@@ -4,7 +4,7 @@ declare(strict_types=1);
 final class FreshRSS_SimplePieResponse extends \SimplePie\File
 {
 	#[\Override]
-	protected function on_http_response($response): void {
+	protected function on_http_response($response, array $curl_options = []): void {
 		syslog(LOG_INFO, 'FreshRSS SimplePie GET ' . $this->get_status_code() . ' ' . \SimplePie\Misc::url_remove_credentials($this->get_final_requested_uri()));
 
 		if (in_array($this->get_status_code(), [429, 503], true)) {
@@ -15,7 +15,8 @@ final class FreshRSS_SimplePieResponse extends \SimplePie\File
 				$headers = [];
 			}
 
-			$retryAfter = FreshRSS_http_Util::setRetryAfter($this->get_final_requested_uri(), $headers['retry-after'] ?? '');
+			$proxy = is_string($curl_options[CURLOPT_PROXY] ?? null) ? $curl_options[CURLOPT_PROXY] : '';
+			$retryAfter = FreshRSS_http_Util::setRetryAfter($this->get_final_requested_uri(), $proxy, $headers['retry-after'] ?? '');
 			if ($retryAfter > 0) {
 				$domain = parse_url($this->get_final_requested_uri(), PHP_URL_HOST);
 				if (is_string($domain) && $domain !== '') {

+ 6 - 6
app/Utils/httpUtil.php

@@ -5,7 +5,7 @@ final class FreshRSS_http_Util {
 
 	private const RETRY_AFTER_PATH = DATA_PATH . '/Retry-After/';
 
-	private static function getRetryAfterFile(string $url): string {
+	private static function getRetryAfterFile(string $url, string $proxy): string {
 		$domain = parse_url($url, PHP_URL_HOST);
 		if (!is_string($domain) || $domain === '') {
 			return '';
@@ -14,7 +14,7 @@ final class FreshRSS_http_Util {
 		if (is_int($port)) {
 			$domain .= ':' . $port;
 		}
-		return self::RETRY_AFTER_PATH . urlencode($domain) . '.txt';
+		return self::RETRY_AFTER_PATH . urlencode($domain) . (empty($proxy) ? '' : ('_' . urlencode($proxy))) . '.txt';
 	}
 
 	/**
@@ -39,11 +39,11 @@ final class FreshRSS_http_Util {
 	 * Check whether the URL needs to wait for a Retry-After period.
 	 * @return int The timestamp of when the Retry-After expires, or 0 if not set.
 	 */
-	public static function getRetryAfter(string $url): int {
+	public static function getRetryAfter(string $url, string $proxy): int {
 		if (rand(0, 30) === 1) {	// Remove old files once in a while
 			self::cleanRetryAfters();
 		}
-		$txt = self::getRetryAfterFile($url);
+		$txt = self::getRetryAfterFile($url, $proxy);
 		if ($txt === '') {
 			return 0;
 		}
@@ -61,8 +61,8 @@ final class FreshRSS_http_Util {
 	/**
 	 * Store the HTTP Retry-After header value of an HTTP `429 Too Many Requests` or `503 Service Unavailable` response.
 	 */
-	public static function setRetryAfter(string $url, string $retryAfter): int {
-		$txt = self::getRetryAfterFile($url);
+	public static function setRetryAfter(string $url, string $proxy, string $retryAfter): int {
+		$txt = self::getRetryAfterFile($url, $proxy);
 		if ($txt === '') {
 			return 0;
 		}

+ 1 - 1
lib/composer.json

@@ -14,7 +14,7 @@
         "marienfressinaud/lib_opml": "0.5.1",
         "phpgt/cssxpath": "v1.4.0",
         "phpmailer/phpmailer": "7.0.0",
-        "simplepie/simplepie": "dev-freshrss#187c2f28c6a7050e46e7bbfa5579552f78a6c1df"
+        "simplepie/simplepie": "dev-freshrss#e7b26b4f01d377dc8174d5d4aee961604534d065"
     },
     "config": {
         "sort-packages": true,

+ 22 - 19
lib/lib_rss.php

@@ -735,7 +735,24 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
 		cleanCache(CLEANCACHE_HOURS);
 	}
 
-	if (($retryAfter = FreshRSS_http_Util::getRetryAfter($url)) > 0) {
+	$options = [];
+	$accept = '';
+	$proxy = is_string(FreshRSS_Context::systemConf()->curl_options[CURLOPT_PROXY] ?? null) ? FreshRSS_Context::systemConf()->curl_options[CURLOPT_PROXY] : '';
+	if (is_array($attributes['curl_params'] ?? null)) {
+		$options = sanitizeCurlParams($attributes['curl_params']);
+		$proxy = is_string($options[CURLOPT_PROXY]) ? $options[CURLOPT_PROXY] : '';
+		if (is_array($options[CURLOPT_HTTPHEADER] ?? null)) {
+			// Remove headers problematic for security
+			$options[CURLOPT_HTTPHEADER] = array_filter($options[CURLOPT_HTTPHEADER],
+				fn($header) => is_string($header) && !preg_match('/^(Remote-User|X-WebAuth-User)\\s*:/i', $header));
+			// Add Accept header if it is not set
+			if (preg_grep('/^Accept\\s*:/i', $options[CURLOPT_HTTPHEADER]) === false) {
+				$options[CURLOPT_HTTPHEADER][] = 'Accept: ' . $accept;
+			}
+		}
+	}
+
+	if (($retryAfter = FreshRSS_http_Util::getRetryAfter($url, $proxy)) > 0) {
 		Minz_Log::warning('For that domain, will first retry after ' . date('c', $retryAfter) . '. ' . \SimplePie\Misc::url_remove_credentials($url));
 		return ['body' => '', 'effective_url' => $url, 'redirect_count' => 0, 'fail' => true];
 	}
@@ -744,7 +761,6 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
 		syslog(LOG_INFO, 'FreshRSS GET ' . $type . ' ' . \SimplePie\Misc::url_remove_credentials($url));
 	}
 
-	$accept = '';
 	switch ($type) {
 		case 'json':
 			$accept = 'application/json,application/feed+json,application/javascript;q=0.9,text/javascript;q=0.8,*/*;q=0.7';
@@ -782,6 +798,9 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
 		//CURLOPT_VERBOSE => 1,	// To debug sent HTTP headers
 	]);
 
+	curl_setopt_array($ch, $options);
+	curl_setopt_array($ch, FreshRSS_Context::systemConf()->curl_options);
+
 	$responseHeaders = '';
 	curl_setopt($ch, CURLOPT_HEADERFUNCTION, function (\CurlHandle $ch, string $header) use (&$responseHeaders) {
 		if (trim($header) !== '') {	// Skip e.g. separation with trailer headers
@@ -790,22 +809,6 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
 		return strlen($header);
 	});
 
-	curl_setopt_array($ch, FreshRSS_Context::systemConf()->curl_options);
-
-	if (is_array($attributes['curl_params'] ?? null)) {
-		$options = sanitizeCurlParams($attributes['curl_params']);
-		if (is_array($options[CURLOPT_HTTPHEADER] ?? null)) {
-			// Remove headers problematic for security
-			$options[CURLOPT_HTTPHEADER] = array_filter($options[CURLOPT_HTTPHEADER],
-				fn($header) => is_string($header) && !preg_match('/^(Remote-User|X-WebAuth-User)\\s*:/i', $header));
-			// Add Accept header if it is not set
-			if (preg_grep('/^Accept\\s*:/i', $options[CURLOPT_HTTPHEADER]) === false) {
-				$options[CURLOPT_HTTPHEADER][] = 'Accept: ' . $accept;
-			}
-		}
-		curl_setopt_array($ch, $options);
-	}
-
 	if (isset($attributes['ssl_verify'])) {
 		curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, empty($attributes['ssl_verify']) ? 0 : 2);
 		curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (bool)$attributes['ssl_verify']);
@@ -838,7 +841,7 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
 		$body = '';
 		Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url);
 		if (in_array($c_status, [429, 503], true)) {
-			$retryAfter = FreshRSS_http_Util::setRetryAfter($url, $headers['retry-after'] ?? '');
+			$retryAfter = FreshRSS_http_Util::setRetryAfter($url, $proxy, $headers['retry-after'] ?? '');
 			if ($c_status === 429) {
 				$errorMessage = 'HTTP 429 Too Many Requests! [' . \SimplePie\Misc::url_remove_credentials($url) . ']';
 			} elseif ($c_status === 503) {

+ 5 - 4
lib/simplepie/simplepie/src/File.php

@@ -145,7 +145,7 @@ class File implements Response
                 $responseHeaders .= "\r\n";
                 if (curl_errno($fp) === CURLE_WRITE_ERROR || curl_errno($fp) === CURLE_BAD_CONTENT_ENCODING) {
                     $this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp); // FreshRSS
-                    $this->on_http_response($responseBody === false ? false : $responseHeaders . $responseBody);
+                    $this->on_http_response($responseBody === false ? false : $responseHeaders . $responseBody, $curl_options);
                     $this->error = null; // FreshRSS
                     curl_setopt($fp, CURLOPT_ENCODING, 'none');
                     $responseHeaders = '';
@@ -156,7 +156,7 @@ class File implements Response
                 if (curl_errno($fp) !== CURLE_OK) {
                     $this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp);
                     $this->success = false;
-                    $this->on_http_response($responseBody === false ? false : $responseHeaders . $responseBody);
+                    $this->on_http_response($responseBody === false ? false : $responseHeaders . $responseBody, $curl_options);
                 } else {
                     // For PHPStan: `curl_exec` returns `false` only on error so the `is_string` check will always pass.
                     \assert(is_string($responseBody));
@@ -164,7 +164,7 @@ class File implements Response
                         // TODO: Replace with `CURLOPT_SUPPRESS_CONNECT_HEADERS` once PHP 7.2 support is dropped.
                         $responseHeaders = \SimplePie\HTTP\Parser::prepareHeaders($responseHeaders);
                     }
-                    $this->on_http_response($responseHeaders . $responseBody);
+                    $this->on_http_response($responseHeaders . $responseBody, $curl_options);
                     if (\PHP_VERSION_ID < 80000) {
                         curl_close($fp);
                     }
@@ -332,8 +332,9 @@ class File implements Response
      * Triggered just after an HTTP response is received.
      * @param string|false $response The raw HTTP response headers and body, or false in case of failure (as returned by curl_exec()).
      * FreshRSS.
+     * @param array<int, mixed> $curl_options
      */
-    protected function on_http_response($response): void
+    protected function on_http_response($response, array $curl_options = []): void
     {
     }