Jelajahi Sumber

getContentByParsing follow HTML redirections (#2985)

* getContentByParsing follow HTML redirections

Add the ability to follow HTML `<meta http-equiv="Refresh">` redirections in getContentByParsing, for example:

```html
<meta http-equiv="Refresh" content="1; url=https://example.net/article123.html" />
```

* Better regex

* Trim http-equiv
Alexandre Alapetite 5 tahun lalu
induk
melakukan
b906d79d61
1 mengubah file dengan 16 tambahan dan 1 penghapusan
  1. 16 1
      app/Models/Entry.php

+ 16 - 1
app/Models/Entry.php

@@ -352,7 +352,7 @@ class FreshRSS_Entry extends Minz_Model {
 		}
 	}
 
-	public static function getContentByParsing($url, $path, $attributes = array()) {
+	public static function getContentByParsing($url, $path, $attributes = array(), $maxRedirs = 3) {
 		$system_conf = Minz_Configuration::get('system');
 		$limits = $system_conf->limits;
 		$feed_timeout = empty($attributes['timeout']) ? 0 : intval($attributes['timeout']);
@@ -392,6 +392,21 @@ class FreshRSS_Entry extends Minz_Model {
 		if ($html) {
 			require_once(LIB_PATH . '/lib_phpQuery.php');
 			$doc = phpQuery::newDocument($html);
+
+			if ($maxRedirs > 0) {
+				//Follow any HTML redirection
+				$metas = $doc->find('meta[http-equiv][content]');
+				foreach ($metas as $meta) {
+					if (strtolower(trim($meta->getAttribute('http-equiv'))) === 'refresh') {
+						$refresh = preg_replace('/^[0-9.; ]*\s*(url\s*=)?\s*/i', '', trim($meta->getAttribute('content')));
+						$refresh = SimplePie_Misc::absolutize_url($refresh, $url);
+						if ($refresh != false && $refresh !== $url) {
+							return self::getContentByParsing($refresh, $path, $attributes, $maxRedirs - 1);
+						}
+					}
+				}
+			}
+
 			$content = $doc->find($path);
 			return trim(sanitizeHTML($content->__toString(), $url));
 		} else {