Просмотр исходного кода

New feed mode: HTML + XPath + JSON dot notation (JSON in HTML) (#6888)

* New feed mode: HTML + XPath + JSON dot notation (JSON in HTML)
Same as `JSON+DotNotation`, but the JSON string is first extracted from an HTML document using an XPath expression.
Example: `//script[@type='application/json']`
fix https://github.com/FreshRSS/FreshRSS/discussions/6876

* JavaScript UI to show/hide new field

* Casing xPathToJson

* Slight renaming
Alexandre Alapetite 1 год назад
Родитель
Коммит
ccb132523a

+ 9 - 1
app/Controllers/feedController.php

@@ -260,7 +260,7 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController {
 				if (!empty($xPathSettings)) {
 					$attributes['xpath'] = $xPathSettings;
 				}
-			} elseif ($feed_kind === FreshRSS_Feed::KIND_JSON_DOTNOTATION) {
+			} elseif ($feed_kind === FreshRSS_Feed::KIND_JSON_DOTNOTATION || $feed_kind === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) {
 				$jsonSettings = [];
 				if (Minz_Request::paramString('jsonFeedTitle') !== '') {
 					$jsonSettings['feedTitle'] = Minz_Request::paramString('jsonFeedTitle', true);
@@ -298,6 +298,9 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController {
 				if (!empty($jsonSettings)) {
 					$attributes['json_dotnotation'] = $jsonSettings;
 				}
+				if (Minz_Request::paramString('xPathToJson', plaintext: true) !== '') {
+					$attributes['xPathToJson'] = Minz_Request::paramString('xPathToJson', plaintext: true);
+				}
 			}
 
 			try {
@@ -509,6 +512,11 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController {
 					if ($simplePie === null) {
 						throw new FreshRSS_Feed_Exception('JSON Feed parsing failed for [' . $feed->url(false) . ']');
 					}
+				} elseif ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) {
+					$simplePie = $feed->loadJson();
+					if ($simplePie === null) {
+						throw new FreshRSS_Feed_Exception('HTML+XPath+JSON parsing failed for [' . $feed->url(false) . ']');
+					}
 				} else {
 					$simplePie = $feed->load(false, $feedIsNew);
 				}

+ 4 - 1
app/Controllers/subscriptionController.php

@@ -244,7 +244,7 @@ class FreshRSS_subscription_Controller extends FreshRSS_ActionController {
 					$xPathSettings['itemUid'] = Minz_Request::paramString('xPathItemUid', true);
 				if (!empty($xPathSettings))
 					$feed->_attribute('xpath', $xPathSettings);
-			} elseif ($feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION) {
+			} elseif ($feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION || $feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) {
 				$jsonSettings = [];
 				if (Minz_Request::paramString('jsonFeedTitle') !== '') {
 					$jsonSettings['feedTitle'] = Minz_Request::paramString('jsonFeedTitle', true);
@@ -282,6 +282,9 @@ class FreshRSS_subscription_Controller extends FreshRSS_ActionController {
 				if (!empty($jsonSettings)) {
 					$feed->_attribute('json_dotnotation', $jsonSettings);
 				}
+				if (Minz_Request::paramString('xPathToJson', plaintext: true) !== '') {
+					$feed->_attribute('xPathToJson', Minz_Request::paramString('xPathToJson', plaintext: true));
+				}
 			}
 
 			$feed->_attribute('path_entries_filter', Minz_Request::paramString('path_entries_filter', true));

+ 31 - 4
app/Models/Feed.php

@@ -32,6 +32,8 @@ class FreshRSS_Feed extends Minz_Model {
 
 	public const KIND_JSONFEED = 25;
 	public const KIND_JSON_DOTNOTATION = 30;
+	/** JSON embedded in HTML */
+	public const KIND_HTML_XPATH_JSON_DOTNOTATION = 35;
 
 	public const PRIORITY_IMPORTANT = 20;
 	public const PRIORITY_MAIN_STREAM = 10;
@@ -639,6 +641,24 @@ class FreshRSS_Feed extends Minz_Model {
 		];
 	}
 
+	private function extractJsonFromHtml(string $html): ?string {	// Evaluates the feed's `xPathToJson` attribute against $html and returns the resulting string, or null on failure
+		$xPathToJson = $this->attributeString('xPathToJson') ?? '';	// A missing attribute is treated like an empty expression
+		if ($xPathToJson === '') {
+			return null;	// No XPath expression configured, so there is nothing to extract
+		}
+
+		$doc = new DOMDocument();
+		$doc->recover = true;	// libxml recovery mode: attempt to parse documents that are not well-formed
+		$doc->strictErrorChecking = false;	// Do not throw DOMException on errors
+		if (!$doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {	// No network access while loading; libxml errors and warnings are not reported
+			return null;	// The HTML could not be loaded at all
+		}
+
+		$xpath = new DOMXPath($doc);
+		$json = @$xpath->evaluate('normalize-space(' . $xPathToJson . ')');	// normalize-space() yields a string with trimmed and collapsed whitespace; `@` hides the PHP warning raised by e.g. a malformed expression
+		return is_string($json) ? $json : null;	// evaluate() returns false for a malformed expression, which is mapped to null here
+	}
+
 	public function loadJson(): ?\SimplePie\SimplePie {
 		if ($this->url == '') {
 			return null;
@@ -648,14 +668,21 @@ class FreshRSS_Feed extends Minz_Model {
 			return null;
 		}
 
-		$httpAccept = 'json';
-		$json = httpGet($feedSourceUrl, $this->cacheFilename(), $httpAccept, $this->attributes(), $this->curlOptions());
-		if (strlen($json) <= 0) {
+		$httpAccept = $this->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION ? 'html' : 'json';
+		$content = httpGet($feedSourceUrl, $this->cacheFilename(), $httpAccept, $this->attributes(), $this->curlOptions());
+		if (strlen($content) <= 0) {
 			return null;
 		}
 
+		if ($this->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) {
+			$content = $this->extractJsonFromHtml($content);
+			if ($content == null) {
+				return null;
+			}
+		}
+
 		//check if the content is actual JSON
-		$jf = json_decode($json, true);
+		$jf = json_decode($content, true);
 		if (json_last_error() !== JSON_ERROR_NONE || !is_array($jf)) {
 			return null;
 		}

+ 1 - 0
app/Services/ExportService.php

@@ -23,6 +23,7 @@ class FreshRSS_Export_Service {
 	final public const TYPE_JSON_DOTPATH = 'JSON+DotPath';	// Legacy 1.24.0-dev
 	final public const TYPE_JSON_DOTNOTATION = 'JSON+DotNotation';
 	final public const TYPE_JSONFEED = 'JSONFeed';
+	final public const TYPE_HTML_XPATH_JSON_DOTNOTATION = 'HTML+XPath+JSON+DotNotation';
 
 	/**
 	 * Initialize the service for the given user.

+ 4 - 0
app/Services/ImportService.php

@@ -168,6 +168,9 @@ class FreshRSS_Import_Service {
 				case strtolower(FreshRSS_Export_Service::TYPE_JSONFEED):
 					$feed->_kind(FreshRSS_Feed::KIND_JSONFEED);
 					break;
+				case strtolower(FreshRSS_Export_Service::TYPE_HTML_XPATH_JSON_DOTNOTATION):
+					$feed->_kind(FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION);
+					break;
 				default:
 					$feed->_kind(FreshRSS_Feed::KIND_RSS);
 					break;
@@ -257,6 +260,7 @@ class FreshRSS_Import_Service {
 			if (!empty($jsonSettings)) {
 				$feed->_attribute('json_dotnotation', $jsonSettings);
 			}
+			$feed->_attribute('xPathToJson', $feed_elt['frss:xPathToJson'] ?? null);
 
 			$curl_params = [];
 			if (isset($feed_elt['frss:CURLOPT_COOKIE'])) {

+ 8 - 1
app/i18n/cs/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Minimální počet článků pro ponechání',
 		'kind' => array(
 			'_' => 'Typ zdroje feedu',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web scraping)',	// IGNORE
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'JSON s tečkovým zápisem používá tečky mezi objekty a závorky pro pole. (e.g. <code>data.items[0].title</code>)',
 				'item' => array(
 					'_' => 'vyhledávání nových <strong>položek</strong><br /><small>(nejdůležitější)</small>',
-					'help' => 'JSON cesta k poli obsahujícímu položky, např.: <code>newsItems</code>',
+					'help' => 'JSON cesta k poli obsahujícímu položky, např.: <code>$</code> or <code>newsItems</code>',	// DIRTY
 				),
 				'item_author' => 'autor položky',
 				'item_categories' => 'štítky položky',

+ 8 - 1
app/i18n/de/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Minimale Anzahl an Artikeln, die behalten wird',
 		'kind' => array(
 			'_' => 'Art der Feed-Quelle',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Webseite scannen)',
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'JSON punktnotiert nutzt Punkte zwischen den Objekten und eckige Klammern für Arrays (e.g. <code>data.items[0].title</code>)',
 				'item' => array(
 					'_' => 'News <strong>Items</strong> finden<br /><small>(sehr wichtig)</small>',
-					'help' => 'JSON-Pfad zum Array, das die Items enthält, z.B. <code>newsItems</code>',
+					'help' => 'JSON-Pfad zum Array, das die Items enthält, z.B. <code>$</code> or <code>newsItems</code>',	// DIRTY
 				),
 				'item_author' => 'Item Autor',
 				'item_categories' => 'Item Hashtags',

+ 8 - 1
app/i18n/el/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Minimum number of articles to keep',	// TODO
 		'kind' => array(
 			'_' => 'Type of feed source',	// TODO
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web scraping)',	// TODO
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)',	// TODO
 				'item' => array(
 					'_' => 'finding news <strong>items</strong><br /><small>(most important)</small>',	// TODO
-					'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>',	// TODO
+					'help' => 'JSON path to the array containing the items, e.g. <code>$</code> or <code>newsItems</code>',	// TODO
 				),
 				'item_author' => 'item author',	// TODO
 				'item_categories' => 'item tags',	// TODO

+ 8 - 1
app/i18n/en-us/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Minimum number of articles to keep',	// IGNORE
 		'kind' => array(
 			'_' => 'Type of feed source',	// IGNORE
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// IGNORE
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// IGNORE
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// IGNORE
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web scraping)',	// IGNORE
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)',	// IGNORE
 				'item' => array(
 					'_' => 'finding news <strong>items</strong><br /><small>(most important)</small>',	// IGNORE
-					'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>',	// IGNORE
+					'help' => 'JSON path to the array containing the items, e.g. <code>$</code> or <code>newsItems</code>',	// IGNORE
 				),
 				'item_author' => 'item author',	// IGNORE
 				'item_categories' => 'item tags',	// IGNORE

+ 8 - 1
app/i18n/en/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Minimum number of articles to keep',
 		'kind' => array(
 			'_' => 'Type of feed source',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web scraping)',
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)',
 				'item' => array(
 					'_' => 'finding news <strong>items</strong><br /><small>(most important)</small>',
-					'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>',
+					'help' => 'JSON path to the array containing the items, e.g. <code>$</code> or <code>newsItems</code>',
 				),
 				'item_author' => 'item author',
 				'item_categories' => 'item tags',

+ 8 - 1
app/i18n/es/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Número mínimo de artículos a conservar',
 		'kind' => array(
 			'_' => 'Tipo de origen de la fuente',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web scraping)',	// IGNORE
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'Un punto JSON anotado utiliza puntos entre objetos y corchetes para matrices (ejemplo: <code>data.items[0].title</code>)',
 				'item' => array(
 					'_' => 'buscando nuevos <strong>items</strong><br /><small>(más importante)</small>',
-					'help' => 'Ruta JSON a la matriz que contiene los elementos, ejemplo: <code>newsItems</code>',
+					'help' => 'Ruta JSON a la matriz que contiene los elementos, ejemplo: <code>$</code> o <code>newsItems</code>',
 				),
 				'item_author' => 'autor del item',
 				'item_categories' => 'etiquetas del item',

+ 8 - 1
app/i18n/fa/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => ' حداقل تعداد مقالات برای نگهداری',
 		'kind' => array(
 			'_' => ' نوع منبع خوراک',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => ' HTML + XPath (خراش دادن وب)',
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)',	// TODO
 				'item' => array(
 					'_' => 'finding news <strong>items</strong><br /><small>(most important)</small>',	// TODO
-					'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>',	// TODO
+					'help' => 'JSON path to the array containing the items, e.g. <code>$</code> or <code>newsItems</code>',	// TODO
 				),
 				'item_author' => 'item author',	// TODO
 				'item_categories' => 'item tags',	// TODO

+ 8 - 1
app/i18n/fr/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Nombre minimum d’articles à conserver',
 		'kind' => array(
 			'_' => 'Type de source de flux',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON notation point (JSON dans HTML)',
+				'xpath' => array(
+					'_' => 'XPath pour JSON dans HTML',
+					'help' => 'Exemple : <code>//script[@type="application/json"]</code>',
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Moissonnage du Web)',
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'La notation point pour JSON utilise le point comme séparateur objet, et des crochets pour un tableau : (ex : <code>data.items[0].title</code>)',
 				'item' => array(
 					'_' => 'trouver les <strong>articles</strong><br /><small>(c’est le plus important)</small>',
-					'help' => 'Chemin vers le tableau contenant les articles, par exemple <code>newsItems</code>',
+					'help' => 'Chemin vers le tableau contenant les articles, par exemple <code>$</code> ou <code>newsItems</code>',
 				),
 				'item_author' => 'auteur de l’article',
 				'item_categories' => 'catégories (tags) de l’article',

+ 8 - 1
app/i18n/he/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'מסםר מינימלי של מאמרים לשמור',
 		'kind' => array(
 			'_' => 'Type of feed source',	// TODO
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web scraping)',	// TODO
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)',	// TODO
 				'item' => array(
 					'_' => 'finding news <strong>items</strong><br /><small>(most important)</small>',	// TODO
-					'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>',	// TODO
+					'help' => 'JSON path to the array containing the items, e.g. <code>$</code> or <code>newsItems</code>',	// TODO
 				),
 				'item_author' => 'item author',	// TODO
 				'item_categories' => 'item tags',	// TODO

+ 8 - 1
app/i18n/hu/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Megtartandó cikkek minimális száma',
 		'kind' => array(
 			'_' => 'Hírforrás típusa',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web scraping)',	// IGNORE
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'A JSON pontjelölés pontokat használ az objektumok között és zárójeleket a tömbökhöz (pl. <code>data.items[0].title</code>)',
 				'item' => array(
 					'_' => 'hírek keresése <strong>elemek</strong><br /><small>(legfontosabb)</small>',
-					'help' => 'JSON útvonal az elemeket tartalmazó tömbhöz, pl. <code>newsItems</code>',
+					'help' => 'JSON útvonal az elemeket tartalmazó tömbhöz, pl. <code>$</code> or <code>newsItems</code>',	// DIRTY
 				),
 				'item_author' => 'elem szerző',
 				'item_categories' => 'elem címkék',

+ 8 - 1
app/i18n/id/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Minimum number of articles to keep',	// TODO
 		'kind' => array(
 			'_' => 'Type of feed source',	// TODO
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web scraping)',	// TODO
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)',	// TODO
 				'item' => array(
 					'_' => 'finding news <strong>items</strong><br /><small>(most important)</small>',	// TODO
-					'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>',	// TODO
+					'help' => 'JSON path to the array containing the items, e.g. <code>$</code> or <code>newsItems</code>',	// TODO
 				),
 				'item_author' => 'item author',	// TODO
 				'item_categories' => 'item tags',	// TODO

+ 8 - 1
app/i18n/it/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Numero minimo di articoli da mantenere',
 		'kind' => array(
 			'_' => 'Tipo di sorgente del feed',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web scraping)',	// IGNORE
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)',	// TODO
 				'item' => array(
 					'_' => 'ricerca nuovi <strong>elementi</strong><br /><small>(più importante)</small>',
-					'help' => 'percorso JSON per l’array contenente gli elementi, es. <code>newsItems</code>',
+					'help' => 'percorso JSON per l’array contenente gli elementi, es. <code>$</code> o <code>newsItems</code>',
 				),
 				'item_author' => 'autore elemento',
 				'item_categories' => 'tag elemento',

+ 8 - 1
app/i18n/ja/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => '最小数の記事は保持されます',
 		'kind' => array(
 			'_' => 'フィードソースの種類',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (ウェブスクレイピング)',
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'JSONのドット記法は、オブジェクトの間にドットを使用し、配列には括弧を使用します。例: <code>data.items[0].title</code>',
 				'item' => array(
 					'_' => 'ニュース<strong>項目</strong>を探す<br /><small>(最重要)</small>',
-					'help' => '項目を含む配列へのJSONパス。 例: <code>newsItems</code>',
+					'help' => '項目を含む配列へのJSONパス。 例: <code>$</code> or <code>newsItems</code>',	// DIRTY
 				),
 				'item_author' => '項目の著者',
 				'item_categories' => '項目のタグ',

+ 7 - 0
app/i18n/ko/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => '최소 유지 글 개수',
 		'kind' => array(
 			'_' => '피드 소스 유형',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (웹 스크래핑)',
 				'feed_title' => array(

+ 8 - 1
app/i18n/lv/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Minimālais saglabājamo izstrādājumu skaits',
 		'kind' => array(
 			'_' => 'Barotnes avota veids',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Tīmekļa nolasīšana)',
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)',	// TODO
 				'item' => array(
 					'_' => 'finding news <strong>items</strong><br /><small>(most important)</small>',	// TODO
-					'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>',	// TODO
+					'help' => 'JSON path to the array containing the items, e.g. <code>$</code> or <code>newsItems</code>',	// TODO
 				),
 				'item_author' => 'item author',	// TODO
 				'item_categories' => 'item tags',	// TODO

+ 7 - 0
app/i18n/nl/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Minimum aantal artikelen om te houden',
 		'kind' => array(
 			'_' => 'Feedbron-type',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web scraping)',	// IGNORE
 				'feed_title' => array(

+ 8 - 1
app/i18n/oc/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Nombre minimum d’articles de servar',
 		'kind' => array(
 			'_' => 'Tipe de font de flux',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web scraping)',	// IGNORE
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)',	// TODO
 				'item' => array(
 					'_' => 'finding news <strong>items</strong><br /><small>(most important)</small>',	// TODO
-					'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>',	// TODO
+					'help' => 'JSON path to the array containing the items, e.g. <code>$</code> or <code>newsItems</code>',	// TODO
 				),
 				'item_author' => 'item author',	// TODO
 				'item_categories' => 'item tags',	// TODO

+ 8 - 1
app/i18n/pl/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Minimalna liczba wiadomości do do przechowywania',
 		'kind' => array(
 			'_' => 'Rodzaj źródła kanału',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web scraping)',	// IGNORE
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'JSON oddzielający obiekty kropkami i używający nawiasów kwadratowych dla tablic (na przykład <code>data.items[0].title</code>)',
 				'item' => array(
 					'_' => 'odnajdywanie <strong>wiadomości</strong><br /><small>(najważniejsze)</small>',
-					'help' => 'Ścieżka w JSON-ie do tablicy zawierającej wiadomości, na przykład <code>newsItems</code>',
+					'help' => 'Ścieżka w JSON-ie do tablicy zawierającej wiadomości, na przykład <code>$</code> or <code>newsItems</code>',	// DIRTY
 				),
 				'item_author' => 'autor wiadomości',
 				'item_categories' => 'tagi wiadomości',

+ 8 - 1
app/i18n/pt-br/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Número mínimo de artigos para manter',
 		'kind' => array(
 			'_' => 'Tipo de fonte de alimentação do Feed',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web scraping)',	// IGNORE
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'Um JSON na notação de ponto usa pontos entre os objetos e colchetes para arrays (e.g. <code>data.items[0].title</code>)',
 				'item' => array(
 					'_' => 'encontrando novidades <strong>itens</strong><br /><small>(mais importante)</small>',
-					'help' => 'Caminho do JSON para o array contendo os itens, e.g. <code>newsItems</code>',
+					'help' => 'Caminho do JSON para o array contendo os itens, e.g. <code>$</code> or <code>newsItems</code>',	// DIRTY
 				),
 				'item_author' => 'autor do item',
 				'item_categories' => 'tags dos itens',

+ 8 - 1
app/i18n/ru/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Оставлять статей не менее',
 		'kind' => array(
 			'_' => 'Тип источника ленты',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (парсинг веб-страниц)',
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'JSON с точечной нотацией использует точки между объектами и квадратные скобки для массивов (например: <code>data.items[0].title</code>)',
 				'item' => array(
 					'_' => 'Найти новые <strong>элементы</strong><br /><small>(самое важное)</small>',
-					'help' => 'JSON-путь к массиву, содержащему элементы, например: <code>newsItems</code>',
+					'help' => 'JSON-путь к массиву, содержащему элементы, например: <code>$</code> or <code>newsItems</code>',	// DIRTY
 				),
 				'item_author' => 'автор элемента',
 				'item_categories' => 'теги элемента',

+ 8 - 1
app/i18n/sk/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'Minimálny počet článkov na uchovanie',
 		'kind' => array(
 			'_' => 'Typ zdroja kanála',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web scraping)',	// IGNORE
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'JSON so zápisom s bodkou používa bodky na oddelenie objekov a zložené zátvorky pre polia (príklad: <code>data.items[0].title</code>)',
 				'item' => array(
 					'_' => 'hľadajú sa <strong>položky</strong> noviniek<br /><small>(najdôležitejšie)</small>',
-					'help' => 'JSON cesta k polu obsahujúce položky, príklad: <code>newsItems</code>',
+					'help' => 'JSON cesta k polu obsahujúce položky, príklad: <code>$</code> or <code>newsItems</code>',	// DIRTY
 				),
 				'item_author' => 'autor položky',
 				'item_categories' => 'značky položky',

+ 7 - 0
app/i18n/tr/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => 'En az tutulacak makale sayısı',
 		'kind' => array(
 			'_' => 'Akış kaynağının tipi',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web scraping)',	// IGNORE
 				'feed_title' => array(

+ 8 - 1
app/i18n/zh-cn/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => '至少保存的文章数',
 		'kind' => array(
 			'_' => '订阅源类型',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web 抓取)',
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'JSON 点表达式(JSON 路径)在对象之间使用点,在数组中使用中括号 (例如 <code>data.items[0].title</code>)',
 				'item' => array(
 					'_' => '寻找新的 <strong>文章</strong><br /><small>(最重要的参数)</small>',
-					'help' => '包含文章数组的 JSON 路径, 例如 <code>newsItems</code>',
+					'help' => '包含文章数组的 JSON 路径, 例如 <code>$</code> or <code>newsItems</code>',	// DIRTY
 				),
 				'item_author' => '文章作者',
 				'item_categories' => '文章标签',

+ 8 - 1
app/i18n/zh-tw/sub.php

@@ -83,6 +83,13 @@ return array(
 		'keep_min' => '至少保存的文章數',
 		'kind' => array(
 			'_' => '訂閱源類型',
+			'html_json' => array(
+				'_' => 'HTML + XPath + JSON dot notation (JSON in HTML)',	// TODO
+				'xpath' => array(
+					'_' => 'XPath for JSON in HTML',	// TODO
+					'help' => 'Example: <code>//script[@type="application/json"]</code>',	// TODO
+				),
+			),
 			'html_xpath' => array(
 				'_' => 'HTML + XPath (Web 抓取)',
 				'feed_title' => array(
@@ -139,7 +146,7 @@ return array(
 				'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)',	// TODO
 				'item' => array(
 					'_' => '找尋新聞 <strong>項目</strong><br /><small>(最重要的)</small>',
-					'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>',	// TODO
+					'help' => 'JSON path to the array containing the items, e.g. <code>$</code> or <code>newsItems</code>',	// TODO
 				),
 				'item_author' => '項目作者',
 				'item_categories' => '項目標籤',

+ 7 - 1
app/views/helpers/export/opml.phtml

@@ -33,6 +33,9 @@ function feedsToOutlines(array $feeds, bool $excludeMutedFeeds = false): array {
 			case FreshRSS_Feed::KIND_JSONFEED:
 				$outline['type'] = FreshRSS_Export_Service::TYPE_JSONFEED;
 				break;
+			case FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION:
+				$outline['type'] = FreshRSS_Export_Service::TYPE_HTML_XPATH_JSON_DOTNOTATION;
+				break;
 		}
 
 		if ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH || $feed->kind() === FreshRSS_Feed::KIND_XML_XPATH) {
@@ -48,7 +51,7 @@ function feedsToOutlines(array $feeds, bool $excludeMutedFeeds = false): array {
 			$outline['frss:xPathItemThumbnail'] = $xPathSettings['itemThumbnail'] ?? null;
 			$outline['frss:xPathItemCategories'] = $xPathSettings['itemCategories'] ?? null;
 			$outline['frss:xPathItemUid'] = $xPathSettings['itemUid'] ?? null;
-		} elseif ($feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION) {
+		} elseif ($feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION || $feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) {
 			/** @var array<string,string> */
 			$jsonSettings = $feed->attributeArray('json_dotnotation') ?? [];
 			$outline['frss:jsonItem'] = $jsonSettings['item'] ?? null;
@@ -61,6 +64,9 @@ function feedsToOutlines(array $feeds, bool $excludeMutedFeeds = false): array {
 			$outline['frss:jsonItemThumbnail'] = $jsonSettings['itemThumbnail'] ?? null;
 			$outline['frss:jsonItemCategories'] = $jsonSettings['itemCategories'] ?? null;
 			$outline['frss:jsonItemUid'] = $jsonSettings['itemUid'] ?? null;
+			if ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) {
+				$outline['frss:xPathToJson'] = $feed->attributeString('xPathToJson');
+			}
 		}
 
 		if (!empty($feed->filtersAction('read'))) {

+ 13 - 1
app/views/helpers/feed/update.phtml

@@ -416,7 +416,10 @@
 						<option value="<?= FreshRSS_Feed::KIND_HTML_XPATH ?>" <?= $this->feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH ? 'selected="selected"' : '' ?> data-show="html_xpath"><?= _t('sub.feed.kind.html_xpath') ?></option>
 						<option value="<?= FreshRSS_Feed::KIND_XML_XPATH ?>" <?= $this->feed->kind() === FreshRSS_Feed::KIND_XML_XPATH ? 'selected="selected"' : '' ?> data-show="html_xpath"><?= _t('sub.feed.kind.xml_xpath') ?></option>
 						<option value="<?= FreshRSS_Feed::KIND_JSONFEED ?>" <?= $this->feed->kind() === FreshRSS_Feed::KIND_JSONFEED ? 'selected="selected"' : '' ?>><?= _t('sub.feed.kind.jsonfeed') ?></option>
-						<option value="<?= FreshRSS_Feed::KIND_JSON_DOTNOTATION ?>" <?= $this->feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION ? 'selected="selected"' : '' ?> data-show="json_dotnotation"><?= _t('sub.feed.kind.json_dotnotation') ?></option>
+						<option value="<?= FreshRSS_Feed::KIND_JSON_DOTNOTATION ?>" <?= $this->feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION ? 'selected="selected"' : '' ?>
+							data-show="json_dotnotation"><?= _t('sub.feed.kind.json_dotnotation') ?></option>
+						<option value="<?= FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION ?>" <?= $this->feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION ? 'selected="selected"' : '' ?>
+							data-show="json_dotnotation xPathToJsonGroup"><?= _t('sub.feed.kind.html_json') ?></option>
 					</select>
 				</div>
 			</div>
@@ -522,7 +525,16 @@
 			<?php
 				/** @var array<string,string> $jsonSettings */
 				$jsonSettings = Minz_Helper::htmlspecialchars_utf8($this->feed->attributeArray('json_dotnotation') ?? []);
+				$xPathToJson = Minz_Helper::htmlspecialchars_utf8($this->feed->attributeString('xPathToJson'));
 			?>
+			<div class="form-group" id="xPathToJsonGroup">
+				<label class="group-name" for="xPathToJson"><?= _t('sub.feed.kind.html_json.xpath') ?></label>
+				<div class="group-controls">
+					<textarea class="valid-xpath w100" name="xPathToJson" id="xPathToJson" rows="2" cols="64" spellcheck="false" data-leave-validation="<?= $xPathToJson ?>"><?= $xPathToJson ?? '' ?></textarea>
+					<p class="help"><?= _i('help') ?> <?= _t('sub.feed.kind.html_json.xpath.help') ?></p>
+				</div>
+			</div>
+
 			<p class="help"><?= _i('help') ?> <?= _t('sub.feed.kind.json_dotnotation.help') ?></p>
 			<div class="form-group">
 				<label class="group-name" for="jsonItem"><small><?= _t('sub.feed.kind.json_dotnotation.json') ?></small><br />

+ 9 - 0
app/views/subscription/add.phtml

@@ -73,6 +73,7 @@
 						<option value="<?= FreshRSS_Feed::KIND_XML_XPATH ?>" data-show="html_xpath"><?= _t('sub.feed.kind.xml_xpath') ?></option>
 						<option value="<?= FreshRSS_Feed::KIND_JSONFEED ?>"><?= _t('sub.feed.kind.jsonfeed') ?></option>
 						<option value="<?= FreshRSS_Feed::KIND_JSON_DOTNOTATION ?>" data-show="json_dotnotation"><?= _t('sub.feed.kind.json_dotnotation') ?></option>
+						<option value="<?= FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION ?>" data-show="json_dotnotation xPathToJsonGroup"><?= _t('sub.feed.kind.html_json') ?></option>
 					</select>
 				</div>
 			</div>
@@ -167,6 +168,14 @@
 				</div>
 			</fieldset>
 			<fieldset id="json_dotnotation">
+				<div class="form-group" id="xPathToJsonGroup">
+					<label class="group-name" for="xPathToJson"><?= _t('sub.feed.kind.html_json.xpath') ?></label>
+					<div class="group-controls">
+						<textarea class="valid-xpath" name="xPathToJson" id="xPathToJson" rows="2" cols="64" spellcheck="false"></textarea>
+						<p class="help"><?= _i('help') ?> <?= _t('sub.feed.kind.html_json.xpath.help') ?></p>
+					</div>
+				</div>
+
 				<p class="help"><?= _i('help') ?> <?= _t('sub.feed.kind.json_dotnotation.help') ?></p>
 				<div class="form-group">
 					<label class="group-name" for="jsonFeedTitle"><small><?= _t('sub.feed.kind.json_dotnotation.json') ?></small><br />

+ 5 - 0
docs/en/developers/OPML.md

@@ -67,6 +67,11 @@ The following attributes are using similar naming conventions than [RSS-Bridge](
 
 * `<outline type="JSONFeed" ...`: Uses `JSON+DotNotation` behind the scenes to parse a [JSON Feed](https://www.jsonfeed.org/).
 
+### HTML+XPath+JSON
+
+* `<outline type="HTML+XPath+JSON+DotNotation" frss:xPathToJson="..." ...`: Same as `JSON+DotNotation`, but first extracts the JSON string from an HTML document using an XPath expression.
+	* Example: `//script[@type='application/json']`
+
 ### cURL
 
 A number of [cURL options](https://curl.se/libcurl/c/curl_easy_setopt.html) are supported:

+ 5 - 5
p/scripts/feed.js

@@ -66,9 +66,6 @@ function init_popup_preview_selector() {
 	});
 }
 
-/**
- * Allow a <select class="select-show"> to hide/show elements defined by <option data-show="elem-id"></option>
- */
 function init_disable_elements_on_update(parent) {
 	const inputs = parent.querySelectorAll('input[data-disable-update]');
 	for (const input of inputs) {
@@ -90,8 +87,11 @@ function init_select_show(parent) {
 		const options = select.querySelectorAll('option[data-show]');
 		const shows = {};	// To allow multiple options to show the same element
 		for (const option of options) {
-			if (!shows[option.dataset.show]) {
-				shows[option.dataset.show] = option.selected;
+			const targets = option.dataset.show.split(' ');	// Allow multiple targets
+			for (const target of targets) {
+				if (!shows[target]) {
+					shows[target] = option.selected;
+				}
 			}
 		}