ImportService.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. <?php
  2. declare(strict_types=1);
  3. /**
  4. * Provide methods to import files.
  5. */
  6. class FreshRSS_Import_Service {
  7. private readonly FreshRSS_CategoryDAO $catDAO;
  8. private readonly FreshRSS_FeedDAO $feedDAO;
  9. /** true if success, false otherwise */
  10. private bool $lastStatus;
  /**
   * Initialize the service for the given user.
   *
   * @param string|null $username User name handed to FreshRSS_Factory to create the
   *   category and feed DAOs; null is passed through unchanged.
   */
  public function __construct(?string $username = null) {
    $this->catDAO = FreshRSS_Factory::createCategoryDao($username);
    $this->feedDAO = FreshRSS_Factory::createFeedDao($username);
    // No import has run yet, so no failure has been recorded. This is the same
    // starting value importOpml() uses. Without it, the typed property stays
    // uninitialized and lastStatus() throws an Error when called before
    // importOpml().
    $this->lastStatus = true;
  }
  /**
   * Give the status flag currently stored on the service.
   *
   * @return bool true if success, false otherwise
   */
  public function lastStatus(): bool {
    $status = $this->lastStatus;
    return $status;
  }
  /**
   * Parse an OPML document and import the categories and feeds it describes.
   *
   * Nothing is returned: the outcome is recorded in the status flag that
   * lastStatus() exposes. The flag is reset to true at the start and switched
   * to false when parsing fails, when the default category is unavailable,
   * when the feed limit is hit, or when a feed cannot be created.
   *
   * @param string $opml_file the OPML file content.
   * @param FreshRSS_Category|null $forced_category force the feeds to be associated to this category.
   * @param bool $dry_run true to not create categories and feeds in database.
   */
  public function importOpml(string $opml_file, ?FreshRSS_Category $forced_category = null, bool $dry_run = false): void {
    if (function_exists('set_time_limit')) {
      @set_time_limit(300);
    }

    $this->lastStatus = true;

    $parsed = [];
    try {
      $parser = new \marienfressinaud\LibOpml\LibOpml(false);
      /** @var array{body:array<array<mixed>>} $parsed */
      $parsed = $parser->parseString($opml_file);
    } catch (\marienfressinaud\LibOpml\Exception $parse_error) {
      self::log($parse_error->getMessage());
      $this->lastStatus = false;
      return;
    }

    $this->catDAO->checkDefault();
    $fallback_category = $this->catDAO->getDefault();
    if ($fallback_category === null) {
      self::log('Cannot get the default category');
      $this->lastStatus = false;
      return;
    }

    // Index the existing categories by name, so that an outline naming one of
    // them reuses it instead of creating a new category.
    $existing_categories = $this->catDAO->listCategories(prePopulateFeeds: false);
    $known_by_name = [];
    foreach ($existing_categories as $existing) {
      $known_by_name[$existing->name()] = $existing;
    }

    // Current totals and configured limits, used to decide whether more
    // categories/feeds may be created.
    $category_count = count($existing_categories);
    $feed_count = count($this->feedDAO->listFeeds());
    $limits = FreshRSS_Context::systemConf()->limits;

    // Flatten the OPML body into category outlines and feed outlines, both
    // indexed by category name.
    [$category_outlines, $feeds_by_category] = $this->loadFromOutlines($parsed['body'], '');

    foreach ($feeds_by_category as $category_name => $feed_outlines) {
      $category_outline = $category_outlines[$category_name] ?? null;

      // A forced category wins over the name found in the OPML; otherwise an
      // already known category with that name is reused.
      $target = $forced_category ?? $known_by_name[$category_name] ?? null;

      if ($target === null && is_array($category_outline)) {
        // Unknown category: create it, unless the limit forbids it (the limit
        // is not enforced in CLI mode).
        $over_limit = $category_count >= $limits['max_categories'];
        if (FreshRSS_Context::$isCli || !$over_limit) {
          $target = $this->createCategory($category_outline, $dry_run);
          if ($target !== null) {
            $known_by_name[$target->name()] = $target;
            $category_count++;
          }
        } else {
          Minz_Log::warning(
            _t('feedback.sub.category.over_max', $limits['max_categories'])
          );
        }
      }

      // Still nothing: the feeds were not in a category outline, or the
      // category could not be created. Use the default category.
      $target ??= $fallback_category;

      // Create the feeds one by one and attach them to the chosen category.
      foreach ($feed_outlines as $feed_outline) {
        $over_limit = $feed_count >= $limits['max_feeds'];
        if ($over_limit && !FreshRSS_Context::$isCli) {
          Minz_Log::warning(
            _t('feedback.sub.feed.over_max', $limits['max_feeds'])
          );
          $this->lastStatus = false;
          break;
        }

        if ($this->createFeed($feed_outline, $target, $dry_run) === null) {
          $this->lastStatus = false;
          continue;
        }
        // TODO what if the feed already exists in the database?
        $feed_count++;
      }
    }
  }
  /**
   * Build a feed from a feed element (i.e. OPML outline) and, unless in
   * dry-run mode, add it to the database.
   *
   * URL, name, website and description go through
   * Minz_Helper::htmlspecialchars_utf8(). The feed kind is chosen from the
   * outline `type`, and the `frss:*` attributes are copied to the matching
   * feed settings. The 'feed_before_insert' extension hook is called before
   * the feed is stored; its result replaces the feed and may be null.
   *
   * @param array<string,string> $feed_elt An OPML element (must be a feed element).
   * @param FreshRSS_Category $category The category to associate to the feed.
   * @param bool $dry_run true to not create the feed in database.
   * @return FreshRSS_Feed|null The created feed, or null if it failed.
   */
  private function createFeed(array $feed_elt, FreshRSS_Category $category, bool $dry_run): ?FreshRSS_Feed {
    $url = Minz_Helper::htmlspecialchars_utf8($feed_elt['xmlUrl']);
    $name = Minz_Helper::htmlspecialchars_utf8($feed_elt['text'] ?? $feed_elt['title'] ?? '');
    $website = Minz_Helper::htmlspecialchars_utf8($feed_elt['htmlUrl'] ?? '');
    $description = Minz_Helper::htmlspecialchars_utf8($feed_elt['description'] ?? '');

    // Setting name => OPML attribute it is read from, for the 'xpath' feed attribute.
    $xpath_sources = [
      'item' => 'frss:xPathItem',
      'itemTitle' => 'frss:xPathItemTitle',
      'itemContent' => 'frss:xPathItemContent',
      'itemUri' => 'frss:xPathItemUri',
      'itemAuthor' => 'frss:xPathItemAuthor',
      'itemTimestamp' => 'frss:xPathItemTimestamp',
      'itemTimeFormat' => 'frss:xPathItemTimeFormat',
      'itemThumbnail' => 'frss:xPathItemThumbnail',
      'itemCategories' => 'frss:xPathItemCategories',
      'itemUid' => 'frss:xPathItemUid',
    ];
    // Same mapping for the 'json_dotnotation' feed attribute.
    $json_sources = [
      'item' => 'frss:jsonItem',
      'itemTitle' => 'frss:jsonItemTitle',
      'itemContent' => 'frss:jsonItemContent',
      'itemUri' => 'frss:jsonItemUri',
      'itemAuthor' => 'frss:jsonItemAuthor',
      'itemTimestamp' => 'frss:jsonItemTimestamp',
      'itemTimeFormat' => 'frss:jsonItemTimeFormat',
      'itemThumbnail' => 'frss:jsonItemThumbnail',
      'itemCategories' => 'frss:jsonItemCategories',
      'itemUid' => 'frss:jsonItemUid',
    ];
    // Multi-valued attributes hold one value per line.
    $split_lines = static fn (string $text): array => preg_split('/\R/u', $text) ?: [];

    try {
      // Create a Feed object and add it in DB
      $feed = new FreshRSS_Feed($url);
      $feed->_category($category);
      $feed->_name($name);
      $feed->_website($website);
      $feed->_description($description);

      // Case-insensitive lookup of the outline type; anything unrecognised is
      // treated as a regular RSS feed.
      // NOTE(review): match compares strictly where the former switch compared
      // loosely; the two only differ for numeric strings, which the TYPE_*
      // constants presumably are not — confirm against FreshRSS_Export_Service.
      $feed->_kind(match (strtolower($feed_elt['type'] ?? '')) {
        strtolower(FreshRSS_Export_Service::TYPE_HTML_XPATH) => FreshRSS_Feed::KIND_HTML_XPATH,
        strtolower(FreshRSS_Export_Service::TYPE_XML_XPATH) => FreshRSS_Feed::KIND_XML_XPATH,
        strtolower(FreshRSS_Export_Service::TYPE_JSON_DOTNOTATION),
        strtolower(FreshRSS_Export_Service::TYPE_JSON_DOTPATH) => FreshRSS_Feed::KIND_JSON_DOTNOTATION,
        strtolower(FreshRSS_Export_Service::TYPE_JSONFEED) => FreshRSS_Feed::KIND_JSONFEED,
        strtolower(FreshRSS_Export_Service::TYPE_HTML_XPATH_JSON_DOTNOTATION) => FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION,
        default => FreshRSS_Feed::KIND_RSS,
      });

      if (isset($feed_elt['frss:cssFullContent'])) {
        $feed->_pathEntries(Minz_Helper::htmlspecialchars_utf8($feed_elt['frss:cssFullContent']));
      }
      if (isset($feed_elt['frss:cssFullContentConditions'])) {
        $feed->_attribute('path_entries_conditions', $split_lines($feed_elt['frss:cssFullContentConditions']));
      }
      // frss:cssContentFilter takes precedence over frss:cssFullContentFilter.
      $content_filter = $feed_elt['frss:cssContentFilter'] ?? $feed_elt['frss:cssFullContentFilter'] ?? null;
      if ($content_filter !== null) {
        $feed->_attribute('path_entries_filter', $content_filter);
      }
      if (isset($feed_elt['frss:filtersActionRead'])) {
        $feed->_filtersAction('read', $split_lines($feed_elt['frss:filtersActionRead']));
      }

      $xPathSettings = [];
      foreach ($xpath_sources as $setting => $attribute) {
        if (isset($feed_elt[$attribute])) {
          $xPathSettings[$setting] = $feed_elt[$attribute];
        }
      }
      if (!empty($xPathSettings)) {
        $feed->_attribute('xpath', $xPathSettings);
      }

      $jsonSettings = [];
      foreach ($json_sources as $setting => $attribute) {
        if (isset($feed_elt[$attribute])) {
          $jsonSettings[$setting] = $feed_elt[$attribute];
        }
      }
      if (!empty($jsonSettings)) {
        $feed->_attribute('json_dotnotation', $jsonSettings);
      }

      $feed->_attribute('xPathToJson', $feed_elt['frss:xPathToJson'] ?? null);

      $curl_options = [];
      if (isset($feed_elt['frss:CURLOPT_COOKIE'])) {
        $curl_options[CURLOPT_COOKIE] = $feed_elt['frss:CURLOPT_COOKIE'];
      }
      if (isset($feed_elt['frss:CURLOPT_COOKIEFILE'])) {
        // Allow only an empty value just to enable the libcurl cookie engine
        $curl_options[CURLOPT_COOKIEFILE] = '';
      }
      if (isset($feed_elt['frss:CURLOPT_FOLLOWLOCATION'])) {
        $curl_options[CURLOPT_FOLLOWLOCATION] = (bool)$feed_elt['frss:CURLOPT_FOLLOWLOCATION'];
      }
      if (isset($feed_elt['frss:CURLOPT_HTTPHEADER'])) {
        $curl_options[CURLOPT_HTTPHEADER] = $split_lines($feed_elt['frss:CURLOPT_HTTPHEADER']);
      }
      if (isset($feed_elt['frss:CURLOPT_MAXREDIRS'])) {
        $curl_options[CURLOPT_MAXREDIRS] = (int)$feed_elt['frss:CURLOPT_MAXREDIRS'];
      }
      if (isset($feed_elt['frss:CURLOPT_POST'])) {
        $curl_options[CURLOPT_POST] = (bool)$feed_elt['frss:CURLOPT_POST'];
      }
      if (isset($feed_elt['frss:CURLOPT_POSTFIELDS'])) {
        $curl_options[CURLOPT_POSTFIELDS] = $feed_elt['frss:CURLOPT_POSTFIELDS'];
      }
      if (isset($feed_elt['frss:CURLOPT_PROXY'])) {
        $curl_options[CURLOPT_PROXY] = $feed_elt['frss:CURLOPT_PROXY'];
      }
      if (isset($feed_elt['frss:CURLOPT_PROXYTYPE'])) {
        $proxy_type = (int)$feed_elt['frss:CURLOPT_PROXYTYPE'];
        // Legacy for NONE
        $curl_options[CURLOPT_PROXYTYPE] = $proxy_type === 3 ? -1 : $proxy_type;
      }
      if (isset($feed_elt['frss:CURLOPT_USERAGENT'])) {
        $curl_options[CURLOPT_USERAGENT] = $feed_elt['frss:CURLOPT_USERAGENT'];
      }
      if (!empty($curl_options)) {
        $feed->_attribute('curl_params', $curl_options);
      }

      // Call the extension hook
      /** @var FreshRSS_Feed|null */
      $feed = Minz_ExtensionManager::callHook('feed_before_insert', $feed);

      if ($dry_run) {
        // Dry run: attach the feed to the category object only; whatever the
        // hook returned (possibly null) is handed back as is.
        if ($feed !== null) {
          $category->addFeed($feed);
        }
        return $feed;
      }

      if ($feed !== null) {
        // addFeedObject checks if feed is already in DB
        $id = $this->feedDAO->addFeedObject($feed);
        if ($id != false) {
          $feed->_id($id);
          $category->addFeed($feed);
          return $feed;
        }
        $this->lastStatus = false;
      }
    } catch (FreshRSS_Feed_Exception $feed_error) {
      self::log($feed_error->getMessage());
      $this->lastStatus = false;
    }

    // Reached when the hook returned null, the insertion failed, or an
    // exception was caught above.
    $clean_url = \SimplePie\Misc::url_remove_credentials($url);
    self::log("Cannot create {$clean_url} feed in category {$category->name()}");
    return null;
  }
  /**
   * Build a category from a category element and, unless in dry-run mode,
   * add it to the database.
   *
   * The name comes from the `text` attribute, then `title`, then the empty
   * string. When `frss:opmlUrl` is present and checkUrl() accepts it, the
   * category becomes a dynamic OPML category pointing at that URL.
   *
   * @param array<string,string> $category_element An OPML element (must be a category element).
   * @param bool $dry_run true to not create the category in database.
   * @return FreshRSS_Category|null The created category, or null if it failed.
   */
  private function createCategory(array $category_element, bool $dry_run): ?FreshRSS_Category {
    $raw_name = $category_element['text'] ?? $category_element['title'] ?? '';
    $category = new FreshRSS_Category(Minz_Helper::htmlspecialchars_utf8($raw_name));

    $declared_opml_url = $category_element['frss:opmlUrl'] ?? null;
    if ($declared_opml_url !== null) {
      $opml_url = checkUrl($declared_opml_url);
      if ($opml_url != '') {
        $category->_kind(FreshRSS_Category::KIND_DYNAMIC_OPML);
        $category->_attribute('opml_url', $opml_url);
      }
    }

    if ($dry_run) {
      return $category;
    }

    $id = $this->catDAO->addCategoryObject($category);
    if ($id === false) {
      self::log("Cannot create category {$category->name()}");
      $this->lastStatus = false;
      return null;
    }

    $category->_id($id);
    return $category;
  }
  /**
   * Return the list of category and feed outlines by categories names.
   *
   * This method is applied to a list of outlines. It merges the different
   * list of feeds from several outlines into one array.
   *
   * Note on keys: PHP stores a purely numeric category name (e.g. "2024") as
   * an integer array key, so the keys of both returned arrays can be integers
   * as well as strings. Such categories must be kept, not skipped or renumbered.
   *
   * @param array<array<mixed>> $outlines The outlines from which to extract the outlines.
   * @param string $parent_category_name The name of the parent category of the current outlines.
   * @return array{0:array<int|string,array<string,string>>,1:array<int|string,list<array<string,string>>>}
   */
  private function loadFromOutlines(array $outlines, string $parent_category_name): array {
    $categories_elements = [];
    $categories_to_feeds = [];

    foreach ($outlines as $outline) {
      if (!is_array($outline)) {
        continue;
      }

      // Get the categories and feeds from the child outline (it may
      // return several categories and feeds if the outline is a category).
      [$outline_categories, $outline_categories_to_feeds] = $this->loadFromOutline($outline, $parent_category_name);

      // Merge the category elements, later outlines overriding earlier ones.
      // array_replace() is used instead of array_merge() because the latter
      // renumbers integer keys, which would lose numeric category names.
      $categories_elements = array_replace($categories_elements, $outline_categories);

      // Append the feeds of each category to the ones already collected.
      // The key is deliberately not required to be a string: a numeric
      // category name arrives here as an integer key, and rejecting it
      // would silently drop every feed of that category.
      foreach ($outline_categories_to_feeds as $category_name => $feeds) {
        if (!is_array($feeds)) {
          continue;
        }
        $categories_to_feeds[$category_name] = array_merge(
          $categories_to_feeds[$category_name] ?? [],
          $feeds
        );
      }
    }

    return [$categories_elements, $categories_to_feeds];
  }
  /**
   * Return the list of category and feed outlines by categories names.
   *
   * This method is applied to a specific outline. If the outline represents
   * a category (i.e. @outlines key exists), it will reapply loadFromOutlines()
   * to its children. If the outline represents a feed (i.e. xmlUrl key
   * exists), it will add the outline to an array accessible by its category
   * name.
   *
   * A category element is only registered under a non-empty name that the
   * outline provides itself (its text/title, or its OPML `category`
   * attribute). An outline with children but no usable text/title passes its
   * parent's name down to the children without registering itself;
   * otherwise createCategory() would later build a category from an element
   * without a name, i.e. a category named ''.
   *
   * @param array<mixed> $outline The outline from which to extract the categories and feeds outlines.
   * @param string $parent_category_name The name of the parent category of the current outline.
   *
   * @return array{0:array<string,array<string,string>>,1:array<string,list<array<string,string>>>}
   */
  private function loadFromOutline(array $outline, string $parent_category_name): array {
    $categories_elements = [];
    $categories_to_feeds = [];

    // Keep only the string attributes of an outline (drops e.g. the
    // `category` list and the nested outlines).
    $string_attributes = static fn (array $element): array => array_filter(
      $element,
      static fn ($value, $key): bool => is_string($key) && is_string($value),
      ARRAY_FILTER_USE_BOTH
    );

    if ($parent_category_name === '' && isset($outline['category']) && is_array($outline['category'])) {
      // The outline has no parent category, but its OPML category
      // attribute is set, so we use it as the category name.
      // lib_opml parses this attribute as an array of strings, so we
      // rebuild a string here.
      $attribute_category = implode(', ', $outline['category']);
      if ($attribute_category !== '') {
        // An empty attribute is ignored, so that no category element is
        // registered under the empty name.
        $parent_category_name = $attribute_category;
        $categories_elements[$parent_category_name] = [
          'text' => $parent_category_name,
        ];
      }
    }

    if (is_array($outline['@outlines'] ?? null)) {
      // The outline has children, it’s probably a category
      $own_name = '';
      if (!empty($outline['text']) && is_string($outline['text'])) {
        $own_name = $outline['text'];
      } elseif (!empty($outline['title']) && is_string($outline['title'])) {
        $own_name = $outline['title'];
      }
      $category_name = $own_name !== '' ? $own_name : $parent_category_name;

      $children = array_filter($outline['@outlines'], 'is_array');
      [$child_categories, $categories_to_feeds] = $this->loadFromOutlines($children, $category_name);

      // Merge instead of overwriting, so the element registered above from
      // the `category` attribute is not lost; children win on name clashes.
      // array_replace() keeps numeric names, which PHP stores as integer keys.
      $categories_elements = array_replace($categories_elements, $child_categories);

      unset($outline['@outlines']);
      if ($own_name !== '') {
        $categories_elements[$category_name] = $string_attributes($outline);
      }
    }

    // The xmlUrl means it’s a feed URL: add the outline to the array if it exists.
    if (isset($outline['xmlUrl'])) {
      $categories_to_feeds[$parent_category_name][] = $string_attributes($outline);
    }

    return [$categories_elements, $categories_to_feeds];
  }
  /**
   * Report a problem met during the import: written to STDERR when running
   * from the CLI, logged as a Minz_Log warning otherwise.
   *
   * @param string $message The message to report.
   */
  private static function log(string $message): void {
    if (!FreshRSS_Context::$isCli) {
      Minz_Log::warning("Error during OPML import: {$message}");
      return;
    }
    fwrite(STDERR, "FreshRSS error during OPML import: {$message}\n");
  }
  439. }