ImportService.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. <?php
  2. declare(strict_types=1);
  /**
   * Provide methods to import files (currently OPML subscription lists).
   *
   * Usage: build the service, call importOpml(), then read lastStatus() to know
   * whether the whole import went fine.
   */
  class FreshRSS_Import_Service {
      /**
       * FreshRSS-specific OPML attributes of a feed outline, mapped to the keys
       * used in the "xpath" attribute of the feed.
       */
      private const XPATH_ATTRIBUTES = [
          'frss:xPathItem' => 'item',
          'frss:xPathItemTitle' => 'itemTitle',
          'frss:xPathItemContent' => 'itemContent',
          'frss:xPathItemUri' => 'itemUri',
          'frss:xPathItemAuthor' => 'itemAuthor',
          'frss:xPathItemTimestamp' => 'itemTimestamp',
          'frss:xPathItemTimeFormat' => 'itemTimeFormat',
          'frss:xPathItemThumbnail' => 'itemThumbnail',
          'frss:xPathItemCategories' => 'itemCategories',
          'frss:xPathItemUid' => 'itemUid',
      ];

      private FreshRSS_CategoryDAO $catDAO;

      private FreshRSS_FeedDAO $feedDAO;

      /**
       * true if success, false otherwise.
       *
       * Initialised to true so that lastStatus() can be read before any import
       * has been run (an uninitialised typed property would throw an Error).
       */
      private bool $lastStatus = true;

      /**
       * Initialize the service for the given user.
       *
       * @param string|null $username forwarded as-is to the category and feed DAO factories.
       */
      public function __construct(?string $username = null) {
          $this->catDAO = FreshRSS_Factory::createCategoryDao($username);
          $this->feedDAO = FreshRSS_Factory::createFeedDao($username);
      }

      /** @return bool true if success, false otherwise */
      public function lastStatus(): bool {
          return $this->lastStatus;
      }

      /**
       * This method parses and imports an OPML file.
       *
       * The status is reset to "success" at the beginning of the import, and set
       * to "failure" as soon as the file cannot be parsed, the default category
       * cannot be fetched, the feed limit is reached, or a category/feed cannot be
       * created. Use lastStatus() to read it afterwards.
       *
       * @param string $opml_file the OPML file content.
       * @param FreshRSS_Category|null $forced_category force the feeds to be associated to this category.
       * @param bool $dry_run true to not create categories and feeds in database.
       */
      public function importOpml(string $opml_file, ?FreshRSS_Category $forced_category = null, bool $dry_run = false): void {
          if (function_exists('set_time_limit')) {
              @set_time_limit(300);
          }

          $this->lastStatus = true;

          try {
              $libopml = new \marienfressinaud\LibOpml\LibOpml(false);
              $opml_array = $libopml->parseString($opml_file);
          } catch (\marienfressinaud\LibOpml\Exception $e) {
              self::log($e->getMessage());
              $this->lastStatus = false;
              return;
          }

          $this->catDAO->checkDefault();
          $default_category = $this->catDAO->getDefault();
          if ($default_category === null) {
              self::log('Cannot get the default category');
              $this->lastStatus = false;
              return;
          }

          // Get the categories by names so we can use this array to retrieve
          // existing categories later.
          $categories = $this->catDAO->listCategories(false) ?: [];
          $categories_by_names = [];
          foreach ($categories as $category) {
              $categories_by_names[$category->name()] = $category;
          }

          // Get current numbers of categories and feeds, and the limits to
          // verify the user can import their categories/feeds.
          $nb_categories = count($categories);
          $nb_feeds = count($this->feedDAO->listFeeds());
          $limits = FreshRSS_Context::systemConf()->limits;

          // Process the OPML outlines to get a list of categories and a list of
          // feeds elements indexed by their categories names.
          [$categories_elements, $categories_to_feeds] = $this->loadFromOutlines($opml_array['body'] ?? [], '');

          foreach ($categories_to_feeds as $category_name => $feeds_elements) {
              $category_element = $categories_elements[$category_name] ?? null;
              $category = null;

              if ($forced_category) {
                  // If the category is forced, ignore the actual category name
                  $category = $forced_category;
              } elseif (isset($categories_by_names[$category_name])) {
                  // If the category already exists, get it from $categories_by_names
                  $category = $categories_by_names[$category_name];
              } elseif ($category_element) {
                  // Otherwise, create the category (if possible).
                  // The limit is not enforced when running from the CLI.
                  $limit_reached = $nb_categories >= $limits['max_categories'];
                  $can_create_category = FreshRSS_Context::$isCli || !$limit_reached;
                  if ($can_create_category) {
                      $category = $this->createCategory($category_element, $dry_run);
                      if ($category) {
                          $categories_by_names[$category->name()] = $category;
                          $nb_categories++;
                      }
                  } else {
                      Minz_Log::warning(
                          _t('feedback.sub.category.over_max', $limits['max_categories'])
                      );
                  }
              }

              if (!$category) {
                  // Category can be null if the feeds weren't in a category
                  // outline, or if we weren't able to create the category.
                  $category = $default_category;
              }

              // Then, create the feeds one by one and attach them to the
              // category we just got.
              foreach ($feeds_elements as $feed_element) {
                  // The limit is not enforced when running from the CLI.
                  $limit_reached = $nb_feeds >= $limits['max_feeds'];
                  $can_create_feed = FreshRSS_Context::$isCli || !$limit_reached;
                  if (!$can_create_feed) {
                      Minz_Log::warning(
                          _t('feedback.sub.feed.over_max', $limits['max_feeds'])
                      );
                      $this->lastStatus = false;
                      break;
                  }

                  if ($this->createFeed($feed_element, $category, $dry_run)) {
                      // TODO what if the feed already exists in the database?
                      $nb_feeds++;
                  } else {
                      $this->lastStatus = false;
                  }
              }
          }
      }

      /**
       * Create a feed from a feed element (i.e. OPML outline).
       *
       * On failure, a message is logged and the last status is set to false
       * (when the insertion fails or a FreshRSS_Feed_Exception is caught).
       *
       * @param array<string,string> $feed_elt An OPML element (must be a feed element, i.e. with an xmlUrl key).
       * @param FreshRSS_Category $category The category to associate to the feed.
       * @param bool $dry_run true to not create the feed in database.
       * @return FreshRSS_Feed|null The created feed, or null if it failed.
       */
      private function createFeed(array $feed_elt, FreshRSS_Category $category, bool $dry_run): ?FreshRSS_Feed {
          $url = Minz_Helper::htmlspecialchars_utf8($feed_elt['xmlUrl']);
          $name = $feed_elt['text'] ?? $feed_elt['title'] ?? '';
          $name = Minz_Helper::htmlspecialchars_utf8($name);
          $website = Minz_Helper::htmlspecialchars_utf8($feed_elt['htmlUrl'] ?? '');
          $description = Minz_Helper::htmlspecialchars_utf8($feed_elt['description'] ?? '');

          try {
              // Create a Feed object and add it in DB
              $feed = new FreshRSS_Feed($url);
              $category->addFeed($feed);
              $feed->_name($name);
              $feed->_website($website);
              $feed->_description($description);

              // The OPML type is compared case-insensitively; anything unknown
              // (or missing) is handled as a regular RSS/Atom feed.
              switch (strtolower($feed_elt['type'] ?? '')) {
                  case strtolower(FreshRSS_Export_Service::TYPE_HTML_XPATH):
                      $feed->_kind(FreshRSS_Feed::KIND_HTML_XPATH);
                      break;
                  case strtolower(FreshRSS_Export_Service::TYPE_XML_XPATH):
                      $feed->_kind(FreshRSS_Feed::KIND_XML_XPATH);
                      break;
                  case strtolower(FreshRSS_Export_Service::TYPE_RSS_ATOM):
                  default:
                      $feed->_kind(FreshRSS_Feed::KIND_RSS);
                      break;
              }

              if (isset($feed_elt['frss:cssFullContent'])) {
                  $feed->_pathEntries(Minz_Helper::htmlspecialchars_utf8($feed_elt['frss:cssFullContent']));
              }

              if (isset($feed_elt['frss:cssFullContentFilter'])) {
                  $feed->_attribute('path_entries_filter', $feed_elt['frss:cssFullContentFilter']);
              }

              if (isset($feed_elt['frss:filtersActionRead'])) {
                  // One filter per line, whatever the line-break style is.
                  $feed->_filtersAction(
                      'read',
                      preg_split('/\R/', $feed_elt['frss:filtersActionRead']) ?: []
                  );
              }

              // Collect the XPath settings which are present in the outline.
              $xPathSettings = [];
              foreach (self::XPATH_ATTRIBUTES as $opml_attribute => $setting_name) {
                  if (isset($feed_elt[$opml_attribute])) {
                      $xPathSettings[$setting_name] = $feed_elt[$opml_attribute];
                  }
              }
              if ($xPathSettings !== []) {
                  $feed->_attribute('xpath', $xPathSettings);
              }

              // Call the extension hook
              /** @var FreshRSS_Feed|null */
              $feed = Minz_ExtensionManager::callHook('feed_before_insert', $feed);

              if ($dry_run) {
                  return $feed;
              }

              if ($feed instanceof FreshRSS_Feed) {
                  // addFeedObject checks if feed is already in DB
                  $id = $this->feedDAO->addFeedObject($feed);
                  // Loose comparison kept on purpose: both false and 0 are
                  // handled as a failed insertion.
                  if ($id != false) {
                      $feed->_id($id);
                      return $feed;
                  }
                  $this->lastStatus = false;
              }
          } catch (FreshRSS_Feed_Exception $e) {
              self::log($e->getMessage());
              $this->lastStatus = false;
          }

          // Credentials are stripped from the URL before it is written to the log.
          $clean_url = SimplePie_Misc::url_remove_credentials($url);
          self::log("Cannot create {$clean_url} feed in category {$category->name()}");
          return null;
      }

      /**
       * Create and return a category.
       *
       * On failure, a message is logged and the last status is set to false.
       *
       * @param array<string,string> $category_element An OPML element (must be a category element).
       * @param bool $dry_run true to not create the category in database.
       * @return FreshRSS_Category|null The created category, or null if it failed.
       */
      private function createCategory(array $category_element, bool $dry_run): ?FreshRSS_Category {
          $name = $category_element['text'] ?? $category_element['title'] ?? '';
          $name = Minz_Helper::htmlspecialchars_utf8($name);
          $category = new FreshRSS_Category($name);

          if (isset($category_element['frss:opmlUrl'])) {
              // The category is only marked as a dynamic OPML one when
              // checkUrl() gives back a non-empty URL.
              $opml_url = checkUrl($category_element['frss:opmlUrl']);
              if (is_string($opml_url) && $opml_url !== '') {
                  $category->_kind(FreshRSS_Category::KIND_DYNAMIC_OPML);
                  $category->_attribute('opml_url', $opml_url);
              }
          }

          if ($dry_run) {
              return $category;
          }

          $id = $this->catDAO->addCategoryObject($category);
          if ($id === false) {
              self::log("Cannot create category {$category->name()}");
              $this->lastStatus = false;
              return null;
          }

          $category->_id($id);
          return $category;
      }

      /**
       * Return the list of category and feed outlines by categories names.
       *
       * This method is applied to a list of outlines. It merges the different
       * list of feeds from several outlines into one array.
       *
       * Note that PHP converts numeric-looking names (e.g. "2023") to integer
       * array keys, so the keys of the returned arrays may be integers.
       *
       * @param array<mixed> $outlines
       *     The outlines from which to extract the outlines.
       * @param string $parent_category_name
       *     The name of the parent category of the current outlines.
       * @return array{0:array<mixed>,1:array<mixed>}
       */
      private function loadFromOutlines(array $outlines, string $parent_category_name): array {
          $categories_elements = [];
          $categories_to_feeds = [];

          foreach ($outlines as $outline) {
              // Get the categories and feeds from the child outline (it may
              // return several categories and feeds if the outline is a category).
              [$outline_categories, $outline_categories_to_feeds] = $this->loadFromOutline($outline, $parent_category_name);

              // Then, we merge the initial arrays with the arrays returned by
              // the outline. array_replace() is used instead of array_merge()
              // because the latter renumbers integer keys, which would lose
              // the categories having a numeric name.
              $categories_elements = array_replace($categories_elements, $outline_categories);

              foreach ($outline_categories_to_feeds as $category_name => $feeds) {
                  // The feeds are plain lists, so array_merge() appends them.
                  $categories_to_feeds[$category_name] = array_merge(
                      $categories_to_feeds[$category_name] ?? [],
                      $feeds
                  );
              }
          }

          return [$categories_elements, $categories_to_feeds];
      }

      /**
       * Return the list of category and feed outlines by categories names.
       *
       * This method is applied to a specific outline. If the outline represents
       * a category (i.e. @outlines key exists), it will reapply loadFromOutlines()
       * to its children. If the outline represents a feed (i.e. xmlUrl key
       * exists), it will add the outline to an array accessible by its category
       * name.
       *
       * @param array<mixed> $outline
       *     The outline from which to extract the categories and feeds outlines.
       * @param string $parent_category_name
       *     The name of the parent category of the current outline.
       *
       * @return array{0:array<string,mixed>,1:array<string,mixed>}
       */
      private function loadFromOutline(array $outline, string $parent_category_name): array {
          $categories_elements = [];
          $categories_to_feeds = [];

          if ($parent_category_name === '' && isset($outline['category'])) {
              // The outline has no parent category, but its OPML category
              // attribute is set, so we use it as the category name.
              // lib_opml parses this attribute as an array of strings, so we
              // rebuild a string here.
              $parent_category_name = implode(', ', $outline['category']);
              $categories_elements[$parent_category_name] = [
                  'text' => $parent_category_name,
              ];
          }

          if (isset($outline['@outlines'])) {
              // The outline has children, it’s probably a category.
              // Its name is its text, else its title, else the parent name.
              // An explicit comparison is used rather than empty(), so that a
              // category named "0" is not ignored.
              $category_name = $parent_category_name;
              foreach (['text', 'title'] as $attribute) {
                  if (isset($outline[$attribute]) && $outline[$attribute] !== '') {
                      $category_name = $outline[$attribute];
                      break;
                  }
              }

              [$categories_elements, $categories_to_feeds] = $this->loadFromOutlines($outline['@outlines'], $category_name);

              // The children have been processed: keep only the attributes of
              // the category itself.
              unset($outline['@outlines']);
              $categories_elements[$category_name] = $outline;
          }

          // The xmlUrl means it’s a feed URL: add the outline to the array if it exists.
          if (isset($outline['xmlUrl'])) {
              $categories_to_feeds[$parent_category_name][] = $outline;
          }

          return [$categories_elements, $categories_to_feeds];
      }

      /**
       * Report an import error: on STDERR when running from the CLI, through
       * Minz_Log (warning level) otherwise.
       */
      private static function log(string $message): void {
          if (FreshRSS_Context::$isCli) {
              fwrite(STDERR, "FreshRSS error during OPML import: {$message}\n");
          } else {
              Minz_Log::warning("Error during OPML import: {$message}");
          }
      }
  }