ImportService.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
  1. <?php
  /**
   * Provide methods to import OPML content (categories and feeds) for a user.
   *
   * Typical usage: build the service, call importOpml(), then read lastStatus()
   * to know whether everything went fine.
   */
  class FreshRSS_Import_Service {
      /** @var FreshRSS_CategoryDAO */
      private $catDAO;

      /** @var FreshRSS_FeedDAO */
      private $feedDAO;

      /**
       * Outcome of the most recent import. It defaults to false so that
       * lastStatus() can safely be called before any import has been run
       * (a null value would violate the method's `bool` return type).
       *
       * @var bool true if success, false otherwise
       */
      private $lastStatus = false;

      /**
       * Initialize the service for the given user.
       *
       * @param string|null $username Passed as-is to the DAO factory methods.
       */
      public function __construct($username = null) {
          $this->catDAO = FreshRSS_Factory::createCategoryDao($username);
          $this->feedDAO = FreshRSS_Factory::createFeedDao($username);
      }

      /**
       * @return bool true if the last import succeeded; false if it failed or
       *              if no import has been run yet.
       */
      public function lastStatus(): bool {
          return $this->lastStatus;
      }

      /**
       * This method parses and imports an OPML file.
       *
       * Nothing is returned: the outcome is available through lastStatus().
       *
       * @param string $opml_file the OPML file content.
       * @param FreshRSS_Category|null $forced_category force the feeds to be associated to this category.
       * @param boolean $dry_run true to not create categories and feeds in database.
       */
      public function importOpml(string $opml_file, $forced_category = null, $dry_run = false) {
          // Best effort only: big imports may need more time than the default.
          @set_time_limit(300);
          $this->lastStatus = true;

          try {
              $libopml = new \marienfressinaud\LibOpml\LibOpml(false);
              $opml_array = $libopml->parseString($opml_file);
          } catch (\marienfressinaud\LibOpml\Exception $e) {
              self::log($e->getMessage());
              $this->lastStatus = false;
              return;
          }

          $this->catDAO->checkDefault();
          $default_category = $this->catDAO->getDefault();
          if (!$default_category) {
              self::log('Cannot get the default category');
              $this->lastStatus = false;
              return;
          }

          // Index the existing categories by name so they can be reused
          // instead of being created a second time.
          $categories = $this->catDAO->listCategories(false);
          $categories_by_names = [];
          foreach ($categories as $category) {
              $categories_by_names[$category->name()] = $category;
          }

          // Current numbers of categories and feeds, and the configured limits,
          // to verify the user is allowed to import more of them.
          $nb_categories = count($categories);
          $nb_feeds = count($this->feedDAO->listFeeds());
          $limits = FreshRSS_Context::$system_conf->limits;

          // Flatten the OPML outlines into a list of category elements and a
          // list of feed elements, both indexed by category name.
          list(
              $categories_elements,
              $categories_to_feeds,
          ) = $this->loadFromOutlines($opml_array['body'], '');

          // Note: $category_name is an int when the name is a numeric string,
          // because PHP casts such array keys.
          foreach ($categories_to_feeds as $category_name => $feeds_elements) {
              $category_element = $categories_elements[$category_name] ?? null;

              $category = null;
              if ($forced_category) {
                  // If the category is forced, ignore the actual category name
                  $category = $forced_category;
              } elseif (isset($categories_by_names[$category_name])) {
                  // The category already exists: reuse it
                  $category = $categories_by_names[$category_name];
              } elseif ($category_element) {
                  // Otherwise, create the category (if possible). The limit is
                  // not enforced when running from the command line.
                  $limit_reached = $nb_categories >= $limits['max_categories'];
                  $can_create_category = FreshRSS_Context::$isCli || !$limit_reached;

                  if ($can_create_category) {
                      $category = $this->createCategory($category_element, $dry_run);
                      if ($category) {
                          $categories_by_names[$category->name()] = $category;
                          $nb_categories++;
                      }
                  } else {
                      Minz_Log::warning(
                          _t('feedback.sub.category.over_max', $limits['max_categories'])
                      );
                  }
              }

              if (!$category) {
                  // Category can be null if the feeds weren't in a category
                  // outline, or if we weren't able to create the category.
                  $category = $default_category;
              }

              // Then, create the feeds one by one and attach them to the
              // category we just got.
              foreach ($feeds_elements as $feed_element) {
                  $limit_reached = $nb_feeds >= $limits['max_feeds'];
                  $can_create_feed = FreshRSS_Context::$isCli || !$limit_reached;
                  if (!$can_create_feed) {
                      Minz_Log::warning(
                          _t('feedback.sub.feed.over_max', $limits['max_feeds'])
                      );
                      $this->lastStatus = false;
                      // Only leaves the feeds loop: the remaining categories
                      // are still visited.
                      break;
                  }

                  if ($this->createFeed($feed_element, $category, $dry_run)) {
                      // TODO what if the feed already exists in the database?
                      $nb_feeds++;
                  } else {
                      $this->lastStatus = false;
                  }
              }
          }
      }

      /**
       * Create a feed from a feed element (i.e. OPML outline).
       *
       * On failure, the error is logged and the last status is set to false
       * when the feed cannot be built or inserted.
       *
       * @param array<string, string> $feed_elt An OPML element (must be a feed element, i.e. with an xmlUrl key).
       * @param FreshRSS_Category $category The category to associate to the feed.
       * @param boolean $dry_run true to not create the feed in database.
       *
       * @return FreshRSS_Feed|null The created feed, or null if it failed.
       */
      private function createFeed($feed_elt, $category, $dry_run) {
          $url = Minz_Helper::htmlspecialchars_utf8($feed_elt['xmlUrl']);
          $name = $feed_elt['text'] ?? $feed_elt['title'] ?? '';
          $name = Minz_Helper::htmlspecialchars_utf8($name);
          $website = Minz_Helper::htmlspecialchars_utf8($feed_elt['htmlUrl'] ?? '');
          $description = Minz_Helper::htmlspecialchars_utf8($feed_elt['description'] ?? '');

          // OPML attribute name => key of the feed's "xpath" attribute.
          $xpath_attributes = [
              'frss:xPathItem' => 'item',
              'frss:xPathItemTitle' => 'itemTitle',
              'frss:xPathItemContent' => 'itemContent',
              'frss:xPathItemUri' => 'itemUri',
              'frss:xPathItemAuthor' => 'itemAuthor',
              'frss:xPathItemTimestamp' => 'itemTimestamp',
              'frss:xPathItemTimeFormat' => 'itemTimeFormat',
              'frss:xPathItemThumbnail' => 'itemThumbnail',
              'frss:xPathItemCategories' => 'itemCategories',
              'frss:xPathItemUid' => 'itemUid',
          ];

          try {
              // Create a Feed object and add it in DB
              $feed = new FreshRSS_Feed($url);
              $feed->_categoryId($category->id());
              $category->addFeed($feed);
              $feed->_name($name);
              $feed->_website($website);
              $feed->_description($description);

              // The OPML "type" attribute is matched case-insensitively; any
              // unknown or missing type is handled as a regular RSS/Atom feed.
              switch (strtolower($feed_elt['type'] ?? '')) {
                  case strtolower(FreshRSS_Export_Service::TYPE_HTML_XPATH):
                      $kind = FreshRSS_Feed::KIND_HTML_XPATH;
                      break;
                  case strtolower(FreshRSS_Export_Service::TYPE_XML_XPATH):
                      $kind = FreshRSS_Feed::KIND_XML_XPATH;
                      break;
                  case strtolower(FreshRSS_Export_Service::TYPE_RSS_ATOM):
                  default:
                      $kind = FreshRSS_Feed::KIND_RSS;
                      break;
              }
              $feed->_kind($kind);

              if (isset($feed_elt['frss:cssFullContent'])) {
                  $feed->_pathEntries(Minz_Helper::htmlspecialchars_utf8($feed_elt['frss:cssFullContent']));
              }

              if (isset($feed_elt['frss:cssFullContentFilter'])) {
                  $feed->_attributes('path_entries_filter', $feed_elt['frss:cssFullContentFilter']);
              }

              if (isset($feed_elt['frss:filtersActionRead'])) {
                  // One filter per line
                  $feed->_filtersAction(
                      'read',
                      preg_split('/[\n\r]+/', $feed_elt['frss:filtersActionRead'])
                  );
              }

              // Collect the XPath settings that are present in the outline.
              $xPathSettings = [];
              foreach ($xpath_attributes as $opml_attribute => $setting_name) {
                  if (isset($feed_elt[$opml_attribute])) {
                      $xPathSettings[$setting_name] = $feed_elt[$opml_attribute];
                  }
              }
              if (!empty($xPathSettings)) {
                  $feed->_attributes('xpath', $xPathSettings);
              }

              // Call the extension hook; its result replaces the feed and is
              // checked against null below.
              /** @var FreshRSS_Feed|null */
              $feed = Minz_ExtensionManager::callHook('feed_before_insert', $feed);

              if ($dry_run) {
                  return $feed;
              }

              if ($feed != null) {
                  // addFeedObject checks if feed is already in DB
                  $id = $this->feedDAO->addFeedObject($feed);
                  if ($id == false) {
                      $this->lastStatus = false;
                  } else {
                      $feed->_id($id);
                      return $feed;
                  }
              }
          } catch (FreshRSS_Feed_Exception $e) {
              self::log($e->getMessage());
              $this->lastStatus = false;
          }

          // Failure path: never log the credentials that may be part of the URL.
          $clean_url = SimplePie_Misc::url_remove_credentials($url);
          self::log("Cannot create {$clean_url} feed in category {$category->name()}");
          return null;
      }

      /**
       * Create and return a category.
       *
       * @param array<string, string> $category_element An OPML element (must be a category element).
       * @param boolean $dry_run true to not create the category in database.
       *
       * @return FreshRSS_Category|null The created category, or null if it failed.
       */
      private function createCategory($category_element, $dry_run) {
          $name = $category_element['text'] ?? $category_element['title'] ?? '';
          $name = Minz_Helper::htmlspecialchars_utf8($name);
          $category = new FreshRSS_Category($name);

          if (isset($category_element['frss:opmlUrl'])) {
              // The category is only made dynamic if checkUrl() gives back a
              // non-empty URL.
              $opml_url = checkUrl($category_element['frss:opmlUrl']);
              if ($opml_url != '') {
                  $category->_kind(FreshRSS_Category::KIND_DYNAMIC_OPML);
                  $category->_attributes('opml_url', $opml_url);
              }
          }

          if ($dry_run) {
              return $category;
          }

          $id = $this->catDAO->addCategoryObject($category);
          if ($id === false) {
              self::log("Cannot create category {$category->name()}");
              $this->lastStatus = false;
              return null;
          }

          $category->_id($id);
          return $category;
      }

      /**
       * Return the list of category and feed outlines by categories names.
       *
       * This method is applied to a list of outlines. It merges the different
       * list of feeds from several outlines into one array.
       *
       * @param array $outlines
       *     The outlines from which to extract the outlines.
       * @param string $parent_category_name
       *     The name of the parent category of the current outlines.
       *
       * @return array[]
       *     A pair: the category elements indexed by category name, then the
       *     lists of feed elements indexed by category name.
       */
      private function loadFromOutlines($outlines, $parent_category_name) {
          $categories_elements = [];
          $categories_to_feeds = [];

          foreach ($outlines as $outline) {
              // Get the categories and feeds from the child outline (it may
              // return several categories and feeds if the outline is a category).
              list(
                  $outline_categories,
                  $outline_categories_to_feeds,
              ) = $this->loadFromOutline($outline, $parent_category_name);

              // Merge the categories. array_replace() is used rather than
              // array_merge() because the latter renumbers integer keys, and
              // PHP casts numeric category names (e.g. "2023") to integer
              // keys: such categories would lose their name.
              $categories_elements = array_replace($categories_elements, $outline_categories);

              // The feeds are plain lists: append them to the list of their
              // category.
              foreach ($outline_categories_to_feeds as $category_name => $feeds) {
                  if (!isset($categories_to_feeds[$category_name])) {
                      $categories_to_feeds[$category_name] = [];
                  }

                  $categories_to_feeds[$category_name] = array_merge(
                      $categories_to_feeds[$category_name],
                      $feeds
                  );
              }
          }

          return [$categories_elements, $categories_to_feeds];
      }

      /**
       * Return the list of category and feed outlines by categories names.
       *
       * This method is applied to a specific outline. If the outline represents
       * a category (i.e. @outlines key exists), it will reapply loadFromOutlines()
       * to its children. If the outline represents a feed (i.e. xmlUrl key
       * exists), it will add the outline to an array accessible by its category
       * name.
       *
       * @param array $outline
       *     The outline from which to extract the categories and feeds outlines.
       * @param string $parent_category_name
       *     The name of the parent category of the current outline.
       *
       * @return array[]
       *     A pair: the category elements indexed by category name, then the
       *     lists of feed elements indexed by category name.
       */
      private function loadFromOutline($outline, $parent_category_name) {
          $categories_elements = [];
          $categories_to_feeds = [];

          if ($parent_category_name === '' && isset($outline['category'])) {
              // The outline has no parent category, but its OPML category
              // attribute is set, so we use it as the category name.
              // lib_opml parses this attribute as an array of strings, so we
              // rebuild a string here.
              $parent_category_name = implode(', ', $outline['category']);
              $categories_elements[$parent_category_name] = [
                  'text' => $parent_category_name,
              ];
          }

          if (isset($outline['@outlines'])) {
              // The outline has children, it's probably a category
              if (!empty($outline['text'])) {
                  $category_name = $outline['text'];
              } elseif (!empty($outline['title'])) {
                  $category_name = $outline['title'];
              } else {
                  $category_name = $parent_category_name;
              }

              list(
                  $children_categories,
                  $children_to_feeds,
              ) = $this->loadFromOutlines($outline['@outlines'], $category_name);

              // Merge instead of overwriting, so that a category element
              // registered above from the "category" attribute is not lost.
              $categories_elements = array_replace($categories_elements, $children_categories);
              $categories_to_feeds = $children_to_feeds;

              // The outline itself (without its children) is the category element.
              unset($outline['@outlines']);
              $categories_elements[$category_name] = $outline;
          }

          // The xmlUrl means it's a feed URL: add the outline to the array if it
          // exists.
          if (isset($outline['xmlUrl'])) {
              if (!isset($categories_to_feeds[$parent_category_name])) {
                  $categories_to_feeds[$parent_category_name] = [];
              }

              $categories_to_feeds[$parent_category_name][] = $outline;
          }

          return [$categories_elements, $categories_to_feeds];
      }

      /**
       * Report an import error: on STDERR when running from the command line,
       * as a Minz warning otherwise.
       *
       * @param string $message
       */
      private static function log($message) {
          if (FreshRSS_Context::$isCli) {
              fwrite(STDERR, "FreshRSS error during OPML import: {$message}\n");
          } else {
              Minz_Log::warning("Error during OPML import: {$message}");
          }
      }
  }