| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506 |
- <?php
- declare(strict_types=1);
/**
 * Provide methods to import files.
 *
 * The only import implemented here is OPML, see importOpml(). The outcome of
 * the last import can be read with lastStatus().
 */
class FreshRSS_Import_Service {

	/**
	 * Keys of the `xpath` / `json_dotnotation` feed attributes, mapped to the
	 * suffix of the matching `frss:xPath…` / `frss:json…` OPML attribute.
	 *
	 * @var array<string,string>
	 */
	private const ITEM_SETTINGS = [
		'item' => 'Item',
		'itemTitle' => 'ItemTitle',
		'itemContent' => 'ItemContent',
		'itemUri' => 'ItemUri',
		'itemAuthor' => 'ItemAuthor',
		'itemTimestamp' => 'ItemTimestamp',
		'itemTimeFormat' => 'ItemTimeFormat',
		'itemThumbnail' => 'ItemThumbnail',
		'itemCategories' => 'ItemCategories',
		'itemUid' => 'ItemUid',
	];

	private readonly FreshRSS_CategoryDAO $catDAO;

	private readonly FreshRSS_FeedDAO $feedDAO;

	/**
	 * true if success, false otherwise.
	 * Defaults to true so that lastStatus() can safely be read before any import has run.
	 */
	private bool $lastStatus = true;

	/**
	 * Initialize the service for the given user.
	 *
	 * @param string|null $username passed as is to the DAO factories.
	 */
	public function __construct(?string $username = null) {
		$this->catDAO = FreshRSS_Factory::createCategoryDao($username);
		$this->feedDAO = FreshRSS_Factory::createFeedDao($username);
	}

	/** @return bool true if success, false otherwise */
	public function lastStatus(): bool {
		return $this->lastStatus;
	}

	/**
	 * This method parses and imports an OPML file.
	 *
	 * The status is reset to true at the start and set to false as soon as
	 * parsing fails, the default category cannot be loaded, the feed limit is
	 * reached, or a category / feed cannot be created.
	 *
	 * @param string $opml_file the OPML file content.
	 * @param FreshRSS_Category|null $forced_category force the feeds to be associated to this category.
	 * @param bool $dry_run true to not create categories and feeds in database.
	 */
	public function importOpml(string $opml_file, ?FreshRSS_Category $forced_category = null, bool $dry_run = false): void {
		if (function_exists('set_time_limit')) {
			@set_time_limit(300);
		}

		$this->lastStatus = true;
		try {
			$libopml = new \marienfressinaud\LibOpml\LibOpml(strict: false);
			/** @var array{body:array<array<mixed>>} $opml_array */
			$opml_array = $libopml->parseString($opml_file);
		} catch (\marienfressinaud\LibOpml\Exception $e) {
			self::log($e->getMessage());
			$this->lastStatus = false;
			return;
		}

		$this->catDAO->checkDefault();
		$default_category = $this->catDAO->getDefault();
		if ($default_category === null) {
			self::log('Cannot get the default category');
			$this->lastStatus = false;
			return;
		}

		// Get the categories by names so we can use this array to retrieve
		// existing categories later.
		$categories = $this->catDAO->listCategories(prePopulateFeeds: false);
		$categories_by_names = [];
		foreach ($categories as $category) {
			$categories_by_names[$category->name()] = $category;
		}

		// Get current numbers of categories and feeds, and the limits to
		// verify the user can import their categories/feeds.
		$nb_categories = count($categories);
		$nb_feeds = count($this->feedDAO->listFeeds());
		$limits = FreshRSS_Context::systemConf()->limits;

		// Process the OPML outlines to get a list of categories and a list of
		// feeds elements indexed by their categories names.
		[$categories_elements, $categories_to_feeds] = $this->loadFromOutlines($opml_array['body'], '');

		foreach ($categories_to_feeds as $category_name => $feeds_elements) {
			$category_element = $categories_elements[$category_name] ?? null;

			$category = null;
			if ($forced_category !== null) {
				// If the category is forced, ignore the actual category name
				$category = $forced_category;
			} elseif (isset($categories_by_names[$category_name])) {
				// If the category already exists, get it from $categories_by_names
				$category = $categories_by_names[$category_name];
			} elseif (is_array($category_element)) {
				// Otherwise, create the category (if possible).
				// The limit is not enforced when running from the CLI.
				$limit_reached = $nb_categories >= $limits['max_categories'];
				$can_create_category = FreshRSS_Context::$isCli || !$limit_reached;

				if ($can_create_category) {
					$category = $this->createCategory($category_element, $dry_run);
					if ($category !== null) {
						$categories_by_names[$category->name()] = $category;
						$nb_categories++;
					}
				} else {
					Minz_Log::warning(
						_t('feedback.sub.category.over_max', $limits['max_categories'])
					);
				}
			}

			if ($category === null) {
				// Category can be null if the feeds weren't in a category
				// outline, or if we weren't able to create the category.
				$category = $default_category;
			}

			// Then, create the feeds one by one and attach them to the
			// category we just got.
			foreach ($feeds_elements as $feed_element) {
				// The limit is not enforced when running from the CLI.
				$limit_reached = $nb_feeds >= $limits['max_feeds'];
				$can_create_feed = FreshRSS_Context::$isCli || !$limit_reached;
				if (!$can_create_feed) {
					Minz_Log::warning(
						_t('feedback.sub.feed.over_max', $limits['max_feeds'])
					);
					$this->lastStatus = false;
					// Only leaves the feeds loop of the current category
					break;
				}

				if ($this->createFeed($feed_element, $category, $dry_run) !== null) {
					// TODO what if the feed already exists in the database?
					$nb_feeds++;
				} else {
					$this->lastStatus = false;
				}
			}
		}
	}

	/**
	 * Create a feed from a feed element (i.e. OPML outline).
	 *
	 * The feed is built from the standard OPML attributes (xmlUrl, text/title,
	 * htmlUrl, description, type) and from the `frss:` attributes, then passed
	 * to the FeedBeforeInsert extension hook (which may return null) before
	 * being inserted. On failure, the status is set to false and an error is logged.
	 *
	 * @param array<string,string> $feed_elt An OPML element (must be a feed element).
	 * @param FreshRSS_Category $category The category to associate to the feed.
	 * @param bool $dry_run true to not create the feed in database.
	 * @return FreshRSS_Feed|null The created feed, or null if it failed.
	 */
	private function createFeed(array $feed_elt, FreshRSS_Category $category, bool $dry_run): ?FreshRSS_Feed {
		$url = Minz_Helper::htmlspecialchars_utf8($feed_elt['xmlUrl']);
		$name = $feed_elt['text'] ?? $feed_elt['title'] ?? '';
		$name = Minz_Helper::htmlspecialchars_utf8($name);
		$website = Minz_Helper::htmlspecialchars_utf8($feed_elt['htmlUrl'] ?? '');
		$description = Minz_Helper::htmlspecialchars_utf8($feed_elt['description'] ?? '');

		try {
			// Create a Feed object and add it in DB
			$feed = new FreshRSS_Feed($url);
			$feed->_category($category);
			$feed->_name($name);
			$feed->_website($website);
			$feed->_description($description);

			// The OPML type is matched case-insensitively; unknown types are plain RSS.
			$feed->_kind(match (strtolower($feed_elt['type'] ?? '')) {
				strtolower(FreshRSS_Export_Service::TYPE_HTML_XPATH) => FreshRSS_Feed::KIND_HTML_XPATH,
				strtolower(FreshRSS_Export_Service::TYPE_XML_XPATH) => FreshRSS_Feed::KIND_XML_XPATH,
				strtolower(FreshRSS_Export_Service::TYPE_JSON_DOTNOTATION),
				strtolower(FreshRSS_Export_Service::TYPE_JSON_DOTPATH) => FreshRSS_Feed::KIND_JSON_DOTNOTATION,
				strtolower(FreshRSS_Export_Service::TYPE_JSONFEED) => FreshRSS_Feed::KIND_JSONFEED,
				strtolower(FreshRSS_Export_Service::TYPE_HTML_XPATH_JSON_DOTNOTATION) => FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION,
				default => FreshRSS_Feed::KIND_RSS,
			});

			$feed->_priority(match (strtolower($feed_elt['frss:priority'] ?? '')) {
				FreshRSS_Export_Service::PRIORITY_IMPORTANT => FreshRSS_Feed::PRIORITY_IMPORTANT,
				FreshRSS_Export_Service::PRIORITY_MAIN_STREAM => FreshRSS_Feed::PRIORITY_MAIN_STREAM,
				FreshRSS_Export_Service::PRIORITY_CATEGORY => FreshRSS_Feed::PRIORITY_CATEGORY,
				FreshRSS_Export_Service::PRIORITY_FEED => FreshRSS_Feed::PRIORITY_FEED,
				FreshRSS_Export_Service::PRIORITY_HIDDEN => FreshRSS_Feed::PRIORITY_HIDDEN,
				default => FreshRSS_Feed::PRIORITY_MAIN_STREAM,
			});

			// 'id' is not stored, and anything not matching the pattern is ignored
			if (isset($feed_elt['frss:unicityCriteria']) && $feed_elt['frss:unicityCriteria'] !== 'id'
				&& preg_match('/^[a-z:_-]{2,64}$/', $feed_elt['frss:unicityCriteria'])) {
				$feed->_attribute('unicityCriteria', $feed_elt['frss:unicityCriteria']);
			}
			if (filter_var($feed_elt['frss:unicityCriteriaForced'] ?? '', FILTER_VALIDATE_BOOLEAN)) {
				$feed->_attribute('unicityCriteriaForced', true);
			}

			if (isset($feed_elt['frss:cssFullContent'])) {
				$feed->_pathEntries(Minz_Helper::htmlspecialchars_utf8($feed_elt['frss:cssFullContent']));
			}
			if (isset($feed_elt['frss:cssFullContentConditions'])) {
				// One condition per line
				$feed->_attribute(
					'path_entries_conditions',
					preg_split('/\R/u', $feed_elt['frss:cssFullContentConditions']) ?: []
				);
			}
			if (isset($feed_elt['frss:cssContentFilter']) || isset($feed_elt['frss:cssFullContentFilter'])) {
				// frss:cssContentFilter takes precedence when both attributes are present
				$feed->_attribute('path_entries_filter', $feed_elt['frss:cssContentFilter'] ?? $feed_elt['frss:cssFullContentFilter']);
			}

			if (isset($feed_elt['frss:filtersActionRead'])) {
				// One filter per line
				$feed->_filtersAction(
					'read',
					preg_split('/\R/u', $feed_elt['frss:filtersActionRead']) ?: []
				);
			}

			$xPathSettings = self::itemSettingsFromOutline($feed_elt, 'frss:xPath');
			if ($xPathSettings !== []) {
				$feed->_attribute('xpath', $xPathSettings);
			}

			$jsonSettings = self::itemSettingsFromOutline($feed_elt, 'frss:json');
			if ($jsonSettings !== []) {
				$feed->_attribute('json_dotnotation', $jsonSettings);
			}

			$feed->_attribute('xPathToJson', $feed_elt['frss:xPathToJson'] ?? null);

			$curl_params = self::curlParamsFromOutline($feed_elt);
			if ($curl_params !== []) {
				$feed->_attribute('curl_params', $curl_params);
			}

			// Call the extension hook
			/** @var FreshRSS_Feed|null */
			$feed = Minz_ExtensionManager::callHook(Minz_HookType::FeedBeforeInsert, $feed);

			if ($dry_run) {
				if ($feed !== null) {
					$category->addFeed($feed);
				}
				return $feed;
			}

			if ($feed !== null) {
				// addFeedObject checks if feed is already in DB
				$id = $this->feedDAO->addFeedObject($feed);
				// Loose comparison kept on purpose: any falsy ID is treated as a failure
				if ($id == false) {
					$this->lastStatus = false;
				} else {
					$feed->_id($id);
					$category->addFeed($feed);
					return $feed;
				}
			}
		} catch (FreshRSS_Feed_Exception $e) {
			self::log($e->getMessage());
			$this->lastStatus = false;
		}

		// Reached when the feed could not be built, was rejected by the hook, or could not be inserted
		$clean_url = \SimplePie\Misc::url_remove_credentials($url);
		self::log("Cannot create {$clean_url} feed in category {$category->name()}");
		return null;
	}

	/**
	 * Collect the per-item scraping settings of a feed element.
	 *
	 * For each key of ITEM_SETTINGS, the OPML attribute named `$prefix` followed
	 * by the matching suffix is copied when it is set.
	 *
	 * @param array<string,string> $feed_elt An OPML feed element.
	 * @param string $prefix Attribute prefix, i.e. 'frss:xPath' or 'frss:json'.
	 * @return array<string,string> The settings found (possibly empty), indexed by setting key.
	 */
	private static function itemSettingsFromOutline(array $feed_elt, string $prefix): array {
		$settings = [];
		foreach (self::ITEM_SETTINGS as $setting => $suffix) {
			$value = $feed_elt[$prefix . $suffix] ?? null;
			if ($value !== null) {
				$settings[$setting] = $value;
			}
		}
		return $settings;
	}

	/**
	 * Build the cURL options of a feed from the `frss:CURLOPT_*` attributes of a feed element.
	 *
	 * @param array<string,string> $feed_elt An OPML feed element.
	 * @return array<int,mixed> The cURL options found (possibly empty), indexed by CURLOPT_* constant.
	 */
	private static function curlParamsFromOutline(array $feed_elt): array {
		$curl_params = [];
		if (isset($feed_elt['frss:CURLOPT_COOKIE'])) {
			$curl_params[CURLOPT_COOKIE] = $feed_elt['frss:CURLOPT_COOKIE'];
		}
		if (isset($feed_elt['frss:CURLOPT_COOKIEFILE'])) {
			// Allow only an empty value just to enable the libcurl cookie engine
			$curl_params[CURLOPT_COOKIEFILE] = '';
		}
		if (isset($feed_elt['frss:CURLOPT_FOLLOWLOCATION'])) {
			$curl_params[CURLOPT_FOLLOWLOCATION] = (bool)$feed_elt['frss:CURLOPT_FOLLOWLOCATION'];
		}
		if (isset($feed_elt['frss:CURLOPT_HTTPHEADER'])) {
			// One header per line
			$curl_params[CURLOPT_HTTPHEADER] = preg_split('/\R/u', $feed_elt['frss:CURLOPT_HTTPHEADER']) ?: [];
		}
		if (isset($feed_elt['frss:CURLOPT_MAXREDIRS'])) {
			$curl_params[CURLOPT_MAXREDIRS] = (int)$feed_elt['frss:CURLOPT_MAXREDIRS'];
		}
		if (isset($feed_elt['frss:CURLOPT_POST'])) {
			$curl_params[CURLOPT_POST] = (bool)$feed_elt['frss:CURLOPT_POST'];
		}
		if (isset($feed_elt['frss:CURLOPT_POSTFIELDS'])) {
			$curl_params[CURLOPT_POSTFIELDS] = $feed_elt['frss:CURLOPT_POSTFIELDS'];
		}
		if (isset($feed_elt['frss:CURLOPT_PROXY'])) {
			$curl_params[CURLOPT_PROXY] = $feed_elt['frss:CURLOPT_PROXY'];
		}
		if (isset($feed_elt['frss:CURLOPT_PROXYTYPE'])) {
			$curl_params[CURLOPT_PROXYTYPE] = (int)$feed_elt['frss:CURLOPT_PROXYTYPE'];
			if ($curl_params[CURLOPT_PROXYTYPE] === 3) {	// Legacy for NONE
				$curl_params[CURLOPT_PROXYTYPE] = -1;
			}
		}
		if (isset($feed_elt['frss:CURLOPT_USERAGENT'])) {
			$curl_params[CURLOPT_USERAGENT] = $feed_elt['frss:CURLOPT_USERAGENT'];
		}
		return $curl_params;
	}

	/**
	 * Create and return a category.
	 *
	 * When the element carries a `frss:opmlUrl` attribute accepted by
	 * FreshRSS_http_Util::checkUrl(), the category is created as a dynamic OPML category.
	 *
	 * @param array<string,string> $category_element An OPML element (must be a category element).
	 * @param bool $dry_run true to not create the category in database.
	 * @return FreshRSS_Category|null The created category, or null if it failed.
	 */
	private function createCategory(array $category_element, bool $dry_run): ?FreshRSS_Category {
		$name = $category_element['text'] ?? $category_element['title'] ?? '';
		$name = Minz_Helper::htmlspecialchars_utf8($name);
		$category = new FreshRSS_Category($name);

		if (isset($category_element['frss:opmlUrl'])) {
			$opml_url = FreshRSS_http_Util::checkUrl($category_element['frss:opmlUrl']);
			// Loose comparison kept on purpose: it also rejects a non-string (e.g. false) result
			if ($opml_url != '') {
				$category->_kind(FreshRSS_Category::KIND_DYNAMIC_OPML);
				$category->_attribute('opml_url', $opml_url);
			}
		}

		if ($dry_run) {
			return $category;
		}

		$id = $this->catDAO->addCategoryObject($category);
		if ($id === false) {
			self::log("Cannot create category {$category->name()}");
			$this->lastStatus = false;
			return null;
		}

		$category->_id($id);
		return $category;
	}

	/**
	 * Return the list of category and feed outlines by categories names.
	 *
	 * This method is applied to a list of outlines. It merges the different
	 * list of feeds from several outlines into one array.
	 *
	 * Note that PHP stores numeric category names (e.g. "2024") as integer
	 * array keys: keys are therefore merged with functions preserving them.
	 *
	 * @param array<array<mixed>> $outlines The outlines from which to extract the outlines.
	 * @param string $parent_category_name The name of the parent category of the current outlines.
	 * @return array{0:array<int|string,array<string,string>>,1:array<int|string,list<array<string,string>>>}
	 */
	private function loadFromOutlines(array $outlines, string $parent_category_name): array {
		$categories_elements = [];
		$categories_to_feeds = [];

		foreach ($outlines as $outline) {
			if (!is_array($outline)) {
				continue;
			}

			// Get the categories and feeds from the child outline (it may
			// return several categories and feeds if the outline is a category).
			[$outline_categories, $outline_categories_to_feeds] = $this->loadFromOutline($outline, $parent_category_name);

			// Then, we merge the initial arrays with the arrays returned by
			// the outline. array_replace() is used instead of array_merge()
			// because the latter renumbers integer keys, which would lose
			// the categories with a numeric name.
			$categories_elements = array_replace($categories_elements, $outline_categories);

			foreach ($outline_categories_to_feeds as $category_name => $feeds) {
				if (!is_array($feeds)) {
					continue;
				}
				// Numeric names come back as integer keys: do not skip them
				$category_name = (string)$category_name;
				$categories_to_feeds[$category_name] = array_merge(
					$categories_to_feeds[$category_name] ?? [],
					$feeds
				);
			}
		}

		return [$categories_elements, $categories_to_feeds];
	}

	/**
	 * Return the list of category and feed outlines by categories names.
	 *
	 * This method is applied to a specific outline. If the outline represents
	 * a category (i.e. @outlines key exists), it will reapply loadFromOutlines()
	 * to its children. If the outline represents a feed (i.e. xmlUrl key
	 * exists and is a string), it will add the outline to an array accessible
	 * by its category name.
	 *
	 * @param array<mixed> $outline The outline from which to extract the categories and feeds outlines.
	 * @param string $parent_category_name The name of the parent category of the current outline.
	 *
	 * @return array{0:array<int|string,array<string,string>>,1:array<int|string,list<array<string,string>>>}
	 */
	private function loadFromOutline(array $outline, string $parent_category_name): array {
		$categories_elements = [];
		$categories_to_feeds = [];

		if ($parent_category_name === '' && is_array($outline['category'] ?? null)) {
			// The outline has no parent category, but its OPML category
			// attribute is set, so we use it as the category name.
			// lib_opml parses this attribute as an array of strings, so we
			// rebuild a string here.
			$attribute_category_name = implode(', ', array_filter($outline['category'], is_string(...)));
			if ($attribute_category_name !== '') {
				// An empty attribute must not create a category without a name
				$parent_category_name = $attribute_category_name;
				$categories_elements[$parent_category_name] = [
					'text' => $parent_category_name,
				];
			}
		}

		if (is_array($outline['@outlines'] ?? null)) {
			// The outline has children, it’s probably a category
			if (!empty($outline['text']) && is_string($outline['text'])) {
				$category_name = $outline['text'];
			} elseif (!empty($outline['title']) && is_string($outline['title'])) {
				$category_name = $outline['title'];
			} else {
				$category_name = $parent_category_name;
			}

			$children = array_filter($outline['@outlines'], is_array(...));
			[$children_categories, $categories_to_feeds] = $this->loadFromOutlines($children, $category_name);

			// Keep the element possibly built from the category attribute
			// above instead of overwriting it with the children's elements.
			$categories_elements = array_replace($categories_elements, $children_categories);
			$categories_elements[$category_name] = self::stringAttributes($outline);
		}

		// The xmlUrl means it’s a feed URL: add the outline to the array if it exists.
		// Non-string values are ignored since they are stripped from the feed element anyway.
		if (is_string($outline['xmlUrl'] ?? null)) {
			$categories_to_feeds[$parent_category_name][] = self::stringAttributes($outline);
		}

		return [$categories_elements, $categories_to_feeds];
	}

	/**
	 * Keep only the entries of an outline having both a string key and a
	 * string value (this drops, among others, the `@outlines` children and
	 * the `category` list).
	 *
	 * @param array<mixed> $outline
	 * @return array<string,string>
	 */
	private static function stringAttributes(array $outline): array {
		return array_filter(
			$outline,
			static fn($value, $key): bool => is_string($key) && is_string($value),
			ARRAY_FILTER_USE_BOTH
		);
	}

	/**
	 * Report an import error: on STDERR when running from the CLI, otherwise
	 * as a warning in the Minz log.
	 */
	private static function log(string $message): void {
		if (FreshRSS_Context::$isCli) {
			fwrite(STDERR, "FreshRSS error during OPML import: {$message}\n");
		} else {
			Minz_Log::warning("Error during OPML import: {$message}");
		}
	}
}
|