dotNotationUtil.php 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. <?php
  2. declare(strict_types=1);
  /**
   * Helpers to read values out of decoded JSON with a small "dot notation"
   * path language, and to map a JSON document onto an RSS feed with it.
   *
   * Supported path syntax (see `get()`):
   *  - `a.b.c`            nested lookup
   *  - `items[0].value`   bracket index, equivalent to `items.0.value`
   *  - `''`, `.`, `$`     the root value itself
   *  - `'text'`, `"text"` a string literal
   *  - `x & 'text' & y`   concatenation of the scalar results of each operand
   */
  final class FreshRSS_dotNotation_Util
  {
      /**
       * Get an item from an array using "dot" notation.
       * Functions adapted from https://stackoverflow.com/a/39118759
       * https://github.com/illuminate/support/blob/52e8f314b8043860b1c09e5c2c7e8cca94aafc7d/Arr.php#L270-L305
       * Newer version in
       * https://github.com/laravel/framework/blob/10.x/src/Illuminate/Collections/Arr.php#L302-L337
       *
       * @param \ArrayAccess<string,mixed>|array<string,mixed>|mixed $array value to read from
       * @param string|null $key path expression; `null`, `''`, `.` and `$` all designate `$array` itself
       * @param mixed $default returned when the path cannot be resolved; a `Closure` is invoked lazily
       * @return mixed the resolved value, a string for literals and concatenations, or the default
       */
      public static function get(mixed $array, ?string $key, mixed $default = null): mixed {
          if (!self::accessible($array)) {
              return self::value($default);
          }
          /** @var \ArrayAccess<string,mixed>|array<string,mixed> $array */
          if ($key === null) {
              return $array;
          }
          $key = trim($key);
          if (in_array($key, ['', '.', '$'], true)) {
              return $array;
          }

          // String concatenation with `&`: resolve every operand on its own and
          // glue the scalar results together; non-scalar results are ignored.
          $operands = self::splitConcatenation($key);
          if (count($operands) > 1) {
              $text = '';
              foreach ($operands as $operand) {
                  $result = self::get($array, $operand, $default);
                  if (is_scalar($result)) {
                      $text .= (string)$result;
                  }
              }
              return $text;
          }

          // A single operand wrapped in matching quotes is a string literal.
          // Any `&` it contains is plain text here, since the key was not split above.
          if (preg_match('/^(?P<delim>[\'"])(?P<text>.*)(?P=delim)$/s', $key, $matches) === 1) {
              return $matches['text'];
          }

          // Compatibility with brackets path such as `items[0].value`
          $key = preg_replace('/\[(\d+)\]/', '.$1', $key);
          if ($key === null) {
              // Regex engine failure: treat the path as unresolvable.
              return self::value($default);
          }

          // A key that exists verbatim (possibly containing dots) wins over nested traversal.
          if (self::exists($array, $key)) {
              return $array[$key];
          }
          if (!str_contains($key, '.')) {
              return $array[$key] ?? self::value($default);
          }

          foreach (explode('.', $key) as $segment) {
              if (!self::accessible($array) || !self::exists($array, $segment)) {
                  return self::value($default);
              }
              $array = $array[$segment];
          }
          return $array;
      }

      /**
       * Get a string from an array using "dot" notation.
       *
       * @param \ArrayAccess<string,mixed>|array<string,mixed>|mixed $array
       * @return string|null the string cast of a string, bool, float or int result; `null` for anything else
       */
      public static function getString(mixed $array, ?string $key): ?string {
          $result = self::get($array, $key, null);
          return is_string($result) || is_bool($result) || is_float($result) || is_int($result) ? (string)$result : null;
      }

      /**
       * Split a path expression on the `&` concatenation operator,
       * leaving any `&` located inside a quoted literal untouched.
       *
       * @return list<string> the operands, not trimmed; a single element when there is no operator
       */
      private static function splitConcatenation(string $key): array {
          // The first alternative consumes a whole quoted literal and then discards
          // the match with (*SKIP)(*FAIL), so the scan resumes after the closing quote;
          // only an `&` outside of quotes can match the second alternative.
          $operands = preg_split('/(?P<delim>[\'"]).*?(?P=delim)(*SKIP)(*FAIL)|&/', $key);
          return $operands === false ? [$key] : $operands;
      }

      /**
       * Determine whether the given value is array accessible.
       */
      private static function accessible(mixed $value): bool {
          return is_array($value) || $value instanceof \ArrayAccess;
      }

      /**
       * Determine if the given key exists in the provided array.
       *
       * @param \ArrayAccess<string,mixed>|array<string,mixed>|mixed $array
       * @phpstan-assert-if-true \ArrayAccess<string,mixed>|array<string,mixed> $array
       */
      private static function exists(mixed $array, string $key): bool {
          if ($array instanceof \ArrayAccess) {
              return $array->offsetExists($key);
          }
          if (is_array($array)) {
              return array_key_exists($key, $array);
          }
          return false;
      }

      /**
       * Resolve a default value: closures are called, anything else is returned as is.
       */
      private static function value(mixed $value): mixed {
          return $value instanceof \Closure ? $value() : $value;
      }

      /**
       * Convert a JSON object to a RSS document
       * mapping fields from the JSON object into RSS equivalents
       * according to the dot-separated paths
       *
       * @param array<int|string,mixed> $jf json feed
       * @param string $feedSourceUrl the source URL for the feed
       * @param array<string,string> $dotNotation dot notation to map JSON into RSS
       * @param string $defaultRssTitle Default title of the RSS feed, if not already provided in dotNotation `feedTitle`
       * @return string|null the result of rendering `index/rss.phtml`, or `null` when no `item` path is configured
       *  or when that path does not resolve to a non-empty array
       */
      public static function convertJsonToRss(array $jf, string $feedSourceUrl, array $dotNotation, string $defaultRssTitle = ''): ?string {
          if (!isset($dotNotation['item']) || $dotNotation['item'] === '') {
              return null; //no definition of item path, but we can't scrape anything without knowing this
          }

          $view = new FreshRSS_View();
          $view->_path('index/rss.phtml');
          $view->internal_rendering = true;
          $view->rss_url = htmlspecialchars($feedSourceUrl, ENT_COMPAT, 'UTF-8');
          $view->html_url = $view->rss_url;
          $view->entries = [];
          $view->rss_title = htmlspecialchars(self::mappedString($dotNotation, 'feedTitle', $jf), ENT_COMPAT, 'UTF-8')
              ?: $defaultRssTitle;

          $jsonItems = self::get($jf, $dotNotation['item']);
          if (!is_array($jsonItems) || count($jsonItems) === 0) {
              return null;
          }

          foreach ($jsonItems as $jsonItem) {
              $rssItem = self::mapItem($jsonItem, $dotNotation);
              if ($rssItem !== null) {
                  $view->entries[] = FreshRSS_Entry::fromArray($rssItem);
              }
          }
          return $view->renderToString();
      }

      /**
       * Read the string found at the path configured under `$dotNotation[$field]`.
       *
       * @param array<string,string> $dotNotation
       * @return string empty when the field is not configured or does not resolve to a scalar
       */
      private static function mappedString(array $dotNotation, string $field, mixed $source): string {
          return isset($dotNotation[$field]) ? (self::getString($source, $dotNotation[$field]) ?? '') : '';
      }

      /**
       * Map one JSON item onto the array handed to `FreshRSS_Entry::fromArray()`.
       *
       * @param array<string,string> $dotNotation
       * @return array<string,mixed>|null `null` when the item has no usable link and must be skipped
       */
      private static function mapItem(mixed $jsonItem, array $dotNotation): ?array {
          $rssItem = [];
          $rssItem['link'] = self::mappedString($dotNotation, 'itemUri', $jsonItem);
          if (empty($rssItem['link'])) {
              return null;
          }

          $rssItem['title'] = self::mappedString($dotNotation, 'itemTitle', $jsonItem);
          $rssItem['author'] = self::mappedString($dotNotation, 'itemAuthor', $jsonItem);
          $rssItem['timestamp'] = self::mapTimestamp(
              self::mappedString($dotNotation, 'itemTimestamp', $jsonItem),
              $dotNotation['itemTimeFormat'] ?? null
          );

          //get simple content, but if a path for HTML content has been provided, replace the simple content with HTML content
          $rssItem['content'] = isset($dotNotation['itemContentHTML'])
              ? self::mappedString($dotNotation, 'itemContentHTML', $jsonItem)
              : self::mappedString($dotNotation, 'itemContent', $jsonItem);

          $tags = self::mapCategories($jsonItem, $dotNotation);
          if ($tags !== null) {
              $rssItem['tags'] = $tags;
          }

          $rssItem['thumbnail'] = self::mappedString($dotNotation, 'itemThumbnail', $jsonItem);

          $attachments = self::mapAttachments($jsonItem, $dotNotation);
          if ($attachments !== null) {
              $rssItem['attachments'] = $attachments;
          }

          // Compare against '' rather than using empty(), so that an identifier of "0" is kept.
          $guid = isset($dotNotation['itemUid']) ? self::getString($jsonItem, $dotNotation['itemUid']) : null;
          if ($guid === null || $guid === '') {
              $guid = 'urn:sha1:' . sha1($rssItem['title'] . $rssItem['content'] . $rssItem['link']);
          }
          $rssItem['guid'] = $guid;

          // HTML-encoding/escaping of the relevant fields (all except 'content')
          foreach (['author', 'guid', 'link', 'thumbnail', 'timestamp', 'title'] as $field) {
              if (!empty($rssItem[$field]) && is_string($rssItem[$field])) {
                  $rssItem[$field] = Minz_Helper::htmlspecialchars_utf8($rssItem[$field]);
              }
          }
          // Tags are a list of strings, so each element is escaped individually.
          if (isset($rssItem['tags'])) {
              $rssItem['tags'] = array_map(
                  static fn(string $tag) => Minz_Helper::htmlspecialchars_utf8($tag),
                  $rssItem['tags']
              );
          }
          return $rssItem;
      }

      /**
       * Convert a raw timestamp to the ATOM date format when a time format is configured.
       *
       * @param string $timestamp raw value read from the JSON item
       * @param mixed $timeFormat `DateTimeImmutable::createFromFormat()` format; ignored unless it is a string
       * @return string the ATOM date on success, otherwise the (possibly truncated) input
       */
      private static function mapTimestamp(string $timestamp, mixed $timeFormat): string {
          if (!is_string($timeFormat)) {
              return $timestamp;
          }
          if ($timeFormat === 'U' && strlen($timestamp) > 10) {
              // Compatibility with Unix timestamp in milliseconds
              $timestamp = substr($timestamp, 0, -3);
          }
          $dateTime = \DateTimeImmutable::createFromFormat($timeFormat, $timestamp);
          return $dateTime !== false ? $dateTime->format(\DateTimeInterface::ATOM) : $timestamp;
      }

      /**
       * Read the categories of a JSON item, given either as one string or as a list.
       *
       * @param array<string,string> $dotNotation
       * @return list<string>|null `null` when no `tags` entry should be set for the item
       */
      private static function mapCategories(mixed $jsonItem, array $dotNotation): ?array {
          if (!isset($dotNotation['itemCategories'])) {
              return null;
          }
          $categories = self::get($jsonItem, $dotNotation['itemCategories']);
          if (is_string($categories) && $categories !== '') {
              return [$categories];
          }
          if (!is_array($categories) || count($categories) === 0) {
              return null;
          }
          $tags = [];
          foreach ($categories as $category) {
              if (is_string($category)) {
                  $tags[] = $category;
              }
          }
          return $tags;
      }

      /**
       * Read the attachments (enclosures) of a JSON item.
       *
       * @param array<string,string> $dotNotation
       * @return list<array{url:?string,type:?string,length:mixed}>|null `null` when no `attachments` entry should be set
       */
      private static function mapAttachments(mixed $jsonItem, array $dotNotation): ?array {
          if (!isset($dotNotation['itemAttachment'])) {
              return null;
          }
          $jsonAttachments = self::get($jsonItem, $dotNotation['itemAttachment']);
          if (!is_array($jsonAttachments) || count($jsonAttachments) === 0) {
              return null;
          }
          $attachments = [];
          foreach ($jsonAttachments as $attachment) {
              $attachments[] = [
                  'url' => isset($dotNotation['itemAttachmentUrl'])
                      ? self::getString($attachment, $dotNotation['itemAttachmentUrl'])
                      : '',
                  'type' => isset($dotNotation['itemAttachmentType'])
                      ? self::getString($attachment, $dotNotation['itemAttachmentType'])
                      : '',
                  'length' => isset($dotNotation['itemAttachmentLength'])
                      ? self::get($attachment, $dotNotation['itemAttachmentLength'])
                      : '',
              ];
          }
          return $attachments;
      }
  }