finder_test.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package subscription
  4. import (
  5. "strings"
  6. "testing"
  7. )
  8. func TestFindYoutubePlaylistFeed(t *testing.T) {
  9. type testResult struct {
  10. websiteURL string
  11. feedURL string
  12. discoveryError bool
  13. }
  14. scenarios := []testResult{
  15. // Video URL
  16. {
  17. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
  18. feedURL: "",
  19. },
  20. // Video URL with position argument
  21. {
  22. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=1",
  23. feedURL: "",
  24. },
  25. // Video URL with position argument
  26. {
  27. websiteURL: "https://www.youtube.com/watch?t=1&v=dQw4w9WgXcQ",
  28. feedURL: "",
  29. },
  30. // Channel URL
  31. {
  32. websiteURL: "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
  33. feedURL: "",
  34. },
  35. // Channel URL with name
  36. {
  37. websiteURL: "https://www.youtube.com/@ABCDEFG",
  38. feedURL: "",
  39. },
  40. // Playlist URL
  41. {
  42. websiteURL: "https://www.youtube.com/playlist?list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR",
  43. feedURL: "https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR",
  44. },
  45. // Playlist URL with video ID
  46. {
  47. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM",
  48. feedURL: "https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM",
  49. },
  50. // Playlist URL with video ID and index argument
  51. {
  52. websiteURL: "https://www.youtube.com/watch?v=6IutBmRJNLk&list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR&index=4",
  53. feedURL: "https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR",
  54. },
  55. // Non-Youtube URL
  56. {
  57. websiteURL: "https://www.example.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
  58. feedURL: "",
  59. },
  60. // Invalid URL
  61. {
  62. websiteURL: "https://example|org/",
  63. feedURL: "",
  64. discoveryError: true,
  65. },
  66. }
  67. for _, scenario := range scenarios {
  68. subscriptions, localizedError := NewSubscriptionFinder(nil).FindSubscriptionsFromYouTubePlaylistPage(scenario.websiteURL)
  69. if scenario.discoveryError {
  70. if localizedError == nil {
  71. t.Fatalf(`Parsing an invalid URL should return an error`)
  72. }
  73. }
  74. if scenario.feedURL == "" {
  75. if len(subscriptions) > 0 {
  76. t.Fatalf(`Parsing a non-playlist URL should not return any subscription: %q`, scenario.websiteURL)
  77. }
  78. } else {
  79. if localizedError != nil {
  80. t.Fatalf(`Parsing a correctly formatted YouTube playlist page should not return any error: %v`, localizedError)
  81. }
  82. if len(subscriptions) != 1 {
  83. t.Fatalf(`Incorrect number of subscriptions returned`)
  84. }
  85. if subscriptions[0].URL != scenario.feedURL {
  86. t.Errorf(`Unexpected Feed, got %s, instead of %s`, subscriptions[0].URL, scenario.feedURL)
  87. }
  88. }
  89. }
  90. }
  91. func TestFindYoutubeChannelFeed(t *testing.T) {
  92. type testResult struct {
  93. websiteURL string
  94. feedURL string
  95. discoveryError bool
  96. }
  97. scenarios := []testResult{
  98. // Video URL
  99. {
  100. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
  101. feedURL: "",
  102. },
  103. // Video URL with position argument
  104. {
  105. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=1",
  106. feedURL: "",
  107. },
  108. // Video URL with position argument
  109. {
  110. websiteURL: "https://www.youtube.com/watch?t=1&v=dQw4w9WgXcQ",
  111. feedURL: "",
  112. },
  113. // Channel URL
  114. {
  115. websiteURL: "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
  116. feedURL: "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw",
  117. },
  118. // Channel URL with name
  119. {
  120. websiteURL: "https://www.youtube.com/@ABCDEFG",
  121. feedURL: "",
  122. },
  123. // Playlist URL
  124. {
  125. websiteURL: "https://www.youtube.com/playlist?list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR",
  126. feedURL: "",
  127. },
  128. // Playlist URL with video ID
  129. {
  130. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM",
  131. feedURL: "",
  132. },
  133. // Playlist URL with video ID and index argument
  134. {
  135. websiteURL: "https://www.youtube.com/watch?v=6IutBmRJNLk&list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR&index=4",
  136. feedURL: "",
  137. },
  138. // Non-Youtube URL
  139. {
  140. websiteURL: "https://www.example.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
  141. feedURL: "",
  142. },
  143. // Invalid URL
  144. {
  145. websiteURL: "https://example|org/",
  146. feedURL: "",
  147. discoveryError: true,
  148. },
  149. }
  150. for _, scenario := range scenarios {
  151. subscriptions, localizedError := NewSubscriptionFinder(nil).FindSubscriptionsFromYouTubeChannelPage(scenario.websiteURL)
  152. if scenario.discoveryError {
  153. if localizedError == nil {
  154. t.Fatalf(`Parsing an invalid URL should return an error`)
  155. }
  156. }
  157. if scenario.feedURL == "" {
  158. if len(subscriptions) > 0 {
  159. t.Fatalf(`Parsing a non-channel URL should not return any subscription: %q`, scenario.websiteURL)
  160. }
  161. } else {
  162. if localizedError != nil {
  163. t.Fatalf(`Parsing a correctly formatted YouTube channel page should not return any error: %v`, localizedError)
  164. }
  165. if len(subscriptions) != 1 {
  166. t.Fatalf(`Incorrect number of subscriptions returned`)
  167. }
  168. if subscriptions[0].URL != scenario.feedURL {
  169. t.Errorf(`Unexpected Feed, got %s, instead of %s`, subscriptions[0].URL, scenario.feedURL)
  170. }
  171. }
  172. }
  173. }
  174. func TestParseWebPageWithRssFeed(t *testing.T) {
  175. htmlPage := `
  176. <!doctype html>
  177. <html>
  178. <head>
  179. <link href="http://example.org/rss" rel="alternate" type="application/rss+xml" title="Some Title">
  180. </head>
  181. <body>
  182. </body>
  183. </html>`
  184. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  185. if err != nil {
  186. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  187. }
  188. if len(subscriptions) != 1 {
  189. t.Fatal(`Incorrect number of subscriptions returned`)
  190. }
  191. if subscriptions[0].Title != "Some Title" {
  192. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  193. }
  194. if subscriptions[0].URL != "http://example.org/rss" {
  195. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  196. }
  197. if subscriptions[0].Type != "rss" {
  198. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  199. }
  200. }
  201. func TestParseWebPageWithAtomFeed(t *testing.T) {
  202. htmlPage := `
  203. <!doctype html>
  204. <html>
  205. <head>
  206. <link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Some Title">
  207. </head>
  208. <body>
  209. </body>
  210. </html>`
  211. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  212. if err != nil {
  213. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  214. }
  215. if len(subscriptions) != 1 {
  216. t.Fatal(`Incorrect number of subscriptions returned`)
  217. }
  218. if subscriptions[0].Title != "Some Title" {
  219. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  220. }
  221. if subscriptions[0].URL != "http://example.org/atom.xml" {
  222. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  223. }
  224. if subscriptions[0].Type != "atom" {
  225. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  226. }
  227. }
  228. func TestParseWebPageWithJSONFeed(t *testing.T) {
  229. htmlPage := `
  230. <!doctype html>
  231. <html>
  232. <head>
  233. <link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
  234. </head>
  235. <body>
  236. </body>
  237. </html>`
  238. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  239. if err != nil {
  240. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  241. }
  242. if len(subscriptions) != 1 {
  243. t.Fatal(`Incorrect number of subscriptions returned`)
  244. }
  245. if subscriptions[0].Title != "Some Title" {
  246. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  247. }
  248. if subscriptions[0].URL != "http://example.org/feed.json" {
  249. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  250. }
  251. if subscriptions[0].Type != "json" {
  252. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  253. }
  254. }
  255. func TestParseWebPageWithOldJSONFeedMimeType(t *testing.T) {
  256. htmlPage := `
  257. <!doctype html>
  258. <html>
  259. <head>
  260. <link href="http://example.org/feed.json" rel="alternate" type="application/json" title="Some Title">
  261. </head>
  262. <body>
  263. </body>
  264. </html>`
  265. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  266. if err != nil {
  267. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  268. }
  269. if len(subscriptions) != 1 {
  270. t.Fatal(`Incorrect number of subscriptions returned`)
  271. }
  272. if subscriptions[0].Title != "Some Title" {
  273. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  274. }
  275. if subscriptions[0].URL != "http://example.org/feed.json" {
  276. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  277. }
  278. if subscriptions[0].Type != "json" {
  279. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  280. }
  281. }
  282. func TestParseWebPageWithRelativeFeedURL(t *testing.T) {
  283. htmlPage := `
  284. <!doctype html>
  285. <html>
  286. <head>
  287. <link href="/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
  288. </head>
  289. <body>
  290. </body>
  291. </html>`
  292. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  293. if err != nil {
  294. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  295. }
  296. if len(subscriptions) != 1 {
  297. t.Fatal(`Incorrect number of subscriptions returned`)
  298. }
  299. if subscriptions[0].Title != "Some Title" {
  300. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  301. }
  302. if subscriptions[0].URL != "http://example.org/feed.json" {
  303. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  304. }
  305. if subscriptions[0].Type != "json" {
  306. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  307. }
  308. }
  309. func TestParseWebPageWithEmptyTitle(t *testing.T) {
  310. htmlPage := `
  311. <!doctype html>
  312. <html>
  313. <head>
  314. <link href="/feed.json" rel="alternate" type="application/feed+json">
  315. </head>
  316. <body>
  317. </body>
  318. </html>`
  319. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  320. if err != nil {
  321. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  322. }
  323. if len(subscriptions) != 1 {
  324. t.Fatal(`Incorrect number of subscriptions returned`)
  325. }
  326. if subscriptions[0].Title != "http://example.org/feed.json" {
  327. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  328. }
  329. if subscriptions[0].URL != "http://example.org/feed.json" {
  330. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  331. }
  332. if subscriptions[0].Type != "json" {
  333. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  334. }
  335. }
  336. func TestParseWebPageWithMultipleFeeds(t *testing.T) {
  337. htmlPage := `
  338. <!doctype html>
  339. <html>
  340. <head>
  341. <link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Atom Feed">
  342. <link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="JSON Feed">
  343. </head>
  344. <body>
  345. </body>
  346. </html>`
  347. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  348. if err != nil {
  349. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  350. }
  351. if len(subscriptions) != 2 {
  352. t.Fatal(`Incorrect number of subscriptions returned`)
  353. }
  354. }
  355. func TestParseWebPageWithDuplicatedFeeds(t *testing.T) {
  356. htmlPage := `
  357. <!doctype html>
  358. <html>
  359. <head>
  360. <link href="http://example.org/feed.xml" rel="alternate" type="application/rss+xml" title="Feed A">
  361. <link href="http://example.org/feed.xml" rel="alternate" type="application/rss+xml" title="Feed B">
  362. </head>
  363. <body>
  364. </body>
  365. </html>`
  366. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  367. if err != nil {
  368. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  369. }
  370. if len(subscriptions) != 1 {
  371. t.Fatal(`Incorrect number of subscriptions returned`)
  372. }
  373. if subscriptions[0].Title != "Feed A" {
  374. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  375. }
  376. if subscriptions[0].URL != "http://example.org/feed.xml" {
  377. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  378. }
  379. if subscriptions[0].Type != "rss" {
  380. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  381. }
  382. }
  383. func TestParseWebPageWithEmptyFeedURL(t *testing.T) {
  384. htmlPage := `
  385. <!doctype html>
  386. <html>
  387. <head>
  388. <link href rel="alternate" type="application/feed+json" title="Some Title">
  389. </head>
  390. <body>
  391. </body>
  392. </html>`
  393. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  394. if err != nil {
  395. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  396. }
  397. if len(subscriptions) != 0 {
  398. t.Fatal(`Incorrect number of subscriptions returned`)
  399. }
  400. }
  401. func TestParseWebPageWithNoHref(t *testing.T) {
  402. htmlPage := `
  403. <!doctype html>
  404. <html>
  405. <head>
  406. <link rel="alternate" type="application/feed+json" title="Some Title">
  407. </head>
  408. <body>
  409. </body>
  410. </html>`
  411. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  412. if err != nil {
  413. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  414. }
  415. if len(subscriptions) != 0 {
  416. t.Fatal(`Incorrect number of subscriptions returned`)
  417. }
  418. }