finder_test.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package subscription
  4. import (
  5. "strings"
  6. "testing"
  7. )
  8. func TestFindYoutubeFeed(t *testing.T) {
  9. type testResult struct {
  10. websiteURL string
  11. feedURL string
  12. discoveryError bool
  13. }
  14. scenarios := []testResult{
  15. // Video URL
  16. {
  17. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
  18. feedURL: "",
  19. },
  20. // Video URL with position argument
  21. {
  22. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=1",
  23. feedURL: "",
  24. },
  25. // Video URL with position argument
  26. {
  27. websiteURL: "https://www.youtube.com/watch?t=1&v=dQw4w9WgXcQ",
  28. feedURL: "",
  29. },
  30. // Channel URL
  31. {
  32. websiteURL: "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
  33. feedURL: "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw",
  34. },
  35. // Channel URL with name
  36. {
  37. websiteURL: "https://www.youtube.com/@ABCDEFG",
  38. feedURL: "",
  39. },
  40. // Playlist URL
  41. {
  42. websiteURL: "https://www.youtube.com/playlist?list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR",
  43. feedURL: "https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR",
  44. },
  45. // Playlist URL with video ID
  46. {
  47. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM",
  48. feedURL: "https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM",
  49. },
  50. // Playlist URL with video ID and index argument
  51. {
  52. websiteURL: "https://www.youtube.com/watch?v=6IutBmRJNLk&list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR&index=4",
  53. feedURL: "https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR",
  54. },
  55. // Empty playlist ID parameter
  56. {
  57. websiteURL: "https://www.youtube.com/playlist?list=",
  58. feedURL: "",
  59. },
  60. // Non-Youtube URL
  61. {
  62. websiteURL: "https://www.example.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
  63. feedURL: "",
  64. },
  65. // Invalid URL
  66. {
  67. websiteURL: "https://example|org/",
  68. feedURL: "",
  69. discoveryError: true,
  70. },
  71. }
  72. for _, scenario := range scenarios {
  73. subscriptions, localizedError := NewSubscriptionFinder(nil).findSubscriptionsFromYouTube(scenario.websiteURL)
  74. if scenario.discoveryError {
  75. if localizedError == nil {
  76. t.Fatalf(`Parsing an invalid URL should return an error`)
  77. }
  78. }
  79. if scenario.feedURL == "" {
  80. if len(subscriptions) > 0 {
  81. t.Fatalf(`Parsing an invalid URL should not return any subscription: %q -> %v`, scenario.websiteURL, subscriptions)
  82. }
  83. } else {
  84. if localizedError != nil {
  85. t.Fatalf(`Parsing a correctly formatted YouTube playlist or channel page should not return any error: %v`, localizedError)
  86. }
  87. if len(subscriptions) != 1 {
  88. t.Fatalf(`Incorrect number of subscriptions returned`)
  89. }
  90. if subscriptions[0].URL != scenario.feedURL {
  91. t.Errorf(`Unexpected feed, got %s, instead of %s`, subscriptions[0].URL, scenario.feedURL)
  92. }
  93. }
  94. }
  95. }
  96. func TestParseWebPageWithRssFeed(t *testing.T) {
  97. htmlPage := `
  98. <!doctype html>
  99. <html>
  100. <head>
  101. <link href="http://example.org/rss" rel="alternate" type="application/rss+xml" title="Some Title">
  102. </head>
  103. <body>
  104. </body>
  105. </html>`
  106. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  107. if err != nil {
  108. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  109. }
  110. if len(subscriptions) != 1 {
  111. t.Fatal(`Incorrect number of subscriptions returned`)
  112. }
  113. if subscriptions[0].Title != "Some Title" {
  114. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  115. }
  116. if subscriptions[0].URL != "http://example.org/rss" {
  117. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  118. }
  119. if subscriptions[0].Type != "rss" {
  120. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  121. }
  122. }
  123. func TestParseWebPageWithAtomFeed(t *testing.T) {
  124. htmlPage := `
  125. <!doctype html>
  126. <html>
  127. <head>
  128. <link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Some Title">
  129. </head>
  130. <body>
  131. </body>
  132. </html>`
  133. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  134. if err != nil {
  135. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  136. }
  137. if len(subscriptions) != 1 {
  138. t.Fatal(`Incorrect number of subscriptions returned`)
  139. }
  140. if subscriptions[0].Title != "Some Title" {
  141. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  142. }
  143. if subscriptions[0].URL != "http://example.org/atom.xml" {
  144. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  145. }
  146. if subscriptions[0].Type != "atom" {
  147. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  148. }
  149. }
  150. func TestParseWebPageWithJSONFeed(t *testing.T) {
  151. htmlPage := `
  152. <!doctype html>
  153. <html>
  154. <head>
  155. <link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
  156. </head>
  157. <body>
  158. </body>
  159. </html>`
  160. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  161. if err != nil {
  162. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  163. }
  164. if len(subscriptions) != 1 {
  165. t.Fatal(`Incorrect number of subscriptions returned`)
  166. }
  167. if subscriptions[0].Title != "Some Title" {
  168. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  169. }
  170. if subscriptions[0].URL != "http://example.org/feed.json" {
  171. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  172. }
  173. if subscriptions[0].Type != "json" {
  174. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  175. }
  176. }
  177. func TestParseWebPageWithOldJSONFeedMimeType(t *testing.T) {
  178. htmlPage := `
  179. <!doctype html>
  180. <html>
  181. <head>
  182. <link href="http://example.org/feed.json" rel="alternate" type="application/json" title="Some Title">
  183. </head>
  184. <body>
  185. </body>
  186. </html>`
  187. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  188. if err != nil {
  189. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  190. }
  191. if len(subscriptions) != 1 {
  192. t.Fatal(`Incorrect number of subscriptions returned`)
  193. }
  194. if subscriptions[0].Title != "Some Title" {
  195. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  196. }
  197. if subscriptions[0].URL != "http://example.org/feed.json" {
  198. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  199. }
  200. if subscriptions[0].Type != "json" {
  201. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  202. }
  203. }
  204. func TestParseWebPageWithRelativeFeedURL(t *testing.T) {
  205. htmlPage := `
  206. <!doctype html>
  207. <html>
  208. <head>
  209. <link href="/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
  210. </head>
  211. <body>
  212. </body>
  213. </html>`
  214. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  215. if err != nil {
  216. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  217. }
  218. if len(subscriptions) != 1 {
  219. t.Fatal(`Incorrect number of subscriptions returned`)
  220. }
  221. if subscriptions[0].Title != "Some Title" {
  222. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  223. }
  224. if subscriptions[0].URL != "http://example.org/feed.json" {
  225. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  226. }
  227. if subscriptions[0].Type != "json" {
  228. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  229. }
  230. }
  231. func TestParseWebPageWithEmptyTitle(t *testing.T) {
  232. htmlPage := `
  233. <!doctype html>
  234. <html>
  235. <head>
  236. <link href="/feed.json" rel="alternate" type="application/feed+json">
  237. </head>
  238. <body>
  239. </body>
  240. </html>`
  241. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  242. if err != nil {
  243. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  244. }
  245. if len(subscriptions) != 1 {
  246. t.Fatal(`Incorrect number of subscriptions returned`)
  247. }
  248. if subscriptions[0].Title != "http://example.org/feed.json" {
  249. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  250. }
  251. if subscriptions[0].URL != "http://example.org/feed.json" {
  252. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  253. }
  254. if subscriptions[0].Type != "json" {
  255. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  256. }
  257. }
  258. func TestParseWebPageWithMultipleFeeds(t *testing.T) {
  259. htmlPage := `
  260. <!doctype html>
  261. <html>
  262. <head>
  263. <link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Atom Feed">
  264. <link href="http://example.org/feed.json" rel="alternate" type="application/json" title="JSON Feed">
  265. </head>
  266. <body>
  267. </body>
  268. </html>`
  269. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  270. if err != nil {
  271. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  272. }
  273. if len(subscriptions) != 2 {
  274. t.Fatal(`Incorrect number of subscriptions returned`)
  275. }
  276. }
  277. func TestParseWebPageWithDuplicatedFeeds(t *testing.T) {
  278. htmlPage := `
  279. <!doctype html>
  280. <html>
  281. <head>
  282. <link href="http://example.org/feed.xml" rel="alternate" type="application/rss+xml" title="Feed A">
  283. <link href="http://example.org/feed.xml" rel="alternate" type="application/rss+xml" title="Feed B">
  284. </head>
  285. <body>
  286. </body>
  287. </html>`
  288. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  289. if err != nil {
  290. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  291. }
  292. if len(subscriptions) != 1 {
  293. t.Fatal(`Incorrect number of subscriptions returned`)
  294. }
  295. if subscriptions[0].Title != "Feed A" {
  296. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  297. }
  298. if subscriptions[0].URL != "http://example.org/feed.xml" {
  299. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  300. }
  301. if subscriptions[0].Type != "rss" {
  302. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  303. }
  304. }
  305. func TestParseWebPageWithEmptyFeedURL(t *testing.T) {
  306. htmlPage := `
  307. <!doctype html>
  308. <html>
  309. <head>
  310. <link href rel="alternate" type="application/feed+json" title="Some Title">
  311. </head>
  312. <body>
  313. </body>
  314. </html>`
  315. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  316. if err != nil {
  317. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  318. }
  319. if len(subscriptions) != 0 {
  320. t.Fatal(`Incorrect number of subscriptions returned`)
  321. }
  322. }
  323. func TestParseWebPageWithNoHref(t *testing.T) {
  324. htmlPage := `
  325. <!doctype html>
  326. <html>
  327. <head>
  328. <link rel="alternate" type="application/feed+json" title="Some Title">
  329. </head>
  330. <body>
  331. </body>
  332. </html>`
  333. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  334. if err != nil {
  335. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  336. }
  337. if len(subscriptions) != 0 {
  338. t.Fatal(`Incorrect number of subscriptions returned`)
  339. }
  340. }