finder_test.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package subscription
  4. import (
  5. "strings"
  6. "testing"
  7. )
  8. func TestFindYoutubeFeed(t *testing.T) {
  9. type testResult struct {
  10. websiteURL string
  11. feedURLs []string
  12. discoveryError bool
  13. }
  14. scenarios := []testResult{
  15. // Video URL
  16. {
  17. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
  18. feedURLs: []string{},
  19. },
  20. // Video URL with position argument
  21. {
  22. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=1",
  23. feedURLs: []string{},
  24. },
  25. // Video URL with position argument
  26. {
  27. websiteURL: "https://www.youtube.com/watch?t=1&v=dQw4w9WgXcQ",
  28. feedURLs: []string{},
  29. },
  30. // Channel URL
  31. {
  32. websiteURL: "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
  33. feedURLs: []string{
  34. "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw",
  35. "https://www.youtube.com/feeds/videos.xml?playlist_id=UULF-Qj80avWItNRjkZ41rzHyw",
  36. "https://www.youtube.com/feeds/videos.xml?playlist_id=UUSH-Qj80avWItNRjkZ41rzHyw",
  37. "https://www.youtube.com/feeds/videos.xml?playlist_id=UULV-Qj80avWItNRjkZ41rzHyw",
  38. },
  39. },
  40. // Channel URL with name
  41. {
  42. websiteURL: "https://www.youtube.com/@ABCDEFG",
  43. feedURLs: []string{},
  44. },
  45. // Playlist URL
  46. {
  47. websiteURL: "https://www.youtube.com/playlist?list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR",
  48. feedURLs: []string{"https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR"},
  49. },
  50. // Playlist URL with video ID
  51. {
  52. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM",
  53. feedURLs: []string{"https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM"},
  54. },
  55. // Playlist URL with video ID and index argument
  56. {
  57. websiteURL: "https://www.youtube.com/watch?v=6IutBmRJNLk&list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR&index=4",
  58. feedURLs: []string{"https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR"},
  59. },
  60. // Empty playlist ID parameter
  61. {
  62. websiteURL: "https://www.youtube.com/playlist?list=",
  63. feedURLs: []string{},
  64. },
  65. // Non-Youtube URL
  66. {
  67. websiteURL: "https://www.example.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
  68. feedURLs: []string{},
  69. },
  70. // Invalid URL
  71. {
  72. websiteURL: "https://example|org/",
  73. feedURLs: []string{},
  74. discoveryError: true,
  75. },
  76. }
  77. for _, scenario := range scenarios {
  78. subscriptions, localizedError := NewSubscriptionFinder(nil).findSubscriptionsFromYouTube(scenario.websiteURL)
  79. if scenario.discoveryError {
  80. if localizedError == nil {
  81. t.Fatalf(`Parsing an invalid URL should return an error`)
  82. }
  83. }
  84. if len(scenario.feedURLs) == 0 {
  85. if len(subscriptions) > 0 {
  86. t.Fatalf(`Parsing an invalid URL should not return any subscription: %q -> %v`, scenario.websiteURL, subscriptions)
  87. }
  88. } else {
  89. if localizedError != nil {
  90. t.Fatalf(`Parsing a correctly formatted YouTube playlist or channel page should not return any error: %v`, localizedError)
  91. }
  92. if len(subscriptions) != len(scenario.feedURLs) {
  93. t.Fatalf(`Incorrect number of subscriptions returned, expected %d, got %d`, len(scenario.feedURLs), len(subscriptions))
  94. }
  95. for i := range scenario.feedURLs {
  96. if subscriptions[i].URL != scenario.feedURLs[i] {
  97. t.Errorf(`Unexpected feed, got %s, instead of %s`, subscriptions[i].URL, scenario.feedURLs[i])
  98. }
  99. }
  100. }
  101. }
  102. }
  103. func TestParseWebPageWithRssFeed(t *testing.T) {
  104. htmlPage := `
  105. <!doctype html>
  106. <html>
  107. <head>
  108. <link href="http://example.org/rss" rel="alternate" type="application/rss+xml" title="Some Title">
  109. </head>
  110. <body>
  111. </body>
  112. </html>`
  113. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  114. if err != nil {
  115. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  116. }
  117. if len(subscriptions) != 1 {
  118. t.Fatal(`Incorrect number of subscriptions returned`)
  119. }
  120. if subscriptions[0].Title != "Some Title" {
  121. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  122. }
  123. if subscriptions[0].URL != "http://example.org/rss" {
  124. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  125. }
  126. if subscriptions[0].Type != "rss" {
  127. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  128. }
  129. }
  130. func TestParseWebPageWithAtomFeed(t *testing.T) {
  131. htmlPage := `
  132. <!doctype html>
  133. <html>
  134. <head>
  135. <link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Some Title">
  136. </head>
  137. <body>
  138. </body>
  139. </html>`
  140. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  141. if err != nil {
  142. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  143. }
  144. if len(subscriptions) != 1 {
  145. t.Fatal(`Incorrect number of subscriptions returned`)
  146. }
  147. if subscriptions[0].Title != "Some Title" {
  148. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  149. }
  150. if subscriptions[0].URL != "http://example.org/atom.xml" {
  151. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  152. }
  153. if subscriptions[0].Type != "atom" {
  154. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  155. }
  156. }
  157. func TestParseWebPageWithJSONFeed(t *testing.T) {
  158. htmlPage := `
  159. <!doctype html>
  160. <html>
  161. <head>
  162. <link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
  163. </head>
  164. <body>
  165. </body>
  166. </html>`
  167. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  168. if err != nil {
  169. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  170. }
  171. if len(subscriptions) != 1 {
  172. t.Fatal(`Incorrect number of subscriptions returned`)
  173. }
  174. if subscriptions[0].Title != "Some Title" {
  175. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  176. }
  177. if subscriptions[0].URL != "http://example.org/feed.json" {
  178. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  179. }
  180. if subscriptions[0].Type != "json" {
  181. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  182. }
  183. }
  184. func TestParseWebPageWithOldJSONFeedMimeType(t *testing.T) {
  185. htmlPage := `
  186. <!doctype html>
  187. <html>
  188. <head>
  189. <link href="http://example.org/feed.json" rel="alternate" type="application/json" title="Some Title">
  190. </head>
  191. <body>
  192. </body>
  193. </html>`
  194. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  195. if err != nil {
  196. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  197. }
  198. if len(subscriptions) != 1 {
  199. t.Fatal(`Incorrect number of subscriptions returned`)
  200. }
  201. if subscriptions[0].Title != "Some Title" {
  202. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  203. }
  204. if subscriptions[0].URL != "http://example.org/feed.json" {
  205. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  206. }
  207. if subscriptions[0].Type != "json" {
  208. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  209. }
  210. }
  211. func TestParseWebPageWithRelativeFeedURL(t *testing.T) {
  212. htmlPage := `
  213. <!doctype html>
  214. <html>
  215. <head>
  216. <link href="/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
  217. </head>
  218. <body>
  219. </body>
  220. </html>`
  221. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  222. if err != nil {
  223. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  224. }
  225. if len(subscriptions) != 1 {
  226. t.Fatal(`Incorrect number of subscriptions returned`)
  227. }
  228. if subscriptions[0].Title != "Some Title" {
  229. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  230. }
  231. if subscriptions[0].URL != "http://example.org/feed.json" {
  232. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  233. }
  234. if subscriptions[0].Type != "json" {
  235. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  236. }
  237. }
  238. func TestParseWebPageWithEmptyTitle(t *testing.T) {
  239. htmlPage := `
  240. <!doctype html>
  241. <html>
  242. <head>
  243. <link href="/feed.json" rel="alternate" type="application/feed+json">
  244. </head>
  245. <body>
  246. </body>
  247. </html>`
  248. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  249. if err != nil {
  250. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  251. }
  252. if len(subscriptions) != 1 {
  253. t.Fatal(`Incorrect number of subscriptions returned`)
  254. }
  255. if subscriptions[0].Title != "http://example.org/feed.json" {
  256. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  257. }
  258. if subscriptions[0].URL != "http://example.org/feed.json" {
  259. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  260. }
  261. if subscriptions[0].Type != "json" {
  262. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  263. }
  264. }
  265. func TestParseWebPageWithMultipleFeeds(t *testing.T) {
  266. htmlPage := `
  267. <!doctype html>
  268. <html>
  269. <head>
  270. <link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Atom Feed">
  271. <link href="http://example.org/feed.json" rel="alternate" type="application/json" title="JSON Feed">
  272. </head>
  273. <body>
  274. </body>
  275. </html>`
  276. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  277. if err != nil {
  278. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  279. }
  280. if len(subscriptions) != 2 {
  281. t.Fatal(`Incorrect number of subscriptions returned`)
  282. }
  283. }
  284. func TestParseWebPageWithDuplicatedFeeds(t *testing.T) {
  285. htmlPage := `
  286. <!doctype html>
  287. <html>
  288. <head>
  289. <link href="http://example.org/feed.xml" rel="alternate" type="application/rss+xml" title="Feed A">
  290. <link href="http://example.org/feed.xml" rel="alternate" type="application/rss+xml" title="Feed B">
  291. </head>
  292. <body>
  293. </body>
  294. </html>`
  295. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  296. if err != nil {
  297. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  298. }
  299. if len(subscriptions) != 1 {
  300. t.Fatal(`Incorrect number of subscriptions returned`)
  301. }
  302. if subscriptions[0].Title != "Feed A" {
  303. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  304. }
  305. if subscriptions[0].URL != "http://example.org/feed.xml" {
  306. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  307. }
  308. if subscriptions[0].Type != "rss" {
  309. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  310. }
  311. }
  312. func TestParseWebPageWithEmptyFeedURL(t *testing.T) {
  313. htmlPage := `
  314. <!doctype html>
  315. <html>
  316. <head>
  317. <link href rel="alternate" type="application/feed+json" title="Some Title">
  318. </head>
  319. <body>
  320. </body>
  321. </html>`
  322. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  323. if err != nil {
  324. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  325. }
  326. if len(subscriptions) != 0 {
  327. t.Fatal(`Incorrect number of subscriptions returned`)
  328. }
  329. }
  330. func TestParseWebPageWithNoHref(t *testing.T) {
  331. htmlPage := `
  332. <!doctype html>
  333. <html>
  334. <head>
  335. <link rel="alternate" type="application/feed+json" title="Some Title">
  336. </head>
  337. <body>
  338. </body>
  339. </html>`
  340. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  341. if err != nil {
  342. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  343. }
  344. if len(subscriptions) != 0 {
  345. t.Fatal(`Incorrect number of subscriptions returned`)
  346. }
  347. }
  348. func TestFindCanonicalURL(t *testing.T) {
  349. htmlPage := `
  350. <!doctype html>
  351. <html>
  352. <head>
  353. <link rel="canonical" href="https://example.org/canonical-page">
  354. </head>
  355. <body>
  356. </body>
  357. </html>`
  358. canonicalURL := NewSubscriptionFinder(nil).findCanonicalURL("https://example.org/page", "text/html", strings.NewReader(htmlPage))
  359. if canonicalURL != "https://example.org/canonical-page" {
  360. t.Errorf(`Unexpected canonical URL, got %q, expected %q`, canonicalURL, "https://example.org/canonical-page")
  361. }
  362. }
  363. func TestFindCanonicalURLNotFound(t *testing.T) {
  364. htmlPage := `
  365. <!doctype html>
  366. <html>
  367. <head>
  368. </head>
  369. <body>
  370. </body>
  371. </html>`
  372. canonicalURL := NewSubscriptionFinder(nil).findCanonicalURL("https://example.org/page", "text/html", strings.NewReader(htmlPage))
  373. if canonicalURL != "https://example.org/page" {
  374. t.Errorf(`Expected effective URL when canonical not found, got %q`, canonicalURL)
  375. }
  376. }