finder_test.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package subscription
  4. import (
  5. "testing"
  6. )
  7. func TestFindYoutubeFeed(t *testing.T) {
  8. type testResult struct {
  9. websiteURL string
  10. feedURLs []string
  11. discoveryError bool
  12. }
  13. scenarios := []testResult{
  14. // Video URL
  15. {
  16. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
  17. feedURLs: []string{},
  18. },
  19. // Video URL with position argument
  20. {
  21. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=1",
  22. feedURLs: []string{},
  23. },
  24. // Video URL with position argument
  25. {
  26. websiteURL: "https://www.youtube.com/watch?t=1&v=dQw4w9WgXcQ",
  27. feedURLs: []string{},
  28. },
  29. // Channel URL
  30. {
  31. websiteURL: "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
  32. feedURLs: []string{
  33. "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw",
  34. "https://www.youtube.com/feeds/videos.xml?playlist_id=UULF-Qj80avWItNRjkZ41rzHyw",
  35. "https://www.youtube.com/feeds/videos.xml?playlist_id=UUSH-Qj80avWItNRjkZ41rzHyw",
  36. "https://www.youtube.com/feeds/videos.xml?playlist_id=UULV-Qj80avWItNRjkZ41rzHyw",
  37. },
  38. },
  39. // Channel URL with name
  40. {
  41. websiteURL: "https://www.youtube.com/@ABCDEFG",
  42. feedURLs: []string{},
  43. },
  44. // Playlist URL
  45. {
  46. websiteURL: "https://www.youtube.com/playlist?list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR",
  47. feedURLs: []string{"https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR"},
  48. },
  49. // Playlist URL with video ID
  50. {
  51. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM",
  52. feedURLs: []string{"https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM"},
  53. },
  54. // Playlist URL with video ID and index argument
  55. {
  56. websiteURL: "https://www.youtube.com/watch?v=6IutBmRJNLk&list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR&index=4",
  57. feedURLs: []string{"https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR"},
  58. },
  59. // Empty playlist ID parameter
  60. {
  61. websiteURL: "https://www.youtube.com/playlist?list=",
  62. feedURLs: []string{},
  63. },
  64. // Non-Youtube URL
  65. {
  66. websiteURL: "https://www.example.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
  67. feedURLs: []string{},
  68. },
  69. // Invalid URL
  70. {
  71. websiteURL: "https://example|org/",
  72. feedURLs: []string{},
  73. discoveryError: true,
  74. },
  75. }
  76. for _, scenario := range scenarios {
  77. subscriptions, localizedError := NewSubscriptionFinder(nil).findSubscriptionsFromYouTube(scenario.websiteURL)
  78. if scenario.discoveryError {
  79. if localizedError == nil {
  80. t.Fatalf(`Parsing an invalid URL should return an error`)
  81. }
  82. }
  83. if len(scenario.feedURLs) == 0 {
  84. if len(subscriptions) > 0 {
  85. t.Fatalf(`Parsing an invalid URL should not return any subscription: %q -> %v`, scenario.websiteURL, subscriptions)
  86. }
  87. } else {
  88. if localizedError != nil {
  89. t.Fatalf(`Parsing a correctly formatted YouTube playlist or channel page should not return any error: %v`, localizedError)
  90. }
  91. if len(subscriptions) != len(scenario.feedURLs) {
  92. t.Fatalf(`Incorrect number of subscriptions returned, expected %d, got %d`, len(scenario.feedURLs), len(subscriptions))
  93. }
  94. for i := range scenario.feedURLs {
  95. if subscriptions[i].URL != scenario.feedURLs[i] {
  96. t.Errorf(`Unexpected feed, got %s, instead of %s`, subscriptions[i].URL, scenario.feedURLs[i])
  97. }
  98. }
  99. }
  100. }
  101. }
  102. func TestParseWebPageWithRssFeed(t *testing.T) {
  103. htmlPage := `
  104. <!doctype html>
  105. <html>
  106. <head>
  107. <link href="http://example.org/rss" rel="alternate" type="application/rss+xml" title="Some Title">
  108. </head>
  109. <body>
  110. </body>
  111. </html>`
  112. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
  113. if err != nil {
  114. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  115. }
  116. if len(subscriptions) != 1 {
  117. t.Fatal(`Incorrect number of subscriptions returned`)
  118. }
  119. if subscriptions[0].Title != "Some Title" {
  120. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  121. }
  122. if subscriptions[0].URL != "http://example.org/rss" {
  123. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  124. }
  125. if subscriptions[0].Type != "rss" {
  126. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  127. }
  128. }
  129. func TestParseWebPageWithAtomFeed(t *testing.T) {
  130. htmlPage := `
  131. <!doctype html>
  132. <html>
  133. <head>
  134. <link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Some Title">
  135. </head>
  136. <body>
  137. </body>
  138. </html>`
  139. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
  140. if err != nil {
  141. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  142. }
  143. if len(subscriptions) != 1 {
  144. t.Fatal(`Incorrect number of subscriptions returned`)
  145. }
  146. if subscriptions[0].Title != "Some Title" {
  147. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  148. }
  149. if subscriptions[0].URL != "http://example.org/atom.xml" {
  150. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  151. }
  152. if subscriptions[0].Type != "atom" {
  153. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  154. }
  155. }
  156. func TestParseWebPageWithJSONFeed(t *testing.T) {
  157. htmlPage := `
  158. <!doctype html>
  159. <html>
  160. <head>
  161. <link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
  162. </head>
  163. <body>
  164. </body>
  165. </html>`
  166. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
  167. if err != nil {
  168. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  169. }
  170. if len(subscriptions) != 1 {
  171. t.Fatal(`Incorrect number of subscriptions returned`)
  172. }
  173. if subscriptions[0].Title != "Some Title" {
  174. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  175. }
  176. if subscriptions[0].URL != "http://example.org/feed.json" {
  177. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  178. }
  179. if subscriptions[0].Type != "json" {
  180. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  181. }
  182. }
  183. func TestParseWebPageWithOldJSONFeedMimeType(t *testing.T) {
  184. htmlPage := `
  185. <!doctype html>
  186. <html>
  187. <head>
  188. <link href="http://example.org/feed.json" rel="alternate" type="application/json" title="Some Title">
  189. </head>
  190. <body>
  191. </body>
  192. </html>`
  193. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
  194. if err != nil {
  195. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  196. }
  197. if len(subscriptions) != 1 {
  198. t.Fatal(`Incorrect number of subscriptions returned`)
  199. }
  200. if subscriptions[0].Title != "Some Title" {
  201. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  202. }
  203. if subscriptions[0].URL != "http://example.org/feed.json" {
  204. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  205. }
  206. if subscriptions[0].Type != "json" {
  207. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  208. }
  209. }
  210. func TestParseWebPageWithJSONFeedWpJsonIgnored(t *testing.T) {
  211. htmlPage := `
  212. <!doctype html>
  213. <html>
  214. <head>
  215. <link rel="https://api.w.org/" href="https://example.org/wp-json/" />
  216. <link rel="alternate" title="JSON" type="application/json" href="https://example.org/wp-json/wp/v2/posts/123456" />
  217. </head>
  218. <body>
  219. </body>
  220. </html>`
  221. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
  222. if err != nil {
  223. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  224. }
  225. if len(subscriptions) != 0 {
  226. t.Fatal(`Incorrect number of subscriptions returned`)
  227. }
  228. }
  229. func TestParseWebPageWithRelativeFeedURL(t *testing.T) {
  230. htmlPage := `
  231. <!doctype html>
  232. <html>
  233. <head>
  234. <link href="/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
  235. </head>
  236. <body>
  237. </body>
  238. </html>`
  239. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
  240. if err != nil {
  241. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  242. }
  243. if len(subscriptions) != 1 {
  244. t.Fatal(`Incorrect number of subscriptions returned`)
  245. }
  246. if subscriptions[0].Title != "Some Title" {
  247. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  248. }
  249. if subscriptions[0].URL != "http://example.org/feed.json" {
  250. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  251. }
  252. if subscriptions[0].Type != "json" {
  253. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  254. }
  255. }
  256. func TestParseWebPageWithEmptyTitle(t *testing.T) {
  257. htmlPage := `
  258. <!doctype html>
  259. <html>
  260. <head>
  261. <link href="/feed.json" rel="alternate" type="application/feed+json">
  262. </head>
  263. <body>
  264. </body>
  265. </html>`
  266. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
  267. if err != nil {
  268. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  269. }
  270. if len(subscriptions) != 1 {
  271. t.Fatal(`Incorrect number of subscriptions returned`)
  272. }
  273. if subscriptions[0].Title != "http://example.org/feed.json" {
  274. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  275. }
  276. if subscriptions[0].URL != "http://example.org/feed.json" {
  277. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  278. }
  279. if subscriptions[0].Type != "json" {
  280. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  281. }
  282. }
  283. func TestParseWebPageWithMultipleFeeds(t *testing.T) {
  284. htmlPage := `
  285. <!doctype html>
  286. <html>
  287. <head>
  288. <link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Atom Feed">
  289. <link href="http://example.org/feed.json" rel="alternate" type="application/json" title="JSON Feed">
  290. </head>
  291. <body>
  292. </body>
  293. </html>`
  294. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
  295. if err != nil {
  296. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  297. }
  298. if len(subscriptions) != 2 {
  299. t.Fatal(`Incorrect number of subscriptions returned`)
  300. }
  301. }
  302. func TestParseWebPageWithDuplicatedFeeds(t *testing.T) {
  303. htmlPage := `
  304. <!doctype html>
  305. <html>
  306. <head>
  307. <link href="http://example.org/feed.xml" rel="alternate" type="application/rss+xml" title="Feed A">
  308. <link href="http://example.org/feed.xml" rel="alternate" type="application/rss+xml" title="Feed B">
  309. </head>
  310. <body>
  311. </body>
  312. </html>`
  313. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
  314. if err != nil {
  315. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  316. }
  317. if len(subscriptions) != 1 {
  318. t.Fatal(`Incorrect number of subscriptions returned`)
  319. }
  320. if subscriptions[0].Title != "Feed A" {
  321. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  322. }
  323. if subscriptions[0].URL != "http://example.org/feed.xml" {
  324. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  325. }
  326. if subscriptions[0].Type != "rss" {
  327. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  328. }
  329. }
  330. func TestParseWebPageWithEmptyFeedURL(t *testing.T) {
  331. htmlPage := `
  332. <!doctype html>
  333. <html>
  334. <head>
  335. <link href rel="alternate" type="application/feed+json" title="Some Title">
  336. </head>
  337. <body>
  338. </body>
  339. </html>`
  340. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
  341. if err != nil {
  342. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  343. }
  344. if len(subscriptions) != 0 {
  345. t.Fatal(`Incorrect number of subscriptions returned`)
  346. }
  347. }
  348. func TestParseWebPageWithNoHref(t *testing.T) {
  349. htmlPage := `
  350. <!doctype html>
  351. <html>
  352. <head>
  353. <link rel="alternate" type="application/feed+json" title="Some Title">
  354. </head>
  355. <body>
  356. </body>
  357. </html>`
  358. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage))
  359. if err != nil {
  360. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  361. }
  362. if len(subscriptions) != 0 {
  363. t.Fatal(`Incorrect number of subscriptions returned`)
  364. }
  365. }
  366. func TestFindCanonicalURL(t *testing.T) {
  367. htmlPage := `
  368. <!doctype html>
  369. <html>
  370. <head>
  371. <link rel="canonical" href="https://example.org/canonical-page">
  372. </head>
  373. <body>
  374. </body>
  375. </html>`
  376. canonicalURL := NewSubscriptionFinder(nil).findCanonicalURL("https://example.org/page", "text/html", []byte(htmlPage))
  377. if canonicalURL != "https://example.org/canonical-page" {
  378. t.Errorf(`Unexpected canonical URL, got %q, expected %q`, canonicalURL, "https://example.org/canonical-page")
  379. }
  380. }
  381. func TestFindCanonicalURLNotFound(t *testing.T) {
  382. htmlPage := `
  383. <!doctype html>
  384. <html>
  385. <head>
  386. </head>
  387. <body>
  388. </body>
  389. </html>`
  390. canonicalURL := NewSubscriptionFinder(nil).findCanonicalURL("https://example.org/page", "text/html", []byte(htmlPage))
  391. if canonicalURL != "https://example.org/page" {
  392. t.Errorf(`Expected effective URL when canonical not found, got %q`, canonicalURL)
  393. }
  394. }