// finder_test.go (12 KB)
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package subscription
  4. import (
  5. "errors"
  6. "strings"
  7. "testing"
  8. )
  9. func TestFindYoutubePlaylistFeed(t *testing.T) {
  10. scenarios := map[string]string{
  11. "https://www.youtube.com/playlist?list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR": "https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR",
  12. "https://www.youtube.com/playlist?list=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM": "https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM",
  13. "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM": "https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM",
  14. }
  15. for websiteURL, expectedFeedURL := range scenarios {
  16. subscriptions, localizedError := NewSubscriptionFinder(nil).FindSubscriptionsFromYouTubePlaylistPage(websiteURL)
  17. if localizedError != nil {
  18. t.Fatalf(`Parsing a correctly formatted YouTube playlist page should not return any error: %v`, localizedError)
  19. }
  20. if len(subscriptions) != 1 {
  21. t.Fatal(`Incorrect number of subscriptions returned`)
  22. }
  23. if subscriptions[0].URL != expectedFeedURL {
  24. t.Errorf(`Unexpected Feed, got %s, instead of %s`, subscriptions[0].URL, expectedFeedURL)
  25. }
  26. }
  27. }
  28. func TestItDoesNotConsiderPlaylistWatchPageAsVideoWatchPage(t *testing.T) {
  29. _, localizedError := NewSubscriptionFinder(nil).FindSubscriptionsFromYouTubeVideoPage("https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM")
  30. if localizedError != nil {
  31. t.Fatalf(`Should not consider a playlist watch page as a video watch page`)
  32. }
  33. }
  34. func TestYoutubeIdExtractor(t *testing.T) {
  35. type testResult struct {
  36. ID string
  37. Kind youtubeKind
  38. error error
  39. }
  40. urls := map[string]testResult{
  41. "https://www.youtube.com/watch?v=dQw4w9WgXcQ": {
  42. ID: "dQw4w9WgXcQ",
  43. Kind: youtubeIDKindVideo,
  44. error: nil,
  45. },
  46. "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=1": {
  47. ID: "dQw4w9WgXcQ",
  48. Kind: youtubeIDKindVideo,
  49. error: nil,
  50. },
  51. "https://www.youtube.com/watch?t=1&v=dQw4w9WgXcQ": {
  52. ID: "dQw4w9WgXcQ",
  53. Kind: youtubeIDKindVideo,
  54. error: nil,
  55. },
  56. "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM": {
  57. ID: "PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM",
  58. Kind: youtubeIDKindPlaylist,
  59. error: nil,
  60. },
  61. "https://www.youtube.com/playlist?list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR": {
  62. ID: "PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR",
  63. Kind: youtubeIDKindPlaylist,
  64. error: nil,
  65. },
  66. "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw": {
  67. ID: "UC-Qj80avWItNRjkZ41rzHyw",
  68. Kind: youtubeIDKindChannel,
  69. error: nil,
  70. },
  71. "https://www.example.com/channel/UC-Qj80avWItNRjkZ41rzHyw": {
  72. ID: "",
  73. Kind: "",
  74. error: errNotYoutubeUrl,
  75. },
  76. }
  77. for websiteURL, expected := range urls {
  78. kind, id, err := youtubeURLIDExtractor(websiteURL)
  79. if !errors.Is(err, expected.error) {
  80. t.Fatalf(`Unexpected error: %v got %v`, expected.error, err)
  81. }
  82. if id != expected.ID {
  83. t.Fatalf(`Unexpected ID: %v got %v`, expected.ID, id)
  84. }
  85. if kind != expected.Kind {
  86. t.Fatalf(`Unexpected Kind: %v got %v`, expected.Kind, kind)
  87. }
  88. }
  89. }
  90. func TestFindYoutubeChannelFeed(t *testing.T) {
  91. scenarios := map[string]string{
  92. "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw": "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw",
  93. }
  94. for websiteURL, expectedFeedURL := range scenarios {
  95. subscriptions, localizedError := NewSubscriptionFinder(nil).FindSubscriptionsFromYouTubeChannelPage(websiteURL)
  96. if localizedError != nil {
  97. t.Fatalf(`Parsing a correctly formatted YouTube channel page should not return any error: %v`, localizedError)
  98. }
  99. if len(subscriptions) != 1 {
  100. t.Fatal(`Incorrect number of subscriptions returned`)
  101. }
  102. if subscriptions[0].URL != expectedFeedURL {
  103. t.Errorf(`Unexpected Feed, got %s, instead of %s`, subscriptions[0].URL, expectedFeedURL)
  104. }
  105. }
  106. }
  107. func TestParseWebPageWithRssFeed(t *testing.T) {
  108. htmlPage := `
  109. <!doctype html>
  110. <html>
  111. <head>
  112. <link href="http://example.org/rss" rel="alternate" type="application/rss+xml" title="Some Title">
  113. </head>
  114. <body>
  115. </body>
  116. </html>`
  117. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  118. if err != nil {
  119. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  120. }
  121. if len(subscriptions) != 1 {
  122. t.Fatal(`Incorrect number of subscriptions returned`)
  123. }
  124. if subscriptions[0].Title != "Some Title" {
  125. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  126. }
  127. if subscriptions[0].URL != "http://example.org/rss" {
  128. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  129. }
  130. if subscriptions[0].Type != "rss" {
  131. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  132. }
  133. }
  134. func TestParseWebPageWithAtomFeed(t *testing.T) {
  135. htmlPage := `
  136. <!doctype html>
  137. <html>
  138. <head>
  139. <link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Some Title">
  140. </head>
  141. <body>
  142. </body>
  143. </html>`
  144. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  145. if err != nil {
  146. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  147. }
  148. if len(subscriptions) != 1 {
  149. t.Fatal(`Incorrect number of subscriptions returned`)
  150. }
  151. if subscriptions[0].Title != "Some Title" {
  152. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  153. }
  154. if subscriptions[0].URL != "http://example.org/atom.xml" {
  155. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  156. }
  157. if subscriptions[0].Type != "atom" {
  158. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  159. }
  160. }
  161. func TestParseWebPageWithJSONFeed(t *testing.T) {
  162. htmlPage := `
  163. <!doctype html>
  164. <html>
  165. <head>
  166. <link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
  167. </head>
  168. <body>
  169. </body>
  170. </html>`
  171. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  172. if err != nil {
  173. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  174. }
  175. if len(subscriptions) != 1 {
  176. t.Fatal(`Incorrect number of subscriptions returned`)
  177. }
  178. if subscriptions[0].Title != "Some Title" {
  179. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  180. }
  181. if subscriptions[0].URL != "http://example.org/feed.json" {
  182. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  183. }
  184. if subscriptions[0].Type != "json" {
  185. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  186. }
  187. }
  188. func TestParseWebPageWithOldJSONFeedMimeType(t *testing.T) {
  189. htmlPage := `
  190. <!doctype html>
  191. <html>
  192. <head>
  193. <link href="http://example.org/feed.json" rel="alternate" type="application/json" title="Some Title">
  194. </head>
  195. <body>
  196. </body>
  197. </html>`
  198. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  199. if err != nil {
  200. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  201. }
  202. if len(subscriptions) != 1 {
  203. t.Fatal(`Incorrect number of subscriptions returned`)
  204. }
  205. if subscriptions[0].Title != "Some Title" {
  206. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  207. }
  208. if subscriptions[0].URL != "http://example.org/feed.json" {
  209. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  210. }
  211. if subscriptions[0].Type != "json" {
  212. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  213. }
  214. }
  215. func TestParseWebPageWithRelativeFeedURL(t *testing.T) {
  216. htmlPage := `
  217. <!doctype html>
  218. <html>
  219. <head>
  220. <link href="/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
  221. </head>
  222. <body>
  223. </body>
  224. </html>`
  225. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  226. if err != nil {
  227. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  228. }
  229. if len(subscriptions) != 1 {
  230. t.Fatal(`Incorrect number of subscriptions returned`)
  231. }
  232. if subscriptions[0].Title != "Some Title" {
  233. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  234. }
  235. if subscriptions[0].URL != "http://example.org/feed.json" {
  236. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  237. }
  238. if subscriptions[0].Type != "json" {
  239. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  240. }
  241. }
  242. func TestParseWebPageWithEmptyTitle(t *testing.T) {
  243. htmlPage := `
  244. <!doctype html>
  245. <html>
  246. <head>
  247. <link href="/feed.json" rel="alternate" type="application/feed+json">
  248. </head>
  249. <body>
  250. </body>
  251. </html>`
  252. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  253. if err != nil {
  254. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  255. }
  256. if len(subscriptions) != 1 {
  257. t.Fatal(`Incorrect number of subscriptions returned`)
  258. }
  259. if subscriptions[0].Title != "http://example.org/feed.json" {
  260. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  261. }
  262. if subscriptions[0].URL != "http://example.org/feed.json" {
  263. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  264. }
  265. if subscriptions[0].Type != "json" {
  266. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  267. }
  268. }
  269. func TestParseWebPageWithMultipleFeeds(t *testing.T) {
  270. htmlPage := `
  271. <!doctype html>
  272. <html>
  273. <head>
  274. <link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Atom Feed">
  275. <link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="JSON Feed">
  276. </head>
  277. <body>
  278. </body>
  279. </html>`
  280. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  281. if err != nil {
  282. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  283. }
  284. if len(subscriptions) != 2 {
  285. t.Fatal(`Incorrect number of subscriptions returned`)
  286. }
  287. }
  288. func TestParseWebPageWithDuplicatedFeeds(t *testing.T) {
  289. htmlPage := `
  290. <!doctype html>
  291. <html>
  292. <head>
  293. <link href="http://example.org/feed.xml" rel="alternate" type="application/rss+xml" title="Feed A">
  294. <link href="http://example.org/feed.xml" rel="alternate" type="application/rss+xml" title="Feed B">
  295. </head>
  296. <body>
  297. </body>
  298. </html>`
  299. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  300. if err != nil {
  301. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  302. }
  303. if len(subscriptions) != 1 {
  304. t.Fatal(`Incorrect number of subscriptions returned`)
  305. }
  306. if subscriptions[0].Title != "Feed A" {
  307. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  308. }
  309. if subscriptions[0].URL != "http://example.org/feed.xml" {
  310. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  311. }
  312. if subscriptions[0].Type != "rss" {
  313. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  314. }
  315. }
  316. func TestParseWebPageWithEmptyFeedURL(t *testing.T) {
  317. htmlPage := `
  318. <!doctype html>
  319. <html>
  320. <head>
  321. <link href rel="alternate" type="application/feed+json" title="Some Title">
  322. </head>
  323. <body>
  324. </body>
  325. </html>`
  326. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  327. if err != nil {
  328. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  329. }
  330. if len(subscriptions) != 0 {
  331. t.Fatal(`Incorrect number of subscriptions returned`)
  332. }
  333. }
  334. func TestParseWebPageWithNoHref(t *testing.T) {
  335. htmlPage := `
  336. <!doctype html>
  337. <html>
  338. <head>
  339. <link rel="alternate" type="application/feed+json" title="Some Title">
  340. </head>
  341. <body>
  342. </body>
  343. </html>`
  344. subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", "text/html", strings.NewReader(htmlPage))
  345. if err != nil {
  346. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  347. }
  348. if len(subscriptions) != 0 {
  349. t.Fatal(`Incorrect number of subscriptions returned`)
  350. }
  351. }