finder_test.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package subscription
  4. import (
  5. "testing"
  6. )
  7. func TestFindYoutubeFeed(t *testing.T) {
  8. type testResult struct {
  9. websiteURL string
  10. feedURLs []string
  11. discoveryError bool
  12. }
  13. scenarios := []testResult{
  14. // Video URL
  15. {
  16. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
  17. feedURLs: []string{},
  18. },
  19. // Video URL with position argument
  20. {
  21. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=1",
  22. feedURLs: []string{},
  23. },
  24. // Video URL with position argument
  25. {
  26. websiteURL: "https://www.youtube.com/watch?t=1&v=dQw4w9WgXcQ",
  27. feedURLs: []string{},
  28. },
  29. // Channel URL
  30. {
  31. websiteURL: "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
  32. feedURLs: []string{
  33. "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw",
  34. "https://www.youtube.com/feeds/videos.xml?playlist_id=UULF-Qj80avWItNRjkZ41rzHyw",
  35. "https://www.youtube.com/feeds/videos.xml?playlist_id=UUSH-Qj80avWItNRjkZ41rzHyw",
  36. "https://www.youtube.com/feeds/videos.xml?playlist_id=UULV-Qj80avWItNRjkZ41rzHyw",
  37. },
  38. },
  39. // Channel URL with name
  40. {
  41. websiteURL: "https://www.youtube.com/@ABCDEFG",
  42. feedURLs: []string{},
  43. },
  44. // Playlist URL
  45. {
  46. websiteURL: "https://www.youtube.com/playlist?list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR",
  47. feedURLs: []string{"https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR"},
  48. },
  49. // Playlist URL with video ID
  50. {
  51. websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM",
  52. feedURLs: []string{"https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM"},
  53. },
  54. // Playlist URL with video ID and index argument
  55. {
  56. websiteURL: "https://www.youtube.com/watch?v=6IutBmRJNLk&list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR&index=4",
  57. feedURLs: []string{"https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR"},
  58. },
  59. // Empty playlist ID parameter
  60. {
  61. websiteURL: "https://www.youtube.com/playlist?list=",
  62. feedURLs: []string{},
  63. },
  64. // Non-Youtube URL
  65. {
  66. websiteURL: "https://www.example.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
  67. feedURLs: []string{},
  68. },
  69. // Invalid URL
  70. {
  71. websiteURL: "https://example|org/",
  72. feedURLs: []string{},
  73. discoveryError: true,
  74. },
  75. }
  76. for _, scenario := range scenarios {
  77. subscriptions, localizedError := NewSubscriptionFinder(nil).findSubscriptionsFromYouTube(scenario.websiteURL)
  78. if scenario.discoveryError {
  79. if localizedError == nil {
  80. t.Fatalf(`Parsing an invalid URL should return an error`)
  81. }
  82. }
  83. if len(scenario.feedURLs) == 0 {
  84. if len(subscriptions) > 0 {
  85. t.Fatalf(`Parsing an invalid URL should not return any subscription: %q -> %v`, scenario.websiteURL, subscriptions)
  86. }
  87. } else {
  88. if localizedError != nil {
  89. t.Fatalf(`Parsing a correctly formatted YouTube playlist or channel page should not return any error: %v`, localizedError)
  90. }
  91. if len(subscriptions) != len(scenario.feedURLs) {
  92. t.Fatalf(`Incorrect number of subscriptions returned, expected %d, got %d`, len(scenario.feedURLs), len(subscriptions))
  93. }
  94. for i := range scenario.feedURLs {
  95. if subscriptions[i].URL != scenario.feedURLs[i] {
  96. t.Errorf(`Unexpected feed, got %s, instead of %s`, subscriptions[i].URL, scenario.feedURLs[i])
  97. }
  98. }
  99. }
  100. }
  101. }
  102. func TestParseWebPageWithRssFeed(t *testing.T) {
  103. htmlPage := `
  104. <!doctype html>
  105. <html>
  106. <head>
  107. <link href="http://example.org/rss" rel="alternate" type="application/rss+xml" title="Some Title">
  108. </head>
  109. <body>
  110. </body>
  111. </html>`
  112. doc, shouldNeverHappenErr := parseHTMLDocument("text/html", []byte(htmlPage))
  113. if shouldNeverHappenErr != nil {
  114. t.Fatalf(`Unable to parse the HTML: %v`, shouldNeverHappenErr)
  115. }
  116. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", doc)
  117. if err != nil {
  118. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  119. }
  120. if len(subscriptions) != 1 {
  121. t.Fatal(`Incorrect number of subscriptions returned`)
  122. }
  123. if subscriptions[0].Title != "Some Title" {
  124. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  125. }
  126. if subscriptions[0].URL != "http://example.org/rss" {
  127. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  128. }
  129. if subscriptions[0].Type != "rss" {
  130. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  131. }
  132. }
  133. func TestParseWebPageWithAtomFeed(t *testing.T) {
  134. htmlPage := `
  135. <!doctype html>
  136. <html>
  137. <head>
  138. <link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Some Title">
  139. </head>
  140. <body>
  141. </body>
  142. </html>`
  143. doc, shouldNeverHappenErr := parseHTMLDocument("text/html", []byte(htmlPage))
  144. if shouldNeverHappenErr != nil {
  145. t.Fatalf(`Unable to parse the HTML: %v`, shouldNeverHappenErr)
  146. }
  147. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", doc)
  148. if err != nil {
  149. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  150. }
  151. if len(subscriptions) != 1 {
  152. t.Fatal(`Incorrect number of subscriptions returned`)
  153. }
  154. if subscriptions[0].Title != "Some Title" {
  155. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  156. }
  157. if subscriptions[0].URL != "http://example.org/atom.xml" {
  158. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  159. }
  160. if subscriptions[0].Type != "atom" {
  161. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  162. }
  163. }
  164. func TestParseWebPageWithJSONFeed(t *testing.T) {
  165. htmlPage := `
  166. <!doctype html>
  167. <html>
  168. <head>
  169. <link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
  170. </head>
  171. <body>
  172. </body>
  173. </html>`
  174. doc, shouldNeverHappenErr := parseHTMLDocument("text/html", []byte(htmlPage))
  175. if shouldNeverHappenErr != nil {
  176. t.Fatalf(`Unable to parse the HTML: %v`, shouldNeverHappenErr)
  177. }
  178. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", doc)
  179. if err != nil {
  180. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  181. }
  182. if len(subscriptions) != 1 {
  183. t.Fatal(`Incorrect number of subscriptions returned`)
  184. }
  185. if subscriptions[0].Title != "Some Title" {
  186. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  187. }
  188. if subscriptions[0].URL != "http://example.org/feed.json" {
  189. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  190. }
  191. if subscriptions[0].Type != "json" {
  192. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  193. }
  194. }
  195. func TestParseWebPageWithOldJSONFeedMimeType(t *testing.T) {
  196. htmlPage := `
  197. <!doctype html>
  198. <html>
  199. <head>
  200. <link href="http://example.org/feed.json" rel="alternate" type="application/json" title="Some Title">
  201. </head>
  202. <body>
  203. </body>
  204. </html>`
  205. doc, shouldNeverHappenErr := parseHTMLDocument("text/html", []byte(htmlPage))
  206. if shouldNeverHappenErr != nil {
  207. t.Fatalf(`Unable to parse the HTML: %v`, shouldNeverHappenErr)
  208. }
  209. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", doc)
  210. if err != nil {
  211. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  212. }
  213. if len(subscriptions) != 1 {
  214. t.Fatal(`Incorrect number of subscriptions returned`)
  215. }
  216. if subscriptions[0].Title != "Some Title" {
  217. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  218. }
  219. if subscriptions[0].URL != "http://example.org/feed.json" {
  220. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  221. }
  222. if subscriptions[0].Type != "json" {
  223. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  224. }
  225. }
  226. func TestParseWebPageWithJSONFeedWpJsonIgnored(t *testing.T) {
  227. htmlPage := `
  228. <!doctype html>
  229. <html>
  230. <head>
  231. <link rel="https://api.w.org/" href="https://example.org/wp-json/" />
  232. <link rel="alternate" title="JSON" type="application/json" href="https://example.org/wp-json/wp/v2/posts/123456" />
  233. </head>
  234. <body>
  235. </body>
  236. </html>`
  237. doc, shouldNeverHappenErr := parseHTMLDocument("text/html", []byte(htmlPage))
  238. if shouldNeverHappenErr != nil {
  239. t.Fatalf(`Unable to parse the HTML: %v`, shouldNeverHappenErr)
  240. }
  241. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", doc)
  242. if err != nil {
  243. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  244. }
  245. if len(subscriptions) != 0 {
  246. t.Fatal(`Incorrect number of subscriptions returned`)
  247. }
  248. }
  249. func TestParseWebPageWithRelativeFeedURL(t *testing.T) {
  250. htmlPage := `
  251. <!doctype html>
  252. <html>
  253. <head>
  254. <link href="/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
  255. </head>
  256. <body>
  257. </body>
  258. </html>`
  259. doc, shouldNeverHappenErr := parseHTMLDocument("text/html", []byte(htmlPage))
  260. if shouldNeverHappenErr != nil {
  261. t.Fatalf(`Unable to parse the HTML: %v`, shouldNeverHappenErr)
  262. }
  263. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", doc)
  264. if err != nil {
  265. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  266. }
  267. if len(subscriptions) != 1 {
  268. t.Fatal(`Incorrect number of subscriptions returned`)
  269. }
  270. if subscriptions[0].Title != "Some Title" {
  271. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  272. }
  273. if subscriptions[0].URL != "http://example.org/feed.json" {
  274. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  275. }
  276. if subscriptions[0].Type != "json" {
  277. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  278. }
  279. }
  280. func TestParseWebPageWithEmptyTitle(t *testing.T) {
  281. htmlPage := `
  282. <!doctype html>
  283. <html>
  284. <head>
  285. <link href="/feed.json" rel="alternate" type="application/feed+json">
  286. </head>
  287. <body>
  288. </body>
  289. </html>`
  290. doc, shouldNeverHappenErr := parseHTMLDocument("text/html", []byte(htmlPage))
  291. if shouldNeverHappenErr != nil {
  292. t.Fatalf(`Unable to parse the HTML: %v`, shouldNeverHappenErr)
  293. }
  294. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", doc)
  295. if err != nil {
  296. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  297. }
  298. if len(subscriptions) != 1 {
  299. t.Fatal(`Incorrect number of subscriptions returned`)
  300. }
  301. if subscriptions[0].Title != "http://example.org/feed.json" {
  302. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  303. }
  304. if subscriptions[0].URL != "http://example.org/feed.json" {
  305. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  306. }
  307. if subscriptions[0].Type != "json" {
  308. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  309. }
  310. }
  311. func TestParseWebPageWithMultipleFeeds(t *testing.T) {
  312. htmlPage := `
  313. <!doctype html>
  314. <html>
  315. <head>
  316. <link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Atom Feed">
  317. <link href="http://example.org/feed.json" rel="alternate" type="application/json" title="JSON Feed">
  318. </head>
  319. <body>
  320. </body>
  321. </html>`
  322. doc, shouldNeverHappenErr := parseHTMLDocument("text/html", []byte(htmlPage))
  323. if shouldNeverHappenErr != nil {
  324. t.Fatalf(`Unable to parse the HTML: %v`, shouldNeverHappenErr)
  325. }
  326. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", doc)
  327. if err != nil {
  328. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  329. }
  330. if len(subscriptions) != 2 {
  331. t.Fatal(`Incorrect number of subscriptions returned`)
  332. }
  333. }
  334. func TestParseWebPageWithDuplicatedFeeds(t *testing.T) {
  335. htmlPage := `
  336. <!doctype html>
  337. <html>
  338. <head>
  339. <link href="http://example.org/feed.xml" rel="alternate" type="application/rss+xml" title="Feed A">
  340. <link href="http://example.org/feed.xml" rel="alternate" type="application/rss+xml" title="Feed B">
  341. </head>
  342. <body>
  343. </body>
  344. </html>`
  345. doc, shouldNeverHappenErr := parseHTMLDocument("text/html", []byte(htmlPage))
  346. if shouldNeverHappenErr != nil {
  347. t.Fatalf(`Unable to parse the HTML: %v`, shouldNeverHappenErr)
  348. }
  349. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", doc)
  350. if err != nil {
  351. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  352. }
  353. if len(subscriptions) != 1 {
  354. t.Fatal(`Incorrect number of subscriptions returned`)
  355. }
  356. if subscriptions[0].Title != "Feed A" {
  357. t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
  358. }
  359. if subscriptions[0].URL != "http://example.org/feed.xml" {
  360. t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
  361. }
  362. if subscriptions[0].Type != "rss" {
  363. t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
  364. }
  365. }
  366. func TestParseWebPageWithEmptyFeedURL(t *testing.T) {
  367. htmlPage := `
  368. <!doctype html>
  369. <html>
  370. <head>
  371. <link href rel="alternate" type="application/feed+json" title="Some Title">
  372. </head>
  373. <body>
  374. </body>
  375. </html>`
  376. doc, shouldNeverHappenErr := parseHTMLDocument("text/html", []byte(htmlPage))
  377. if shouldNeverHappenErr != nil {
  378. t.Fatalf(`Unable to parse the HTML: %v`, shouldNeverHappenErr)
  379. }
  380. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", doc)
  381. if err != nil {
  382. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  383. }
  384. if len(subscriptions) != 0 {
  385. t.Fatal(`Incorrect number of subscriptions returned`)
  386. }
  387. }
  388. func TestParseWebPageWithNoHref(t *testing.T) {
  389. htmlPage := `
  390. <!doctype html>
  391. <html>
  392. <head>
  393. <link rel="alternate" type="application/feed+json" title="Some Title">
  394. </head>
  395. <body>
  396. </body>
  397. </html>`
  398. doc, shouldNeverHappenErr := parseHTMLDocument("text/html", []byte(htmlPage))
  399. if shouldNeverHappenErr != nil {
  400. t.Fatalf(`Unable to parse the HTML: %v`, shouldNeverHappenErr)
  401. }
  402. subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", doc)
  403. if err != nil {
  404. t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
  405. }
  406. if len(subscriptions) != 0 {
  407. t.Fatal(`Incorrect number of subscriptions returned`)
  408. }
  409. }
  410. func TestFindCanonicalURL(t *testing.T) {
  411. htmlPage := `
  412. <!doctype html>
  413. <html>
  414. <head>
  415. <link rel="canonical" href="https://example.org/canonical-page">
  416. </head>
  417. <body>
  418. </body>
  419. </html>`
  420. doc, shouldNeverHappenErr := parseHTMLDocument("text/html", []byte(htmlPage))
  421. if shouldNeverHappenErr != nil {
  422. t.Fatalf(`Unable to parse the HTML: %v`, shouldNeverHappenErr)
  423. }
  424. canonicalURL := NewSubscriptionFinder(nil).findCanonicalURL("https://example.org/page", "http://example.org", doc)
  425. if canonicalURL != "https://example.org/canonical-page" {
  426. t.Errorf(`Unexpected canonical URL, got %q, expected %q`, canonicalURL, "https://example.org/canonical-page")
  427. }
  428. }
  429. func TestFindCanonicalURLNotFound(t *testing.T) {
  430. htmlPage := `
  431. <!doctype html>
  432. <html>
  433. <head>
  434. </head>
  435. <body>
  436. </body>
  437. </html>`
  438. doc, shouldNeverHappenErr := parseHTMLDocument("text/html", []byte(htmlPage))
  439. if shouldNeverHappenErr != nil {
  440. t.Fatalf(`Unable to parse the HTML: %v`, shouldNeverHappenErr)
  441. }
  442. canonicalURL := NewSubscriptionFinder(nil).findCanonicalURL("https://example.org/page", "https://example.org", doc)
  443. if canonicalURL != "https://example.org/page" {
  444. t.Errorf(`Expected effective URL when canonical not found, got %q`, canonicalURL)
  445. }
  446. }