parser_test.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package rss
  5. import (
  6. "bytes"
  7. "testing"
  8. "time"
  9. )
  10. func TestParseRss2Sample(t *testing.T) {
  11. data := `
  12. <?xml version="1.0"?>
  13. <rss version="2.0">
  14. <channel>
  15. <title>Liftoff News</title>
  16. <link>http://liftoff.msfc.nasa.gov/</link>
  17. <description>Liftoff to Space Exploration.</description>
  18. <language>en-us</language>
  19. <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
  20. <lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
  21. <docs>http://blogs.law.harvard.edu/tech/rss</docs>
  22. <generator>Weblog Editor 2.0</generator>
  23. <managingEditor>editor@example.com</managingEditor>
  24. <webMaster>webmaster@example.com</webMaster>
  25. <item>
  26. <title>Star City</title>
  27. <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
  28. <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
  29. <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
  30. <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
  31. </item>
  32. <item>
  33. <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
  34. <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
  35. <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
  36. </item>
  37. <item>
  38. <title>The Engine That Does More</title>
  39. <link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
  40. <description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
  41. <pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
  42. <guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
  43. </item>
  44. <item>
  45. <title>Astronauts' Dirty Laundry</title>
  46. <link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
  47. <description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>
  48. <pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
  49. <guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
  50. </item>
  51. </channel>
  52. </rss>`
  53. feed, err := Parse(bytes.NewBufferString(data))
  54. if err != nil {
  55. t.Error(err)
  56. }
  57. if feed.Title != "Liftoff News" {
  58. t.Errorf("Incorrect title, got: %s", feed.Title)
  59. }
  60. if feed.FeedURL != "" {
  61. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  62. }
  63. if feed.SiteURL != "http://liftoff.msfc.nasa.gov/" {
  64. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  65. }
  66. if len(feed.Entries) != 4 {
  67. t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
  68. }
  69. expectedDate := time.Date(2003, time.June, 3, 9, 39, 21, 0, time.UTC)
  70. if !feed.Entries[0].Date.Equal(expectedDate) {
  71. t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
  72. }
  73. if feed.Entries[0].Hash != "5b2b4ac2fe1786ddf0fd2da2f1b07f64e691264f41f2db3ea360f31bb6d9152b" {
  74. t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
  75. }
  76. if feed.Entries[0].URL != "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp" {
  77. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  78. }
  79. if feed.Entries[0].Title != "Star City" {
  80. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  81. }
  82. if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.` {
  83. t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
  84. }
  85. }
  86. func TestParseFeedWithoutTitle(t *testing.T) {
  87. data := `<?xml version="1.0" encoding="utf-8"?>
  88. <rss version="2.0">
  89. <channel>
  90. <link>https://example.org/</link>
  91. </channel>
  92. </rss>`
  93. feed, err := Parse(bytes.NewBufferString(data))
  94. if err != nil {
  95. t.Error(err)
  96. }
  97. if feed.Title != "https://example.org/" {
  98. t.Errorf("Incorrect feed title, got: %s", feed.Title)
  99. }
  100. }
  101. func TestParseEntryWithoutTitle(t *testing.T) {
  102. data := `<?xml version="1.0" encoding="utf-8"?>
  103. <rss version="2.0">
  104. <channel>
  105. <link>https://example.org/</link>
  106. <item>
  107. <link>https://example.org/item</link>
  108. </item>
  109. </channel>
  110. </rss>`
  111. feed, err := Parse(bytes.NewBufferString(data))
  112. if err != nil {
  113. t.Error(err)
  114. }
  115. if feed.Entries[0].Title != "https://example.org/item" {
  116. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  117. }
  118. }
  119. func TestParseEntryWithoutLink(t *testing.T) {
  120. data := `<?xml version="1.0" encoding="utf-8"?>
  121. <rss version="2.0">
  122. <channel>
  123. <link>https://example.org/</link>
  124. <item>
  125. <guid isPermaLink="false">1234</guid>
  126. </item>
  127. </channel>
  128. </rss>`
  129. feed, err := Parse(bytes.NewBufferString(data))
  130. if err != nil {
  131. t.Error(err)
  132. }
  133. if feed.Entries[0].URL != "https://example.org/" {
  134. t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
  135. }
  136. if feed.Entries[0].Hash != "03ac674216f3e15c761ee1a5e255f067953623c8b388b4459e13f978d7c846f4" {
  137. t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
  138. }
  139. }
  140. func TestParseEntryWithAtomLink(t *testing.T) {
  141. data := `<?xml version="1.0" encoding="utf-8"?>
  142. <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  143. <channel>
  144. <link>https://example.org/</link>
  145. <item>
  146. <title>Test</title>
  147. <atom:link href="https://example.org/item" />
  148. </item>
  149. </channel>
  150. </rss>`
  151. feed, err := Parse(bytes.NewBufferString(data))
  152. if err != nil {
  153. t.Error(err)
  154. }
  155. if feed.Entries[0].URL != "https://example.org/item" {
  156. t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
  157. }
  158. }
  159. func TestParseEntryWithMultipleAtomLinks(t *testing.T) {
  160. data := `<?xml version="1.0" encoding="utf-8"?>
  161. <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  162. <channel>
  163. <link>https://example.org/</link>
  164. <item>
  165. <title>Test</title>
  166. <atom:link rel="payment" href="https://example.org/a" />
  167. <atom:link rel="http://foobar.tld" href="https://example.org/b" />
  168. </item>
  169. </channel>
  170. </rss>`
  171. feed, err := Parse(bytes.NewBufferString(data))
  172. if err != nil {
  173. t.Error(err)
  174. }
  175. if feed.Entries[0].URL != "https://example.org/b" {
  176. t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
  177. }
  178. }
  179. func TestParseFeedURLWithAtomLink(t *testing.T) {
  180. data := `<?xml version="1.0" encoding="utf-8"?>
  181. <rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
  182. <channel>
  183. <title>Example</title>
  184. <link>https://example.org/</link>
  185. <atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
  186. </channel>
  187. </rss>`
  188. feed, err := Parse(bytes.NewBufferString(data))
  189. if err != nil {
  190. t.Error(err)
  191. }
  192. if feed.FeedURL != "https://example.org/rss" {
  193. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  194. }
  195. if feed.SiteURL != "https://example.org/" {
  196. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  197. }
  198. }
  199. func TestParseEntryWithAtomAuthor(t *testing.T) {
  200. data := `<?xml version="1.0" encoding="utf-8"?>
  201. <rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
  202. <channel>
  203. <title>Example</title>
  204. <link>https://example.org/</link>
  205. <atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
  206. <item>
  207. <title>Test</title>
  208. <link>https://example.org/item</link>
  209. <author xmlns:author="http://www.w3.org/2005/Atom">
  210. <name>Foo Bar</name>
  211. <title>Vice President</title>
  212. <department/>
  213. <company>FooBar Inc.</company>
  214. </author>
  215. </item>
  216. </channel>
  217. </rss>`
  218. feed, err := Parse(bytes.NewBufferString(data))
  219. if err != nil {
  220. t.Error(err)
  221. }
  222. if feed.Entries[0].Author != "Foo Bar" {
  223. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  224. }
  225. }
  226. func TestParseEntryWithDublinCoreAuthor(t *testing.T) {
  227. data := `<?xml version="1.0" encoding="utf-8"?>
  228. <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
  229. <channel>
  230. <title>Example</title>
  231. <link>https://example.org/</link>
  232. <item>
  233. <title>Test</title>
  234. <link>https://example.org/item</link>
  235. <dc:creator>Me (me@example.com)</dc:creator>
  236. </item>
  237. </channel>
  238. </rss>`
  239. feed, err := Parse(bytes.NewBufferString(data))
  240. if err != nil {
  241. t.Error(err)
  242. }
  243. if feed.Entries[0].Author != "Me (me@example.com)" {
  244. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  245. }
  246. }
  247. func TestParseEntryWithItunesAuthor(t *testing.T) {
  248. data := `<?xml version="1.0" encoding="utf-8"?>
  249. <rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
  250. <channel>
  251. <title>Example</title>
  252. <link>https://example.org/</link>
  253. <item>
  254. <title>Test</title>
  255. <link>https://example.org/item</link>
  256. <itunes:author>Someone</itunes:author>
  257. </item>
  258. </channel>
  259. </rss>`
  260. feed, err := Parse(bytes.NewBufferString(data))
  261. if err != nil {
  262. t.Error(err)
  263. }
  264. if feed.Entries[0].Author != "Someone" {
  265. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  266. }
  267. }
  268. func TestParseFeedWithItunesAuthor(t *testing.T) {
  269. data := `<?xml version="1.0" encoding="utf-8"?>
  270. <rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
  271. <channel>
  272. <title>Example</title>
  273. <link>https://example.org/</link>
  274. <itunes:author>Someone</itunes:author>
  275. <item>
  276. <title>Test</title>
  277. <link>https://example.org/item</link>
  278. </item>
  279. </channel>
  280. </rss>`
  281. feed, err := Parse(bytes.NewBufferString(data))
  282. if err != nil {
  283. t.Error(err)
  284. }
  285. if feed.Entries[0].Author != "Someone" {
  286. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  287. }
  288. }
  289. func TestParseEntryWithDublinCoreDate(t *testing.T) {
  290. data := `<?xml version="1.0" encoding="utf-8"?>
  291. <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
  292. <channel>
  293. <title>Example</title>
  294. <link>http://example.org/</link>
  295. <item>
  296. <title>Item 1</title>
  297. <link>http://example.org/item1</link>
  298. <description>Description.</description>
  299. <guid isPermaLink="false">UUID</guid>
  300. <dc:date>2002-09-29T23:40:06-05:00</dc:date>
  301. </item>
  302. </channel>
  303. </rss>`
  304. feed, err := Parse(bytes.NewBufferString(data))
  305. if err != nil {
  306. t.Error(err)
  307. }
  308. location, _ := time.LoadLocation("EST")
  309. expectedDate := time.Date(2002, time.September, 29, 23, 40, 06, 0, location)
  310. if !feed.Entries[0].Date.Equal(expectedDate) {
  311. t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
  312. }
  313. }
  314. func TestParseEntryWithContentEncoded(t *testing.T) {
  315. data := `<?xml version="1.0" encoding="utf-8"?>
  316. <rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
  317. <channel>
  318. <title>Example</title>
  319. <link>http://example.org/</link>
  320. <item>
  321. <title>Item 1</title>
  322. <link>http://example.org/item1</link>
  323. <description>Description.</description>
  324. <guid isPermaLink="false">UUID</guid>
  325. <content:encoded><![CDATA[<p><a href="http://www.example.org/">Example</a>.</p>]]></content:encoded>
  326. </item>
  327. </channel>
  328. </rss>`
  329. feed, err := Parse(bytes.NewBufferString(data))
  330. if err != nil {
  331. t.Error(err)
  332. }
  333. if feed.Entries[0].Content != `<p><a href="http://www.example.org/">Example</a>.</p>` {
  334. t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
  335. }
  336. }
  337. func TestParseEntryWithFeedBurnerLink(t *testing.T) {
  338. data := `<?xml version="1.0" encoding="utf-8"?>
  339. <rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
  340. <channel>
  341. <title>Example</title>
  342. <link>http://example.org/</link>
  343. <item>
  344. <title>Item 1</title>
  345. <link>http://example.org/item1</link>
  346. <feedburner:origLink>http://example.org/original</feedburner:origLink>
  347. </item>
  348. </channel>
  349. </rss>`
  350. feed, err := Parse(bytes.NewBufferString(data))
  351. if err != nil {
  352. t.Error(err)
  353. }
  354. if feed.Entries[0].URL != "http://example.org/original" {
  355. t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].URL)
  356. }
  357. }
  358. func TestParseEntryTitleWithWhitespaces(t *testing.T) {
  359. data := `<?xml version="1.0" encoding="utf-8"?>
  360. <rss version="2.0">
  361. <channel>
  362. <title>Example</title>
  363. <link>http://example.org</link>
  364. <item>
  365. <title>
  366. Some Title
  367. </title>
  368. <link>http://www.example.org/entries/1</link>
  369. <pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
  370. </item>
  371. </channel>
  372. </rss>`
  373. feed, err := Parse(bytes.NewBufferString(data))
  374. if err != nil {
  375. t.Error(err)
  376. }
  377. if feed.Entries[0].Title != "Some Title" {
  378. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  379. }
  380. }
  381. func TestParseEntryWithEnclosures(t *testing.T) {
  382. data := `<?xml version="1.0" encoding="utf-8"?>
  383. <rss version="2.0">
  384. <channel>
  385. <title>My Podcast Feed</title>
  386. <link>http://example.org</link>
  387. <author>some.email@example.org</author>
  388. <item>
  389. <title>Podcasting with RSS</title>
  390. <link>http://www.example.org/entries/1</link>
  391. <description>An overview of RSS podcasting</description>
  392. <pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
  393. <guid isPermaLink="true">http://www.example.org/entries/1</guid>
  394. <enclosure url="http://www.example.org/myaudiofile.mp3"
  395. length="12345"
  396. type="audio/mpeg" />
  397. </item>
  398. </channel>
  399. </rss>`
  400. feed, err := Parse(bytes.NewBufferString(data))
  401. if err != nil {
  402. t.Error(err)
  403. }
  404. if len(feed.Entries) != 1 {
  405. t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
  406. }
  407. if feed.Entries[0].URL != "http://www.example.org/entries/1" {
  408. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  409. }
  410. if len(feed.Entries[0].Enclosures) != 1 {
  411. t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
  412. }
  413. if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
  414. t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
  415. }
  416. if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
  417. t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
  418. }
  419. if feed.Entries[0].Enclosures[0].Size != 12345 {
  420. t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
  421. }
  422. }
  423. func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) {
  424. data := `<?xml version="1.0" encoding="utf-8"?>
  425. <rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
  426. <channel>
  427. <title>My Example Feed</title>
  428. <link>http://example.org</link>
  429. <author>some.email@example.org</author>
  430. <item>
  431. <title>Example Item</title>
  432. <link>http://www.example.org/entries/1</link>
  433. <enclosure
  434. url="http://feedproxy.google.com/~r/example/~5/lpMyFSCvubs/File.mp3"
  435. length="76192460"
  436. type="audio/mpeg" />
  437. <feedburner:origEnclosureLink>http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3</feedburner:origEnclosureLink>
  438. </item>
  439. </channel>
  440. </rss>`
  441. feed, err := Parse(bytes.NewBufferString(data))
  442. if err != nil {
  443. t.Error(err)
  444. }
  445. if len(feed.Entries) != 1 {
  446. t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
  447. }
  448. if feed.Entries[0].URL != "http://www.example.org/entries/1" {
  449. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  450. }
  451. if len(feed.Entries[0].Enclosures) != 1 {
  452. t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
  453. }
  454. if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" {
  455. t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
  456. }
  457. if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
  458. t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
  459. }
  460. if feed.Entries[0].Enclosures[0].Size != 76192460 {
  461. t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
  462. }
  463. }
  464. func TestParseEntryWithRelativeURL(t *testing.T) {
  465. data := `<?xml version="1.0" encoding="utf-8"?>
  466. <rss version="2.0">
  467. <channel>
  468. <link>https://example.org/</link>
  469. <item>
  470. <link>item.html</link>
  471. </item>
  472. </channel>
  473. </rss>`
  474. feed, err := Parse(bytes.NewBufferString(data))
  475. if err != nil {
  476. t.Error(err)
  477. }
  478. if feed.Entries[0].Title != "https://example.org/item.html" {
  479. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  480. }
  481. }
  482. func TestParseInvalidXml(t *testing.T) {
  483. data := `garbage`
  484. _, err := Parse(bytes.NewBufferString(data))
  485. if err == nil {
  486. t.Error("Parse should returns an error")
  487. }
  488. }