parser_test.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package rss
  5. import (
  6. "bytes"
  7. "testing"
  8. "time"
  9. )
  10. func TestParseRss2Sample(t *testing.T) {
  11. data := `
  12. <?xml version="1.0"?>
  13. <rss version="2.0">
  14. <channel>
  15. <title>Liftoff News</title>
  16. <link>http://liftoff.msfc.nasa.gov/</link>
  17. <description>Liftoff to Space Exploration.</description>
  18. <language>en-us</language>
  19. <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
  20. <lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
  21. <docs>http://blogs.law.harvard.edu/tech/rss</docs>
  22. <generator>Weblog Editor 2.0</generator>
  23. <managingEditor>editor@example.com</managingEditor>
  24. <webMaster>webmaster@example.com</webMaster>
  25. <item>
  26. <title>Star City</title>
  27. <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
  28. <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
  29. <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
  30. <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
  31. </item>
  32. <item>
  33. <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
  34. <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
  35. <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
  36. </item>
  37. <item>
  38. <title>The Engine That Does More</title>
  39. <link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
  40. <description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
  41. <pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
  42. <guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
  43. </item>
  44. <item>
  45. <title>Astronauts' Dirty Laundry</title>
  46. <link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
  47. <description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>
  48. <pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
  49. <guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
  50. </item>
  51. </channel>
  52. </rss>`
  53. feed, err := Parse(bytes.NewBufferString(data))
  54. if err != nil {
  55. t.Error(err)
  56. }
  57. if feed.Title != "Liftoff News" {
  58. t.Errorf("Incorrect title, got: %s", feed.Title)
  59. }
  60. if feed.FeedURL != "" {
  61. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  62. }
  63. if feed.SiteURL != "http://liftoff.msfc.nasa.gov/" {
  64. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  65. }
  66. if len(feed.Entries) != 4 {
  67. t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
  68. }
  69. expectedDate := time.Date(2003, time.June, 3, 9, 39, 21, 0, time.UTC)
  70. if !feed.Entries[0].Date.Equal(expectedDate) {
  71. t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
  72. }
  73. if feed.Entries[0].Hash != "5b2b4ac2fe1786ddf0fd2da2f1b07f64e691264f41f2db3ea360f31bb6d9152b" {
  74. t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
  75. }
  76. if feed.Entries[0].URL != "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp" {
  77. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  78. }
  79. if feed.Entries[0].Title != "Star City" {
  80. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  81. }
  82. if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Star City</a>.` {
  83. t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
  84. }
  85. }
  86. func TestParseFeedWithoutTitle(t *testing.T) {
  87. data := `<?xml version="1.0" encoding="utf-8"?>
  88. <rss version="2.0">
  89. <channel>
  90. <link>https://example.org/</link>
  91. </channel>
  92. </rss>`
  93. feed, err := Parse(bytes.NewBufferString(data))
  94. if err != nil {
  95. t.Error(err)
  96. }
  97. if feed.Title != "https://example.org/" {
  98. t.Errorf("Incorrect feed title, got: %s", feed.Title)
  99. }
  100. }
  101. func TestParseEntryWithoutTitle(t *testing.T) {
  102. data := `<?xml version="1.0" encoding="utf-8"?>
  103. <rss version="2.0">
  104. <channel>
  105. <link>https://example.org/</link>
  106. <item>
  107. <link>https://example.org/item</link>
  108. </item>
  109. </channel>
  110. </rss>`
  111. feed, err := Parse(bytes.NewBufferString(data))
  112. if err != nil {
  113. t.Error(err)
  114. }
  115. if feed.Entries[0].Title != "https://example.org/item" {
  116. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  117. }
  118. }
  119. func TestParseFeedURLWithAtomLink(t *testing.T) {
  120. data := `<?xml version="1.0" encoding="utf-8"?>
  121. <rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
  122. <channel>
  123. <title>Example</title>
  124. <link>https://example.org/</link>
  125. <atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
  126. </channel>
  127. </rss>`
  128. feed, err := Parse(bytes.NewBufferString(data))
  129. if err != nil {
  130. t.Error(err)
  131. }
  132. if feed.FeedURL != "https://example.org/rss" {
  133. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  134. }
  135. if feed.SiteURL != "https://example.org/" {
  136. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  137. }
  138. }
  139. func TestParseEntryWithAtomAuthor(t *testing.T) {
  140. data := `<?xml version="1.0" encoding="utf-8"?>
  141. <rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
  142. <channel>
  143. <title>Example</title>
  144. <link>https://example.org/</link>
  145. <atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
  146. <item>
  147. <title>Test</title>
  148. <link>https://example.org/item</link>
  149. <author xmlns:author="http://www.w3.org/2005/Atom">
  150. <name>Foo Bar</name>
  151. <title>Vice President</title>
  152. <department/>
  153. <company>FooBar Inc.</company>
  154. </author>
  155. </item>
  156. </channel>
  157. </rss>`
  158. feed, err := Parse(bytes.NewBufferString(data))
  159. if err != nil {
  160. t.Error(err)
  161. }
  162. if feed.Entries[0].Author != "Foo Bar" {
  163. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  164. }
  165. }
  166. func TestParseEntryWithDublinCoreAuthor(t *testing.T) {
  167. data := `<?xml version="1.0" encoding="utf-8"?>
  168. <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
  169. <channel>
  170. <title>Example</title>
  171. <link>https://example.org/</link>
  172. <item>
  173. <title>Test</title>
  174. <link>https://example.org/item</link>
  175. <dc:creator>Me (me@example.com)</dc:creator>
  176. </item>
  177. </channel>
  178. </rss>`
  179. feed, err := Parse(bytes.NewBufferString(data))
  180. if err != nil {
  181. t.Error(err)
  182. }
  183. if feed.Entries[0].Author != "Me (me@example.com)" {
  184. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  185. }
  186. }
  187. func TestParseEntryWithItunesAuthor(t *testing.T) {
  188. data := `<?xml version="1.0" encoding="utf-8"?>
  189. <rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
  190. <channel>
  191. <title>Example</title>
  192. <link>https://example.org/</link>
  193. <item>
  194. <title>Test</title>
  195. <link>https://example.org/item</link>
  196. <itunes:author>Someone</itunes:author>
  197. </item>
  198. </channel>
  199. </rss>`
  200. feed, err := Parse(bytes.NewBufferString(data))
  201. if err != nil {
  202. t.Error(err)
  203. }
  204. if feed.Entries[0].Author != "Someone" {
  205. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  206. }
  207. }
  208. func TestParseFeedWithItunesAuthor(t *testing.T) {
  209. data := `<?xml version="1.0" encoding="utf-8"?>
  210. <rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
  211. <channel>
  212. <title>Example</title>
  213. <link>https://example.org/</link>
  214. <itunes:author>Someone</itunes:author>
  215. <item>
  216. <title>Test</title>
  217. <link>https://example.org/item</link>
  218. </item>
  219. </channel>
  220. </rss>`
  221. feed, err := Parse(bytes.NewBufferString(data))
  222. if err != nil {
  223. t.Error(err)
  224. }
  225. if feed.Entries[0].Author != "Someone" {
  226. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  227. }
  228. }
  229. func TestParseEntryWithDublinCoreDate(t *testing.T) {
  230. data := `<?xml version="1.0" encoding="utf-8"?>
  231. <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
  232. <channel>
  233. <title>Example</title>
  234. <link>http://example.org/</link>
  235. <item>
  236. <title>Item 1</title>
  237. <link>http://example.org/item1</link>
  238. <description>Description.</description>
  239. <guid isPermaLink="false">UUID</guid>
  240. <dc:date>2002-09-29T23:40:06-05:00</dc:date>
  241. </item>
  242. </channel>
  243. </rss>`
  244. feed, err := Parse(bytes.NewBufferString(data))
  245. if err != nil {
  246. t.Error(err)
  247. }
  248. location, _ := time.LoadLocation("EST")
  249. expectedDate := time.Date(2002, time.September, 29, 23, 40, 06, 0, location)
  250. if !feed.Entries[0].Date.Equal(expectedDate) {
  251. t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
  252. }
  253. }
  254. func TestParseEntryWithContentEncoded(t *testing.T) {
  255. data := `<?xml version="1.0" encoding="utf-8"?>
  256. <rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
  257. <channel>
  258. <title>Example</title>
  259. <link>http://example.org/</link>
  260. <item>
  261. <title>Item 1</title>
  262. <link>http://example.org/item1</link>
  263. <description>Description.</description>
  264. <guid isPermaLink="false">UUID</guid>
  265. <content:encoded><![CDATA[<p><a href="http://www.example.org/">Example</a>.</p>]]></content:encoded>
  266. </item>
  267. </channel>
  268. </rss>`
  269. feed, err := Parse(bytes.NewBufferString(data))
  270. if err != nil {
  271. t.Error(err)
  272. }
  273. if feed.Entries[0].Content != `<p><a href="http://www.example.org/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Example</a>.</p>` {
  274. t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
  275. }
  276. }
  277. func TestParseEntryWithFeedBurnerLink(t *testing.T) {
  278. data := `<?xml version="1.0" encoding="utf-8"?>
  279. <rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
  280. <channel>
  281. <title>Example</title>
  282. <link>http://example.org/</link>
  283. <item>
  284. <title>Item 1</title>
  285. <link>http://example.org/item1</link>
  286. <feedburner:origLink>http://example.org/original</feedburner:origLink>
  287. </item>
  288. </channel>
  289. </rss>`
  290. feed, err := Parse(bytes.NewBufferString(data))
  291. if err != nil {
  292. t.Error(err)
  293. }
  294. if feed.Entries[0].URL != "http://example.org/original" {
  295. t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].URL)
  296. }
  297. }
  298. func TestParseEntryTitleWithWhitespaces(t *testing.T) {
  299. data := `<?xml version="1.0" encoding="utf-8"?>
  300. <rss version="2.0">
  301. <channel>
  302. <title>Example</title>
  303. <link>http://example.org</link>
  304. <item>
  305. <title>
  306. Some Title
  307. </title>
  308. <link>http://www.example.org/entries/1</link>
  309. <pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
  310. </item>
  311. </channel>
  312. </rss>`
  313. feed, err := Parse(bytes.NewBufferString(data))
  314. if err != nil {
  315. t.Error(err)
  316. }
  317. if feed.Entries[0].Title != "Some Title" {
  318. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  319. }
  320. }
  321. func TestParseEntryWithEnclosures(t *testing.T) {
  322. data := `<?xml version="1.0" encoding="utf-8"?>
  323. <rss version="2.0">
  324. <channel>
  325. <title>My Podcast Feed</title>
  326. <link>http://example.org</link>
  327. <author>some.email@example.org</author>
  328. <item>
  329. <title>Podcasting with RSS</title>
  330. <link>http://www.example.org/entries/1</link>
  331. <description>An overview of RSS podcasting</description>
  332. <pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
  333. <guid isPermaLink="true">http://www.example.org/entries/1</guid>
  334. <enclosure url="http://www.example.org/myaudiofile.mp3"
  335. length="12345"
  336. type="audio/mpeg" />
  337. </item>
  338. </channel>
  339. </rss>`
  340. feed, err := Parse(bytes.NewBufferString(data))
  341. if err != nil {
  342. t.Error(err)
  343. }
  344. if len(feed.Entries) != 1 {
  345. t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
  346. }
  347. if feed.Entries[0].URL != "http://www.example.org/entries/1" {
  348. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  349. }
  350. if len(feed.Entries[0].Enclosures) != 1 {
  351. t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
  352. }
  353. if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
  354. t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
  355. }
  356. if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
  357. t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
  358. }
  359. if feed.Entries[0].Enclosures[0].Size != 12345 {
  360. t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
  361. }
  362. }
  363. func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) {
  364. data := `<?xml version="1.0" encoding="utf-8"?>
  365. <rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
  366. <channel>
  367. <title>My Example Feed</title>
  368. <link>http://example.org</link>
  369. <author>some.email@example.org</author>
  370. <item>
  371. <title>Example Item</title>
  372. <link>http://www.example.org/entries/1</link>
  373. <enclosure
  374. url="http://feedproxy.google.com/~r/example/~5/lpMyFSCvubs/File.mp3"
  375. length="76192460"
  376. type="audio/mpeg" />
  377. <feedburner:origEnclosureLink>http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3</feedburner:origEnclosureLink>
  378. </item>
  379. </channel>
  380. </rss>`
  381. feed, err := Parse(bytes.NewBufferString(data))
  382. if err != nil {
  383. t.Error(err)
  384. }
  385. if len(feed.Entries) != 1 {
  386. t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
  387. }
  388. if feed.Entries[0].URL != "http://www.example.org/entries/1" {
  389. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  390. }
  391. if len(feed.Entries[0].Enclosures) != 1 {
  392. t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
  393. }
  394. if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" {
  395. t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
  396. }
  397. if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
  398. t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
  399. }
  400. if feed.Entries[0].Enclosures[0].Size != 76192460 {
  401. t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
  402. }
  403. }