parser_test.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package rss
  5. import (
  6. "bytes"
  7. "testing"
  8. "time"
  9. )
  10. func TestParseRss2Sample(t *testing.T) {
  11. data := `
  12. <?xml version="1.0"?>
  13. <rss version="2.0">
  14. <channel>
  15. <title>Liftoff News</title>
  16. <link>http://liftoff.msfc.nasa.gov/</link>
  17. <description>Liftoff to Space Exploration.</description>
  18. <language>en-us</language>
  19. <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
  20. <lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
  21. <docs>http://blogs.law.harvard.edu/tech/rss</docs>
  22. <generator>Weblog Editor 2.0</generator>
  23. <managingEditor>editor@example.com</managingEditor>
  24. <webMaster>webmaster@example.com</webMaster>
  25. <item>
  26. <title>Star City</title>
  27. <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
  28. <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
  29. <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
  30. <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
  31. </item>
  32. <item>
  33. <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
  34. <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
  35. <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
  36. </item>
  37. <item>
  38. <title>The Engine That Does More</title>
  39. <link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
  40. <description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
  41. <pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
  42. <guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
  43. </item>
  44. <item>
  45. <title>Astronauts' Dirty Laundry</title>
  46. <link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
  47. <description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>
  48. <pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
  49. <guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
  50. </item>
  51. </channel>
  52. </rss>`
  53. feed, err := Parse(bytes.NewBufferString(data))
  54. if err != nil {
  55. t.Error(err)
  56. }
  57. if feed.Title != "Liftoff News" {
  58. t.Errorf("Incorrect title, got: %s", feed.Title)
  59. }
  60. if feed.FeedURL != "" {
  61. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  62. }
  63. if feed.SiteURL != "http://liftoff.msfc.nasa.gov/" {
  64. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  65. }
  66. if len(feed.Entries) != 4 {
  67. t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
  68. }
  69. expectedDate := time.Date(2003, time.June, 3, 9, 39, 21, 0, time.UTC)
  70. if !feed.Entries[0].Date.Equal(expectedDate) {
  71. t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
  72. }
  73. if feed.Entries[0].Hash != "5b2b4ac2fe1786ddf0fd2da2f1b07f64e691264f41f2db3ea360f31bb6d9152b" {
  74. t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
  75. }
  76. if feed.Entries[0].URL != "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp" {
  77. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  78. }
  79. if feed.Entries[0].Title != "Star City" {
  80. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  81. }
  82. if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Star City</a>.` {
  83. t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
  84. }
  85. }
  86. func TestParseFeedWithoutTitle(t *testing.T) {
  87. data := `<?xml version="1.0" encoding="utf-8"?>
  88. <rss version="2.0">
  89. <channel>
  90. <link>https://example.org/</link>
  91. </channel>
  92. </rss>`
  93. feed, err := Parse(bytes.NewBufferString(data))
  94. if err != nil {
  95. t.Error(err)
  96. }
  97. if feed.Title != "https://example.org/" {
  98. t.Errorf("Incorrect feed title, got: %s", feed.Title)
  99. }
  100. }
  101. func TestParseEntryWithoutTitle(t *testing.T) {
  102. data := `<?xml version="1.0" encoding="utf-8"?>
  103. <rss version="2.0">
  104. <channel>
  105. <link>https://example.org/</link>
  106. <item>
  107. <link>https://example.org/item</link>
  108. </item>
  109. </channel>
  110. </rss>`
  111. feed, err := Parse(bytes.NewBufferString(data))
  112. if err != nil {
  113. t.Error(err)
  114. }
  115. if feed.Entries[0].Title != "https://example.org/item" {
  116. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  117. }
  118. }
  119. func TestParseEntryWithoutLink(t *testing.T) {
  120. data := `<?xml version="1.0" encoding="utf-8"?>
  121. <rss version="2.0">
  122. <channel>
  123. <link>https://example.org/</link>
  124. <item>
  125. <guid isPermaLink="false">1234</guid>
  126. </item>
  127. </channel>
  128. </rss>`
  129. feed, err := Parse(bytes.NewBufferString(data))
  130. if err != nil {
  131. t.Error(err)
  132. }
  133. if feed.Entries[0].URL != "https://example.org/" {
  134. t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
  135. }
  136. if feed.Entries[0].Hash != "03ac674216f3e15c761ee1a5e255f067953623c8b388b4459e13f978d7c846f4" {
  137. t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
  138. }
  139. }
  140. func TestParseFeedURLWithAtomLink(t *testing.T) {
  141. data := `<?xml version="1.0" encoding="utf-8"?>
  142. <rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
  143. <channel>
  144. <title>Example</title>
  145. <link>https://example.org/</link>
  146. <atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
  147. </channel>
  148. </rss>`
  149. feed, err := Parse(bytes.NewBufferString(data))
  150. if err != nil {
  151. t.Error(err)
  152. }
  153. if feed.FeedURL != "https://example.org/rss" {
  154. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  155. }
  156. if feed.SiteURL != "https://example.org/" {
  157. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  158. }
  159. }
  160. func TestParseEntryWithAtomAuthor(t *testing.T) {
  161. data := `<?xml version="1.0" encoding="utf-8"?>
  162. <rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
  163. <channel>
  164. <title>Example</title>
  165. <link>https://example.org/</link>
  166. <atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
  167. <item>
  168. <title>Test</title>
  169. <link>https://example.org/item</link>
  170. <author xmlns:author="http://www.w3.org/2005/Atom">
  171. <name>Foo Bar</name>
  172. <title>Vice President</title>
  173. <department/>
  174. <company>FooBar Inc.</company>
  175. </author>
  176. </item>
  177. </channel>
  178. </rss>`
  179. feed, err := Parse(bytes.NewBufferString(data))
  180. if err != nil {
  181. t.Error(err)
  182. }
  183. if feed.Entries[0].Author != "Foo Bar" {
  184. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  185. }
  186. }
  187. func TestParseEntryWithDublinCoreAuthor(t *testing.T) {
  188. data := `<?xml version="1.0" encoding="utf-8"?>
  189. <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
  190. <channel>
  191. <title>Example</title>
  192. <link>https://example.org/</link>
  193. <item>
  194. <title>Test</title>
  195. <link>https://example.org/item</link>
  196. <dc:creator>Me (me@example.com)</dc:creator>
  197. </item>
  198. </channel>
  199. </rss>`
  200. feed, err := Parse(bytes.NewBufferString(data))
  201. if err != nil {
  202. t.Error(err)
  203. }
  204. if feed.Entries[0].Author != "Me (me@example.com)" {
  205. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  206. }
  207. }
  208. func TestParseEntryWithItunesAuthor(t *testing.T) {
  209. data := `<?xml version="1.0" encoding="utf-8"?>
  210. <rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
  211. <channel>
  212. <title>Example</title>
  213. <link>https://example.org/</link>
  214. <item>
  215. <title>Test</title>
  216. <link>https://example.org/item</link>
  217. <itunes:author>Someone</itunes:author>
  218. </item>
  219. </channel>
  220. </rss>`
  221. feed, err := Parse(bytes.NewBufferString(data))
  222. if err != nil {
  223. t.Error(err)
  224. }
  225. if feed.Entries[0].Author != "Someone" {
  226. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  227. }
  228. }
  229. func TestParseFeedWithItunesAuthor(t *testing.T) {
  230. data := `<?xml version="1.0" encoding="utf-8"?>
  231. <rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
  232. <channel>
  233. <title>Example</title>
  234. <link>https://example.org/</link>
  235. <itunes:author>Someone</itunes:author>
  236. <item>
  237. <title>Test</title>
  238. <link>https://example.org/item</link>
  239. </item>
  240. </channel>
  241. </rss>`
  242. feed, err := Parse(bytes.NewBufferString(data))
  243. if err != nil {
  244. t.Error(err)
  245. }
  246. if feed.Entries[0].Author != "Someone" {
  247. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  248. }
  249. }
  250. func TestParseEntryWithDublinCoreDate(t *testing.T) {
  251. data := `<?xml version="1.0" encoding="utf-8"?>
  252. <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
  253. <channel>
  254. <title>Example</title>
  255. <link>http://example.org/</link>
  256. <item>
  257. <title>Item 1</title>
  258. <link>http://example.org/item1</link>
  259. <description>Description.</description>
  260. <guid isPermaLink="false">UUID</guid>
  261. <dc:date>2002-09-29T23:40:06-05:00</dc:date>
  262. </item>
  263. </channel>
  264. </rss>`
  265. feed, err := Parse(bytes.NewBufferString(data))
  266. if err != nil {
  267. t.Error(err)
  268. }
  269. location, _ := time.LoadLocation("EST")
  270. expectedDate := time.Date(2002, time.September, 29, 23, 40, 06, 0, location)
  271. if !feed.Entries[0].Date.Equal(expectedDate) {
  272. t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
  273. }
  274. }
  275. func TestParseEntryWithContentEncoded(t *testing.T) {
  276. data := `<?xml version="1.0" encoding="utf-8"?>
  277. <rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
  278. <channel>
  279. <title>Example</title>
  280. <link>http://example.org/</link>
  281. <item>
  282. <title>Item 1</title>
  283. <link>http://example.org/item1</link>
  284. <description>Description.</description>
  285. <guid isPermaLink="false">UUID</guid>
  286. <content:encoded><![CDATA[<p><a href="http://www.example.org/">Example</a>.</p>]]></content:encoded>
  287. </item>
  288. </channel>
  289. </rss>`
  290. feed, err := Parse(bytes.NewBufferString(data))
  291. if err != nil {
  292. t.Error(err)
  293. }
  294. if feed.Entries[0].Content != `<p><a href="http://www.example.org/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Example</a>.</p>` {
  295. t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
  296. }
  297. }
  298. func TestParseEntryWithFeedBurnerLink(t *testing.T) {
  299. data := `<?xml version="1.0" encoding="utf-8"?>
  300. <rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
  301. <channel>
  302. <title>Example</title>
  303. <link>http://example.org/</link>
  304. <item>
  305. <title>Item 1</title>
  306. <link>http://example.org/item1</link>
  307. <feedburner:origLink>http://example.org/original</feedburner:origLink>
  308. </item>
  309. </channel>
  310. </rss>`
  311. feed, err := Parse(bytes.NewBufferString(data))
  312. if err != nil {
  313. t.Error(err)
  314. }
  315. if feed.Entries[0].URL != "http://example.org/original" {
  316. t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].URL)
  317. }
  318. }
  319. func TestParseEntryTitleWithWhitespaces(t *testing.T) {
  320. data := `<?xml version="1.0" encoding="utf-8"?>
  321. <rss version="2.0">
  322. <channel>
  323. <title>Example</title>
  324. <link>http://example.org</link>
  325. <item>
  326. <title>
  327. Some Title
  328. </title>
  329. <link>http://www.example.org/entries/1</link>
  330. <pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
  331. </item>
  332. </channel>
  333. </rss>`
  334. feed, err := Parse(bytes.NewBufferString(data))
  335. if err != nil {
  336. t.Error(err)
  337. }
  338. if feed.Entries[0].Title != "Some Title" {
  339. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  340. }
  341. }
  342. func TestParseEntryWithEnclosures(t *testing.T) {
  343. data := `<?xml version="1.0" encoding="utf-8"?>
  344. <rss version="2.0">
  345. <channel>
  346. <title>My Podcast Feed</title>
  347. <link>http://example.org</link>
  348. <author>some.email@example.org</author>
  349. <item>
  350. <title>Podcasting with RSS</title>
  351. <link>http://www.example.org/entries/1</link>
  352. <description>An overview of RSS podcasting</description>
  353. <pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
  354. <guid isPermaLink="true">http://www.example.org/entries/1</guid>
  355. <enclosure url="http://www.example.org/myaudiofile.mp3"
  356. length="12345"
  357. type="audio/mpeg" />
  358. </item>
  359. </channel>
  360. </rss>`
  361. feed, err := Parse(bytes.NewBufferString(data))
  362. if err != nil {
  363. t.Error(err)
  364. }
  365. if len(feed.Entries) != 1 {
  366. t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
  367. }
  368. if feed.Entries[0].URL != "http://www.example.org/entries/1" {
  369. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  370. }
  371. if len(feed.Entries[0].Enclosures) != 1 {
  372. t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
  373. }
  374. if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
  375. t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
  376. }
  377. if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
  378. t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
  379. }
  380. if feed.Entries[0].Enclosures[0].Size != 12345 {
  381. t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
  382. }
  383. }
  384. func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) {
  385. data := `<?xml version="1.0" encoding="utf-8"?>
  386. <rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
  387. <channel>
  388. <title>My Example Feed</title>
  389. <link>http://example.org</link>
  390. <author>some.email@example.org</author>
  391. <item>
  392. <title>Example Item</title>
  393. <link>http://www.example.org/entries/1</link>
  394. <enclosure
  395. url="http://feedproxy.google.com/~r/example/~5/lpMyFSCvubs/File.mp3"
  396. length="76192460"
  397. type="audio/mpeg" />
  398. <feedburner:origEnclosureLink>http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3</feedburner:origEnclosureLink>
  399. </item>
  400. </channel>
  401. </rss>`
  402. feed, err := Parse(bytes.NewBufferString(data))
  403. if err != nil {
  404. t.Error(err)
  405. }
  406. if len(feed.Entries) != 1 {
  407. t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
  408. }
  409. if feed.Entries[0].URL != "http://www.example.org/entries/1" {
  410. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  411. }
  412. if len(feed.Entries[0].Enclosures) != 1 {
  413. t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
  414. }
  415. if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" {
  416. t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
  417. }
  418. if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
  419. t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
  420. }
  421. if feed.Entries[0].Enclosures[0].Size != 76192460 {
  422. t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
  423. }
  424. }