parser_test.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package rdf // import "miniflux.app/reader/rdf"
  5. import (
  6. "bytes"
  7. "strings"
  8. "testing"
  9. "time"
  10. )
  11. func TestParseRDFSample(t *testing.T) {
  12. data := `
  13. <?xml version="1.0"?>
  14. <rdf:RDF
  15. xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  16. xmlns="http://purl.org/rss/1.0/"
  17. >
  18. <channel rdf:about="http://www.xml.com/xml/news.rss">
  19. <title>XML.com</title>
  20. <link>http://xml.com/pub</link>
  21. <description>
  22. XML.com features a rich mix of information and services
  23. for the XML community.
  24. </description>
  25. <image rdf:resource="http://xml.com/universal/images/xml_tiny.gif" />
  26. <items>
  27. <rdf:Seq>
  28. <rdf:li resource="http://xml.com/pub/2000/08/09/xslt/xslt.html" />
  29. <rdf:li resource="http://xml.com/pub/2000/08/09/rdfdb/index.html" />
  30. </rdf:Seq>
  31. </items>
  32. <textinput rdf:resource="http://search.xml.com" />
  33. </channel>
  34. <image rdf:about="http://xml.com/universal/images/xml_tiny.gif">
  35. <title>XML.com</title>
  36. <link>http://www.xml.com</link>
  37. <url>http://xml.com/universal/images/xml_tiny.gif</url>
  38. </image>
  39. <item rdf:about="http://xml.com/pub/2000/08/09/xslt/xslt.html">
  40. <title>Processing Inclusions with XSLT</title>
  41. <link>http://xml.com/pub/2000/08/09/xslt/xslt.html</link>
  42. <description>
  43. Processing document inclusions with general XML tools can be
  44. problematic. This article proposes a way of preserving inclusion
  45. information through SAX-based processing.
  46. </description>
  47. </item>
  48. <item rdf:about="http://xml.com/pub/2000/08/09/rdfdb/index.html">
  49. <title>Putting RDF to Work</title>
  50. <link>http://xml.com/pub/2000/08/09/rdfdb/index.html</link>
  51. <description>
  52. Tool and API support for the Resource Description Framework
  53. is slowly coming of age. Edd Dumbill takes a look at RDFDB,
  54. one of the most exciting new RDF toolkits.
  55. </description>
  56. </item>
  57. <textinput rdf:about="http://search.xml.com">
  58. <title>Search XML.com</title>
  59. <description>Search XML.com's XML collection</description>
  60. <name>s</name>
  61. <link>http://search.xml.com</link>
  62. </textinput>
  63. </rdf:RDF>`
  64. feed, err := Parse(bytes.NewBufferString(data))
  65. if err != nil {
  66. t.Error(err)
  67. }
  68. if feed.Title != "XML.com" {
  69. t.Errorf("Incorrect title, got: %s", feed.Title)
  70. }
  71. if feed.FeedURL != "" {
  72. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  73. }
  74. if feed.SiteURL != "http://xml.com/pub" {
  75. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  76. }
  77. if len(feed.Entries) != 2 {
  78. t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
  79. }
  80. if feed.Entries[1].Hash != "8aaeee5d3ab50351422fbded41078ee88c73bf1441085b16a8c09fd90a7db321" {
  81. t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
  82. }
  83. if feed.Entries[1].URL != "http://xml.com/pub/2000/08/09/rdfdb/index.html" {
  84. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  85. }
  86. if feed.Entries[1].Title != "Putting RDF to Work" {
  87. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  88. }
  89. if strings.HasSuffix(feed.Entries[1].Content, "Tool and API support") {
  90. t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
  91. }
  92. if feed.Entries[1].Date.Year() != time.Now().Year() {
  93. t.Errorf("Entry date should not be empty")
  94. }
  95. }
  96. func TestParseRDFSampleWithDublinCore(t *testing.T) {
  97. data := `<?xml version="1.0" encoding="utf-8"?>
  98. <rdf:RDF
  99. xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  100. xmlns:dc="http://purl.org/dc/elements/1.1/"
  101. xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
  102. xmlns:co="http://purl.org/rss/1.0/modules/company/"
  103. xmlns:ti="http://purl.org/rss/1.0/modules/textinput/"
  104. xmlns="http://purl.org/rss/1.0/"
  105. >
  106. <channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
  107. <title>Meerkat</title>
  108. <link>http://meerkat.oreillynet.com</link>
  109. <description>Meerkat: An Open Wire Service</description>
  110. <dc:publisher>The O'Reilly Network</dc:publisher>
  111. <dc:creator>Rael Dornfest (mailto:rael@oreilly.com)</dc:creator>
  112. <dc:rights>Copyright &#169; 2000 O'Reilly &amp; Associates, Inc.</dc:rights>
  113. <dc:date>2000-01-01T12:00+00:00</dc:date>
  114. <sy:updatePeriod>hourly</sy:updatePeriod>
  115. <sy:updateFrequency>2</sy:updateFrequency>
  116. <sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
  117. <image rdf:resource="http://meerkat.oreillynet.com/icons/meerkat-powered.jpg" />
  118. <items>
  119. <rdf:Seq>
  120. <rdf:li resource="http://c.moreover.com/click/here.pl?r123" />
  121. </rdf:Seq>
  122. </items>
  123. <textinput rdf:resource="http://meerkat.oreillynet.com" />
  124. </channel>
  125. <image rdf:about="http://meerkat.oreillynet.com/icons/meerkat-powered.jpg">
  126. <title>Meerkat Powered!</title>
  127. <url>http://meerkat.oreillynet.com/icons/meerkat-powered.jpg</url>
  128. <link>http://meerkat.oreillynet.com</link>
  129. </image>
  130. <item rdf:about="http://c.moreover.com/click/here.pl?r123">
  131. <title>XML: A Disruptive Technology</title>
  132. <link>http://c.moreover.com/click/here.pl?r123</link>
  133. <dc:description>
  134. XML is placing increasingly heavy loads on the existing technical
  135. infrastructure of the Internet.
  136. </dc:description>
  137. <dc:publisher>The O'Reilly Network</dc:publisher>
  138. <dc:creator>Simon St.Laurent (mailto:simonstl@simonstl.com)</dc:creator>
  139. <dc:rights>Copyright &#169; 2000 O'Reilly &amp; Associates, Inc.</dc:rights>
  140. <dc:subject>XML</dc:subject>
  141. <co:name>XML.com</co:name>
  142. <co:market>NASDAQ</co:market>
  143. <co:symbol>XML</co:symbol>
  144. </item>
  145. <textinput rdf:about="http://meerkat.oreillynet.com">
  146. <title>Search Meerkat</title>
  147. <description>Search Meerkat's RSS Database...</description>
  148. <name>s</name>
  149. <link>http://meerkat.oreillynet.com/</link>
  150. <ti:function>search</ti:function>
  151. <ti:inputType>regex</ti:inputType>
  152. </textinput>
  153. </rdf:RDF>`
  154. feed, err := Parse(bytes.NewBufferString(data))
  155. if err != nil {
  156. t.Error(err)
  157. }
  158. if feed.Title != "Meerkat" {
  159. t.Errorf("Incorrect title, got: %s", feed.Title)
  160. }
  161. if feed.FeedURL != "" {
  162. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  163. }
  164. if feed.SiteURL != "http://meerkat.oreillynet.com" {
  165. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  166. }
  167. if len(feed.Entries) != 1 {
  168. t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
  169. }
  170. if feed.Entries[0].Hash != "fa4ef7c300b175ca66f92f226b5dba5caa2a9619f031101bf56e5b884b02cd97" {
  171. t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
  172. }
  173. if feed.Entries[0].URL != "http://c.moreover.com/click/here.pl?r123" {
  174. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  175. }
  176. if feed.Entries[0].Title != "XML: A Disruptive Technology" {
  177. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  178. }
  179. if strings.HasSuffix(feed.Entries[0].Content, "XML is placing increasingly") {
  180. t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
  181. }
  182. if feed.Entries[0].Author != "Simon St.Laurent (mailto:simonstl@simonstl.com)" {
  183. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  184. }
  185. }
  186. func TestParseItemWithOnlyFeedAuthor(t *testing.T) {
  187. data := `<?xml version="1.0" encoding="utf-8"?>
  188. <rdf:RDF
  189. xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  190. xmlns:dc="http://purl.org/dc/elements/1.1/"
  191. xmlns="http://purl.org/rss/1.0/"
  192. >
  193. <channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
  194. <title>Meerkat</title>
  195. <link>http://meerkat.oreillynet.com</link>
  196. <dc:creator>Rael Dornfest (mailto:rael@oreilly.com)</dc:creator>
  197. </channel>
  198. <item rdf:about="http://c.moreover.com/click/here.pl?r123">
  199. <title>XML: A Disruptive Technology</title>
  200. <link>http://c.moreover.com/click/here.pl?r123</link>
  201. <dc:description>
  202. XML is placing increasingly heavy loads on the existing technical
  203. infrastructure of the Internet.
  204. </dc:description>
  205. </item>
  206. </rdf:RDF>`
  207. feed, err := Parse(bytes.NewBufferString(data))
  208. if err != nil {
  209. t.Error(err)
  210. }
  211. if feed.Entries[0].Author != "Rael Dornfest (mailto:rael@oreilly.com)" {
  212. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  213. }
  214. }
  215. func TestParseItemRelativeURL(t *testing.T) {
  216. data := `<?xml version="1.0" encoding="utf-8"?>
  217. <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
  218. <channel>
  219. <title>Example</title>
  220. <link>http://example.org</link>
  221. </channel>
  222. <item>
  223. <title>Title</title>
  224. <description>Test</description>
  225. <link>something.html</link>
  226. </item>
  227. </rdf:RDF>`
  228. feed, err := Parse(bytes.NewBufferString(data))
  229. if err != nil {
  230. t.Error(err)
  231. }
  232. if feed.Entries[0].URL != "http://example.org/something.html" {
  233. t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL)
  234. }
  235. }
  236. func TestParseItemWithoutLink(t *testing.T) {
  237. data := `<?xml version="1.0" encoding="utf-8"?>
  238. <rdf:RDF
  239. xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  240. xmlns="http://purl.org/rss/1.0/"
  241. >
  242. <channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
  243. <title>Meerkat</title>
  244. <link>http://meerkat.oreillynet.com</link>
  245. </channel>
  246. <item rdf:about="http://c.moreover.com/click/here.pl?r123">
  247. <title>Title</title>
  248. <description>Test</description>
  249. </item>
  250. </rdf:RDF>`
  251. feed, err := Parse(bytes.NewBufferString(data))
  252. if err != nil {
  253. t.Error(err)
  254. }
  255. if feed.Entries[0].Hash != "37f5223ebd58639aa62a49afbb61df960efb7dc5db5181dfb3cedd9a49ad34c6" {
  256. t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
  257. }
  258. if feed.Entries[0].URL != "http://meerkat.oreillynet.com" {
  259. t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL)
  260. }
  261. }
  262. func TestParseItemWithDublicCoreDate(t *testing.T) {
  263. data := `<?xml version="1.0" encoding="utf-8"?>
  264. <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
  265. <channel>
  266. <title>Example</title>
  267. <link>http://example.org</link>
  268. </channel>
  269. <item>
  270. <title>Title</title>
  271. <description>Test</description>
  272. <link>http://example.org/test.html</link>
  273. <dc:creator>Tester</dc:creator>
  274. <dc:date>2018-04-10T05:00:00+00:00</dc:date>
  275. </item>
  276. </rdf:RDF>`
  277. feed, err := Parse(bytes.NewBufferString(data))
  278. if err != nil {
  279. t.Error(err)
  280. }
  281. expectedDate := time.Date(2018, time.April, 10, 5, 0, 0, 0, time.UTC)
  282. if !feed.Entries[0].Date.Equal(expectedDate) {
  283. t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
  284. }
  285. }
  286. func TestParseItemWithoutDate(t *testing.T) {
  287. data := `<?xml version="1.0" encoding="utf-8"?>
  288. <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
  289. <channel>
  290. <title>Example</title>
  291. <link>http://example.org</link>
  292. </channel>
  293. <item>
  294. <title>Title</title>
  295. <description>Test</description>
  296. <link>http://example.org/test.html</link>
  297. </item>
  298. </rdf:RDF>`
  299. feed, err := Parse(bytes.NewBufferString(data))
  300. if err != nil {
  301. t.Error(err)
  302. }
  303. expectedDate := time.Now().In(time.Local)
  304. diff := expectedDate.Sub(feed.Entries[0].Date)
  305. if diff > time.Second {
  306. t.Errorf("Incorrect entry date, got: %v", diff)
  307. }
  308. }
  309. func TestParseInvalidXml(t *testing.T) {
  310. data := `garbage`
  311. _, err := Parse(bytes.NewBufferString(data))
  312. if err == nil {
  313. t.Error("Parse should returns an error")
  314. }
  315. }