| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405 |
- // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
- // SPDX-License-Identifier: Apache-2.0
- package parser // import "miniflux.app/v2/internal/reader/parser"
- import (
- "os"
- "strings"
- "testing"
- )
- func BenchmarkParse(b *testing.B) {
- var testCases = map[string][]string{
- "large_atom.xml": {"https://dustri.org/b", ""},
- "large_rss.xml": {"https://dustri.org/b", ""},
- "small_atom.xml": {"https://github.com/miniflux/v2/commits/main", ""},
- }
- for filename := range testCases {
- data, err := os.ReadFile("./testdata/" + filename)
- if err != nil {
- b.Fatalf(`Unable to read file %q: %v`, filename, err)
- }
- testCases[filename][1] = string(data)
- }
- for b.Loop() {
- for _, v := range testCases {
- ParseFeed(v[0], strings.NewReader(v[1]))
- }
- }
- }
- func FuzzParse(f *testing.F) {
- f.Add("https://z.org", `<?xml version="1.0" encoding="utf-8"?>
- <feed xmlns="http://www.w3.org/2005/Atom">
- <title>Example Feed</title>
- <link href="http://z.org/"/>
- <link href="/k"/>
- <updated>2003-12-13T18:30:02Z</updated>
- <author><name>John Doe</name></author>
- <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
- <entry>
- <title>a</title>
- <link href="http://example.org/b"/>
- <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
- <updated>2003-12-13T18:30:02Z</updated>
- <summary>c</summary>
- </entry>
- </feed>`)
- f.Add("https://z.org", `<?xml version="1.0"?>
- <rss version="2.0">
- <channel>
- <title>a</title>
- <link>http://z.org</link>
- <item>
- <title>a</title>
- <link>http://z.org</link>
- <description>d</description>
- <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
- <guid>l</guid>
- </item>
- </channel>
- </rss>`)
- f.Add("https://z.org", `<?xml version="1.0" encoding="utf-8"?>
- <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
- <channel>
- <title>a</title>
- <link>http://z.org/</link>
- </channel>
- <item>
- <title>a</title>
- <link>/</link>
- <description>c</description>
- </item>
- </rdf:RDF>`)
- f.Add("http://z.org", `{
- "version": "http://jsonfeed.org/version/1",
- "title": "a",
- "home_page_url": "http://z.org/",
- "feed_url": "http://z.org/a.json",
- "items": [
- {"id": "2","content_text": "a","url": "https://z.org/2"},
- {"id": "1","content_html": "<a","url":"http://z.org/1"}]}`)
- f.Fuzz(func(t *testing.T, url string, data string) {
- ParseFeed(url, strings.NewReader(data))
- })
- }
- func TestParseAtom03Feed(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <feed version="0.3" xmlns="http://purl.org/atom/ns#">
- <title>dive into mark</title>
- <link rel="alternate" type="text/html" href="http://diveintomark.org/"/>
- <modified>2003-12-13T18:30:02Z</modified>
- <author><name>Mark Pilgrim</name></author>
- <entry>
- <title>Atom 0.3 snapshot</title>
- <link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/>
- <id>tag:diveintomark.org,2003:3.2397</id>
- <issued>2003-12-13T08:29:29-04:00</issued>
- <modified>2003-12-13T18:30:02Z</modified>
- <summary type="text/plain">It's a test</summary>
- <content type="text/html" mode="escaped"><![CDATA[<p>HTML content</p>]]></content>
- </entry>
- </feed>`
- feed, err := ParseFeed("https://example.org/", strings.NewReader(data))
- if err != nil {
- t.Error(err)
- }
- if feed.Title != "dive into mark" {
- t.Errorf("Incorrect title, got: %s", feed.Title)
- }
- }
- func TestParseAtom10Feed(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <feed xmlns="http://www.w3.org/2005/Atom">
- <title>Example Feed</title>
- <link href="http://example.org/"/>
- <updated>2003-12-13T18:30:02Z</updated>
- <author>
- <name>John Doe</name>
- </author>
- <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
- <entry>
- <title>Atom-Powered Robots Run Amok</title>
- <link href="http://example.org/2003/12/13/atom03"/>
- <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
- <updated>2003-12-13T18:30:02Z</updated>
- <summary>Some text.</summary>
- </entry>
- </feed>`
- feed, err := ParseFeed("https://example.org/", strings.NewReader(data))
- if err != nil {
- t.Error(err)
- }
- if feed.Title != "Example Feed" {
- t.Errorf("Incorrect title, got: %s", feed.Title)
- }
- }
- func TestParseAtomFeedWithRelativeURL(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <feed xmlns="http://www.w3.org/2005/Atom">
- <title>Example Feed</title>
- <link href="/blog/atom.xml" rel="self" type="application/atom+xml"/>
- <link href="/blog"/>
- <entry>
- <title>Test</title>
- <link href="/blog/article.html"/>
- <link href="/blog/article.html" rel="alternate" type="text/html"/>
- <id>/blog/article.html</id>
- <updated>2003-12-13T18:30:02Z</updated>
- <summary>Some text.</summary>
- </entry>
- </feed>`
- feed, err := ParseFeed("https://example.org/blog/atom.xml", strings.NewReader(data))
- if err != nil {
- t.Fatal(err)
- }
- if feed.FeedURL != "https://example.org/blog/atom.xml" {
- t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
- }
- if feed.SiteURL != "https://example.org/blog" {
- t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
- }
- if feed.Entries[0].URL != "https://example.org/blog/article.html" {
- t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
- }
- }
- func TestParseRSS(t *testing.T) {
- data := `<?xml version="1.0"?>
- <rss version="2.0">
- <channel>
- <title>Liftoff News</title>
- <link>http://liftoff.msfc.nasa.gov/</link>
- <item>
- <title>Star City</title>
- <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
- <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.</description>
- <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
- <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
- </item>
- </channel>
- </rss>`
- feed, err := ParseFeed("http://liftoff.msfc.nasa.gov/", strings.NewReader(data))
- if err != nil {
- t.Error(err)
- }
- if feed.Title != "Liftoff News" {
- t.Errorf("Incorrect title, got: %s", feed.Title)
- }
- }
- func TestParseRSSFeedWithRelativeURL(t *testing.T) {
- data := `<?xml version="1.0"?>
- <rss version="2.0">
- <channel>
- <title>Example Feed</title>
- <link>/blog</link>
- <item>
- <title>Example Entry</title>
- <link>/blog/article.html</link>
- <description>Something</description>
- <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
- <guid>1234</guid>
- </item>
- </channel>
- </rss>`
- feed, err := ParseFeed("http://example.org/rss.xml", strings.NewReader(data))
- if err != nil {
- t.Error(err)
- }
- if feed.Title != "Example Feed" {
- t.Errorf("Incorrect title, got: %s", feed.Title)
- }
- if feed.FeedURL != "http://example.org/rss.xml" {
- t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
- }
- if feed.SiteURL != "http://example.org/blog" {
- t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
- }
- if feed.Entries[0].URL != "http://example.org/blog/article.html" {
- t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
- }
- }
- func TestParseRDF(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rdf:RDF
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns="http://purl.org/rss/1.0/"
- >
- <channel>
- <title>RDF Example</title>
- <link>http://example.org/</link>
- </channel>
- <item>
- <title>Title</title>
- <link>http://example.org/item</link>
- <description>Test</description>
- </item>
- </rdf:RDF>`
- feed, err := ParseFeed("http://example.org/", strings.NewReader(data))
- if err != nil {
- t.Error(err)
- }
- if feed.Title != "RDF Example" {
- t.Errorf("Incorrect title, got: %s", feed.Title)
- }
- }
- func TestParseRDFWithRelativeURL(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rdf:RDF
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns="http://purl.org/rss/1.0/"
- >
- <channel>
- <title>RDF Example</title>
- <link>/blog</link>
- </channel>
- <item>
- <title>Title</title>
- <link>/blog/article.html</link>
- <description>Test</description>
- </item>
- </rdf:RDF>`
- feed, err := ParseFeed("http://example.org/rdf.xml", strings.NewReader(data))
- if err != nil {
- t.Error(err)
- }
- if feed.FeedURL != "http://example.org/rdf.xml" {
- t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
- }
- if feed.SiteURL != "http://example.org/blog" {
- t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
- }
- if feed.Entries[0].URL != "http://example.org/blog/article.html" {
- t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
- }
- }
- func TestParseJson(t *testing.T) {
- data := `{
- "version": "https://jsonfeed.org/version/1",
- "title": "My Example Feed",
- "home_page_url": "https://example.org/",
- "feed_url": "https://example.org/feed.json",
- "items": [
- {
- "id": "2",
- "content_text": "This is a second item.",
- "url": "https://example.org/second-item"
- },
- {
- "id": "1",
- "content_html": "<p>Hello, world!</p>",
- "url": "https://example.org/initial-post"
- }
- ]
- }`
- feed, err := ParseFeed("https://example.org/feed.json", strings.NewReader(data))
- if err != nil {
- t.Error(err)
- }
- if feed.Title != "My Example Feed" {
- t.Errorf("Incorrect title, got: %s", feed.Title)
- }
- }
- func TestParseJsonFeedWithRelativeURL(t *testing.T) {
- data := `{
- "version": "https://jsonfeed.org/version/1",
- "title": "My Example Feed",
- "home_page_url": "/blog",
- "feed_url": "/blog/feed.json",
- "items": [
- {
- "id": "2",
- "content_text": "This is a second item.",
- "url": "/blog/article.html"
- }
- ]
- }`
- feed, err := ParseFeed("https://example.org/blog/feed.json", strings.NewReader(data))
- if err != nil {
- t.Error(err)
- }
- if feed.Title != "My Example Feed" {
- t.Errorf("Incorrect title, got: %s", feed.Title)
- }
- if feed.FeedURL != "https://example.org/blog/feed.json" {
- t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
- }
- if feed.SiteURL != "https://example.org/blog" {
- t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
- }
- if feed.Entries[0].URL != "https://example.org/blog/article.html" {
- t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
- }
- }
- func TestParseUnknownFeed(t *testing.T) {
- data := `
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
- <html xmlns="http://www.w3.org/1999/xhtml">
- <head>
- <title>Title of document</title>
- </head>
- <body>
- some content
- </body>
- </html>
- `
- _, err := ParseFeed("https://example.org/", strings.NewReader(data))
- if err == nil {
- t.Error("ParseFeed must returns an error")
- }
- }
- func TestParseEmptyFeed(t *testing.T) {
- _, err := ParseFeed("", strings.NewReader(""))
- if err == nil {
- t.Error("ParseFeed must returns an error")
- }
- }
|