Forráskód Böngészése

Add more scraper rules

Frédéric Guillot 8 éve
szülő
commit
48aa0d07ef
1 módosított fájl, 30 hozzáadás és 16 törlés
  1. 30 16
      reader/scraper/rules.go

+ 30 - 16
reader/scraper/rules.go

@@ -7,20 +7,34 @@ package scraper
 // List of predefined scraper rules (alphabetically sorted)
 // domain => CSS selectors
 var predefinedRules = map[string]string{
-	"cbc.ca":            ".story-content",
-	"github.com":        "article.entry-content",
-	"igen.fr":           "section.corps",
-	"ing.dk":            "section.body",
-	"lapresse.ca":       ".amorce, .entry",
-	"lemonde.fr":        "div#articleBody",
-	"lesjoiesducode.fr": ".blog-post-content img",
-	"linux.com":         "div.content, div[property]",
-	"medium.com":        ".section-content",
-	"opensource.com":    "div[property]",
-	"osnews.com":        "div.newscontent1",
-	"phoronix.com":      "div.content",
-	"techcrunch.com":    "div.article-entry",
-	"theregister.co.uk": "#body",
-	"version2.dk":       "section.body",
-	"wired.com":         "main figure, article",
+	"cbc.ca":              ".story-content",
+	"developpez.com":      "div[itemprop=articleBody]",
+	"francetvinfo.fr":     ".text",
+	"github.com":          "article.entry-content",
+	"heise.de":            "div.article-content",
+	"igen.fr":             "section.corps",
+	"ing.dk":              "section.body",
+	"lapresse.ca":         ".amorce, .entry",
+	"lemonde.fr":          "div#articleBody",
+	"lepoint.fr":          ".art-text",
+	"lesjoiesducode.fr":   ".blog-post-content img",
+	"lesnumeriques.com":   ".text",
+	"linux.com":           "div.content, div[property]",
+	"mac4ever.com":        "div[itemprop=articleBody]",
+	"medium.com":          ".section-content",
+	"monwindows.com":      ".blog-post-body",
+	"npr.org":             "#storytext",
+	"oneindia.com":        ".io-article-body",
+	"opensource.com":      "div[property]",
+	"osnews.com":          "div.newscontent1",
+	"phoronix.com":        "div.content",
+	"pseudo-sciences.org": "#art_main",
+	"slate.fr":            ".field-items",
+	"techcrunch.com":      "div.article-entry",
+	"theregister.co.uk":   "#body",
+	"universfreebox.com":  "#corps_corps",
+	"version2.dk":         "section.body",
+	"wired.com":           "main figure, article",
+	"zdnet.com":           "div.storyBody",
+	"zeit.de":             ".summary, .article-body",
 }