Browse Source

feat(subscription): ignore WordPress API endpoint when discovering JSON feed

Frédéric Guillot 2 months ago
parent
commit
0022d0646b

+ 7 - 3
internal/reader/subscription/finder.go

@@ -142,12 +142,16 @@ func (f *subscriptionFinder) findSubscriptionsFromWebPage(websiteURL, contentTyp
 
 	var subscriptions Subscriptions
 	subscriptionURLs := make(map[string]bool)
-	for query, kind := range queries {
-		doc.Find(query).Each(func(i int, s *goquery.Selection) {
+	for feedQuerySelector, feedFormat := range queries {
+		doc.Find(feedQuerySelector).Each(func(i int, s *goquery.Selection) {
 			subscription := new(subscription)
-			subscription.Type = kind
+			subscription.Type = feedFormat
 
 			if feedURL, exists := s.Attr("href"); exists && feedURL != "" {
+				// Ignore JSON feed URLs that contain "wp-json" to avoid confusion with WordPress REST API endpoints.
+				if feedFormat == parser.FormatJSON && strings.Contains(feedURL, "wp-json") {
+					return
+				}
 				subscription.URL, err = urllib.ResolveToAbsoluteURL(websiteURL, feedURL)
 				if err != nil {
 					return

+ 22 - 0
internal/reader/subscription/finder_test.go

@@ -240,6 +240,28 @@ func TestParseWebPageWithOldJSONFeedMimeType(t *testing.T) {
 	}
 }
 
+func TestParseWebPageWithJSONFeedWpJsonIgnored(t *testing.T) { // Checks that a JSON alternate link whose href contains "wp-json" is not returned as a subscription.
+	htmlPage := `
+	<!doctype html>
+	<html>
+		<head>
+			<link rel="https://api.w.org/" href="https://example.org/wp-json/" />
+			<link rel="alternate" title="JSON" type="application/json" href="https://example.org/wp-json/wp/v2/posts/123456" />
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	subscriptions, err := NewSubscriptionFinder(nil).findSubscriptionsFromWebPage("http://example.org/", "text/html", []byte(htmlPage)) // The fixture page is supplied in memory as bytes with a text/html content type.
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+
+	if len(subscriptions) != 0 { // The only rel="alternate" link points at a wp-json URL, so the result is expected to be empty.
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+}
+
 func TestParseWebPageWithRelativeFeedURL(t *testing.T) {
 	htmlPage := `
 	<!doctype html>