Browse Source

refactor(internal): add an urllib.DomainWithoutWWW function

jvoisin 9 months ago
parent
commit
b296f21e98

+ 1 - 1
internal/reader/sanitizer/sanitizer.go

@@ -486,7 +486,7 @@ func isBlockedResource(absoluteURL string) bool {
 }
 
 func isValidIframeSource(iframeSourceURL string) bool {
-	iframeSourceDomain := strings.TrimPrefix(urllib.Domain(iframeSourceURL), "www.")
+	iframeSourceDomain := urllib.DomainWithoutWWW(iframeSourceURL)
 
 	if _, ok := iframeAllowList[iframeSourceDomain]; ok {
 		return true

+ 1 - 2
internal/reader/scraper/scraper.go

@@ -93,8 +93,7 @@ func findContentUsingCustomRules(page io.Reader, rules string) (baseURL string,
 }
 
 func getPredefinedScraperRules(websiteURL string) string {
-	urlDomain := urllib.Domain(websiteURL)
-	urlDomain = strings.TrimPrefix(urlDomain, "www.")
+	urlDomain := urllib.DomainWithoutWWW(websiteURL)
 
 	if rules, ok := predefinedRules[urlDomain]; ok {
 		return rules

+ 5 - 0
internal/urllib/url.go

@@ -83,6 +83,11 @@ func Domain(websiteURL string) string {
 	return parsedURL.Host
 }
 
+// DomainWithoutWWW returns the value of Domain(websiteURL) with a single leading "www." prefix removed if present; the prefix match is case-sensitive and any other value is returned unchanged.
+func DomainWithoutWWW(websiteURL string) string {
+	return strings.TrimPrefix(Domain(websiteURL), "www.")
+}
+
 // JoinBaseURLAndPath returns a URL string with the provided path elements joined together.
 func JoinBaseURLAndPath(baseURL, path string) (string, error) {
 	if baseURL == "" {