před 6 měsíci · 7ada5d54be
--- a/internal/reader/icon/finder.go
+++ b/internal/reader/icon/finder.go
@@ -64,13 +64,17 @@ func (f *iconFinder) findIcon() (*model.Icon, error) {
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	if icon, err := f.fetchIconsFromHTMLDocument(); err != nil {
			
 
				-		slog.Debug("Unable to fetch icons from HTML document",
			
 
				-			slog.String("website_url", f.websiteURL),
			
 
				-			slog.Any("error", err),
			
 
				-		)
			
 
				-	} else if icon != nil {
			
 
				-		return icon, nil
			
 
				+	// Try the website URL first, then fall back to the root URL if no icon is found.
			
 
				+	// The website URL may include a subdirectory (e.g., https://example.org/subfolder/), and icons can be referenced relative to that path.
			
 
				+	for _, documentURL := range []string{f.websiteURL, urllib.RootURL(f.websiteURL)} {
			
 
				+		if icon, err := f.fetchIconsFromHTMLDocument(documentURL); err != nil {
			
 
				+			slog.Debug("Unable to fetch icons from HTML document",
			
 
				+				slog.String("document_url", documentURL),
			
 
				+				slog.Any("error", err),
			
 
				+			)
			
 
				+		} else if icon != nil {
			
 
				+			return icon, nil
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	return f.fetchDefaultIcon()
			
@@ -94,14 +98,12 @@ func (f *iconFinder) fetchDefaultIcon() (*model.Icon, error) {
 
				 	return icon, nil
			
 
				 }
			
 
				 
			
 
				-func (f *iconFinder) fetchIconsFromHTMLDocument() (*model.Icon, error) {
			
 
				+func (f *iconFinder) fetchIconsFromHTMLDocument(documentURL string) (*model.Icon, error) {
			
 
				 	slog.Debug("Searching icons from HTML document",
			
 
				-		slog.String("website_url", f.websiteURL),
			
 
				+		slog.String("document_url", documentURL),
			
 
				 	)
			
 
				 
			
 
				-	rootURL := urllib.RootURL(f.websiteURL)
			
 
				-
			
 
				-	responseHandler := fetcher.NewResponseHandler(f.requestBuilder.ExecuteRequest(rootURL))
			
 
				+	responseHandler := fetcher.NewResponseHandler(f.requestBuilder.ExecuteRequest(documentURL))
			
 
				 	defer responseHandler.Close()
			
 
				 
			
 
				 	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
			
@@ -109,6 +111,7 @@ func (f *iconFinder) fetchIconsFromHTMLDocument() (*model.Icon, error) {
 
				 	}
			
 
				 
			
 
				 	iconURLs, err := findIconURLsFromHTMLDocument(
			
 
				+		documentURL,
			
 
				 		responseHandler.Body(config.Opts.HTTPClientMaxBodySize()),
			
 
				 		responseHandler.ContentType(),
			
 
				 	)
			
@@ -117,32 +120,27 @@ func (f *iconFinder) fetchIconsFromHTMLDocument() (*model.Icon, error) {
 
				 	}
			
 
				 
			
 
				 	slog.Debug("Searched icon from HTML document",
			
 
				-		slog.String("website_url", f.websiteURL),
			
 
				+		slog.String("document_url", documentURL),
			
 
				 		slog.String("icon_urls", strings.Join(iconURLs, ",")),
			
 
				 	)
			
 
				 
			
 
				 	for _, iconURL := range iconURLs {
			
 
				 		if strings.HasPrefix(iconURL, "data:") {
			
 
				 			slog.Debug("Found icon with data URL",
			
 
				-				slog.String("website_url", f.websiteURL),
			
 
				+				slog.String("document_url", documentURL),
			
 
				 			)
			
 
				 			return parseImageDataURL(iconURL)
			
 
				 		}
			
 
				 
			
 
				-		iconURL, err = urllib.AbsoluteURL(f.websiteURL, iconURL)
			
 
				-		if err != nil {
			
 
				-			return nil, fmt.Errorf(`icon: unable to convert icon URL to absolute URL: %w`, err)
			
 
				-		}
			
 
				-
			
 
				 		if icon, err := f.downloadIcon(iconURL); err != nil {
			
 
				 			slog.Debug("Unable to download icon from HTML document",
			
 
				-				slog.String("website_url", f.websiteURL),
			
 
				+				slog.String("document_url", documentURL),
			
 
				 				slog.String("icon_url", iconURL),
			
 
				 				slog.Any("error", err),
			
 
				 			)
			
 
				 		} else if icon != nil {
			
 
				 			slog.Debug("Downloaded icon from HTML document",
			
 
				-				slog.String("website_url", f.websiteURL),
			
 
				+				slog.String("document_url", documentURL),
			
 
				 				slog.String("icon_url", iconURL),
			
 
				 			)
			
 
				 			return icon, nil
			
@@ -195,7 +193,7 @@ func resizeIcon(icon *model.Icon) *model.Icon {
 
				 	}
			
 
				 
			
 
				 	if !slices.Contains([]string{"image/jpeg", "image/png", "image/gif", "image/webp"}, icon.MimeType) {
			
 
				-		slog.Info("Icon resize skipped: unsupported MIME type", slog.String("mime_type", icon.MimeType))
			
 
				+		slog.Debug("Icon resize skipped: unsupported MIME type", slog.String("mime_type", icon.MimeType))
			
 
				 		return icon
			
 
				 	}
			
 
				 
			
@@ -244,7 +242,7 @@ func resizeIcon(icon *model.Icon) *model.Icon {
 
				 	return icon
			
 
				 }
			
 
				 
			
 
				-func findIconURLsFromHTMLDocument(body io.Reader, contentType string) ([]string, error) {
			
 
				+func findIconURLsFromHTMLDocument(documentURL string, body io.Reader, contentType string) ([]string, error) {
			
 
				 	htmlDocumentReader, err := encoding.NewCharsetReader(body, contentType)
			
 
				 	if err != nil {
			
 
				 		return nil, fmt.Errorf("icon: unable to create charset reader: %w", err)
			
@@ -268,11 +266,20 @@ func findIconURLsFromHTMLDocument(body io.Reader, contentType string) ([]string,
 
				 
			
 
				 		for _, s := range doc.Find(query).EachIter() {
			
 
				 			href, _ := s.Attr("href")
			
 
				-			if iconURL := strings.TrimSpace(href); iconURL != "" {
			
 
				-				iconURLs = append(iconURLs, iconURL)
			
 
				+			href = strings.TrimSpace(href)
			
 
				+			if href == "" {
			
 
				+				continue
			
 
				+			}
			
 
				+
			
 
				+			if absoluteIconURL, err := urllib.AbsoluteURL(documentURL, href); err != nil {
			
 
				+				slog.Warn("Unable to convert icon URL to absolute URL", slog.Any("error", err), slog.String("icon_href", href))
			
 
				+			} else {
			
 
				+				iconURLs = append(iconURLs, absoluteIconURL)
			
 
				 				slog.Debug("Found icon URL in HTML document",
			
 
				 					slog.String("query", query),
			
 
				-					slog.String("icon_url", iconURL))
			
 
				+					slog.String("icon_href", href),
			
 
				+					slog.String("absolute_icon_url", absoluteIconURL),
			
 
				+				)
			
 
				 			}
			
 
				 		}
			
 
				 	}
			
--- a/internal/reader/icon/finder_test.go
+++ b/internal/reader/icon/finder_test.go
@@ -123,16 +123,16 @@ func TestFindIconURLsFromHTMLDocument_MultipleIcons(t *testing.T) {
 
				 </head>
			
 
				 </html>`
			
 
				 
			
 
				-	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html")
			
 
				+	iconURLs, err := findIconURLsFromHTMLDocument("https://example.org", strings.NewReader(html), "text/html")
			
 
				 	if err != nil {
			
 
				 		t.Fatal(err)
			
 
				 	}
			
 
				 
			
 
				 	expected := []string{
			
 
				-		"/favicon.ico",
			
 
				-		"/shortcut-favicon.ico",
			
 
				-		"/icon-shortcut.ico",
			
 
				-		"/apple-touch-icon.png",
			
 
				+		"https://example.org/favicon.ico",
			
 
				+		"https://example.org/shortcut-favicon.ico",
			
 
				+		"https://example.org/icon-shortcut.ico",
			
 
				+		"https://example.org/apple-touch-icon.png",
			
 
				 	}
			
 
				 
			
 
				 	if len(iconURLs) != len(expected) {
			
@@ -155,22 +155,22 @@ func TestFindIconURLsFromHTMLDocument_CaseInsensitiveRel(t *testing.T) {
 
				 	<link rel="SHORTCUT ICON" href="/favicon3.ico">
			
 
				 	<link rel="Shortcut Icon" href="/favicon4.ico">
			
 
				 	<link rel="ICON SHORTCUT" href="/favicon5.ico">
			
 
				-	<link rel="Icon Shortcut" href="/favicon6.ico">
			
 
				+	<link rel="Icon Shortcut" href="favicon6.ico">
			
 
				 </head>
			
 
				 </html>`
			
 
				 
			
 
				-	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html")
			
 
				+	iconURLs, err := findIconURLsFromHTMLDocument("https://example.org/folder/", strings.NewReader(html), "text/html")
			
 
				 	if err != nil {
			
 
				 		t.Fatal(err)
			
 
				 	}
			
 
				 
			
 
				 	expected := []string{
			
 
				-		"/favicon1.ico",
			
 
				-		"/favicon2.ico",
			
 
				-		"/favicon3.ico",
			
 
				-		"/favicon4.ico",
			
 
				-		"/favicon5.ico",
			
 
				-		"/favicon6.ico",
			
 
				+		"https://example.org/favicon1.ico",
			
 
				+		"https://example.org/favicon2.ico",
			
 
				+		"https://example.org/favicon3.ico",
			
 
				+		"https://example.org/favicon4.ico",
			
 
				+		"https://example.org/favicon5.ico",
			
 
				+		"https://example.org/folder/favicon6.ico",
			
 
				 	}
			
 
				 
			
 
				 	if len(iconURLs) != len(expected) {
			
@@ -194,7 +194,7 @@ func TestFindIconURLsFromHTMLDocument_NoIcons(t *testing.T) {
 
				 </head>
			
 
				 </html>`
			
 
				 
			
 
				-	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html")
			
 
				+	iconURLs, err := findIconURLsFromHTMLDocument("https://example.org", strings.NewReader(html), "text/html")
			
 
				 	if err != nil {
			
 
				 		t.Fatal(err)
			
 
				 	}
			
@@ -215,12 +215,12 @@ func TestFindIconURLsFromHTMLDocument_EmptyHref(t *testing.T) {
 
				 </head>
			
 
				 </html>`
			
 
				 
			
 
				-	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html")
			
 
				+	iconURLs, err := findIconURLsFromHTMLDocument("https://example.org", strings.NewReader(html), "text/html")
			
 
				 	if err != nil {
			
 
				 		t.Fatal(err)
			
 
				 	}
			
 
				 
			
 
				-	expected := []string{"/valid-icon.ico"}
			
 
				+	expected := []string{"https://example.org/valid-icon.ico"}
			
 
				 
			
 
				 	if len(iconURLs) != len(expected) {
			
 
				 		t.Fatalf("Expected %d icon URLs, got %d", len(expected), len(iconURLs))
			
@@ -241,7 +241,7 @@ func TestFindIconURLsFromHTMLDocument_DataURLs(t *testing.T) {
 
				 </head>
			
 
				 </html>`
			
 
				 
			
 
				-	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html")
			
 
				+	iconURLs, err := findIconURLsFromHTMLDocument("https://example.org/folder", strings.NewReader(html), "text/html")
			
 
				 	if err != nil {
			
 
				 		t.Fatal(err)
			
 
				 	}
			
@@ -250,7 +250,7 @@ func TestFindIconURLsFromHTMLDocument_DataURLs(t *testing.T) {
 
				 	// So both rel="icon" links are found first, then the rel="shortcut icon" link
			
 
				 	expected := []string{
			
 
				 		"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAGAhGAQ+QAAAABJRU5ErkJggg==",
			
 
				-		"/regular-icon.ico",
			
 
				+		"https://example.org/regular-icon.ico",
			
 
				 		"data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg'></svg>",
			
 
				 	}
			
 
				 
			
@@ -277,17 +277,17 @@ func TestFindIconURLsFromHTMLDocument_RelativeAndAbsoluteURLs(t *testing.T) {
 
				 </head>
			
 
				 </html>`
			
 
				 
			
 
				-	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html")
			
 
				+	iconURLs, err := findIconURLsFromHTMLDocument("https://example.org/folder/", strings.NewReader(html), "text/html")
			
 
				 	if err != nil {
			
 
				 		t.Fatal(err)
			
 
				 	}
			
 
				 
			
 
				 	expected := []string{
			
 
				-		"/absolute-path.ico",
			
 
				-		"relative-path.ico",
			
 
				-		"../parent-dir.ico",
			
 
				+		"https://example.org/absolute-path.ico",
			
 
				+		"https://example.org/folder/relative-path.ico",
			
 
				+		"https://example.org/parent-dir.ico",
			
 
				 		"https://example.com/external.ico",
			
 
				-		"//cdn.example.com/protocol-relative.ico",
			
 
				+		"https://cdn.example.com/protocol-relative.ico",
			
 
				 	}
			
 
				 
			
 
				 	if len(iconURLs) != len(expected) {
			
@@ -311,7 +311,7 @@ func TestFindIconURLsFromHTMLDocument_InvalidHTML(t *testing.T) {
 
				 </head>
			
 
				 </html>`
			
 
				 
			
 
				-	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html")
			
 
				+	iconURLs, err := findIconURLsFromHTMLDocument("https://example.org", strings.NewReader(html), "text/html")
			
 
				 	if err != nil {
			
 
				 		t.Fatal(err)
			
 
				 	}
			
@@ -324,7 +324,7 @@ func TestFindIconURLsFromHTMLDocument_InvalidHTML(t *testing.T) {
 
				 	// Should at least find the valid ones
			
 
				 	foundValidIcon := false
			
 
				 	for _, url := range iconURLs {
			
 
				-		if url == "/valid-before-error.ico" || url == "/valid-after-error.ico" {
			
 
				+		if url == "https://example.org/valid-before-error.ico" || url == "https://example.org/valid-after-error.ico" {
			
 
				 			foundValidIcon = true
			
 
				 			break
			
 
				 		}
			
@@ -336,7 +336,7 @@ func TestFindIconURLsFromHTMLDocument_InvalidHTML(t *testing.T) {
 
				 }
			
 
				 
			
 
				 func TestFindIconURLsFromHTMLDocument_EmptyDocument(t *testing.T) {
			
 
				-	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(""), "text/html")
			
 
				+	iconURLs, err := findIconURLsFromHTMLDocument("https://example.org", strings.NewReader(""), "text/html")
			
 
				 	if err != nil {
			
 
				 		t.Fatal(err)
			
 
				 	}
			
--- a/internal/urllib/url.go
+++ b/internal/urllib/url.go
@@ -18,7 +18,7 @@ func IsAbsoluteURL(link string) bool {
 
				 	return u.IsAbs()
			
 
				 }
			
 
				 
			
 
				-// GetAbsoluteURL return the absolute form of `input` is possible, as well as its parser form.
			
 
				+// GetAbsoluteURL returns the absolute form of `input` if possible, as well as its parsed form.
			
 
				 func GetAbsoluteURL(input string) (string, *url.URL, error) {
			
 
				 	if strings.HasPrefix(input, "//") {
			
 
				 		return "https:" + input, nil, nil