Sfoglia il codice sorgente

test(icon): add tests for finding icon URLs in HTML documents

Frédéric Guillot 7 mesi fa
parent
commit
459b1bf1ee
1 ha cambiato i file con 223 aggiunte e 8 eliminazioni
  1. 223 8
      internal/reader/icon/finder_test.go

+ 223 - 8
internal/reader/icon/finder_test.go

@@ -112,22 +112,237 @@ func TestParseInvalidImageDataURLWithWrongPrefix(t *testing.T) {
 	}
 }
 
-func TestParseDocumentWithWhitespaceIconURL(t *testing.T) {
-	html := `<link rel="shortcut icon" href="
-		/static/img/favicon.ico
-	"><link rel='shortcut icon'><link rel='shortcut icon' href="  ">`
+func TestFindIconURLsFromHTMLDocument_MultipleIcons(t *testing.T) {
+	html := `<!DOCTYPE html>
+<html>
+<head>
+	<link rel="icon" href="/favicon.ico">
+	<link rel="shortcut icon" href="/shortcut-favicon.ico">
+	<link rel="icon shortcut" href="/icon-shortcut.ico">
+	<link rel="apple-touch-icon" href="/apple-touch-icon.png">
+</head>
+</html>`
 
 	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html")
 	if err != nil {
 		t.Fatal(err)
 	}
 
-	if len(iconURLs) != 1 {
-		t.Fatalf(`Invalid number of icon URLs, got %d`, len(iconURLs))
+	expected := []string{
+		"/favicon.ico",
+		"/shortcut-favicon.ico",
+		"/icon-shortcut.ico",
+		"/apple-touch-icon.png",
 	}
 
-	if iconURLs[0] != "/static/img/favicon.ico" {
-		t.Errorf(`Invalid icon URL, got %q`, iconURLs[0])
+	if len(iconURLs) != len(expected) {
+		t.Fatalf("Expected %d icon URLs, got %d", len(expected), len(iconURLs))
+	}
+
+	for i, expectedURL := range expected {
+		if iconURLs[i] != expectedURL {
+			t.Errorf("Expected icon URL %d to be %q, got %q", i, expectedURL, iconURLs[i])
+		}
+	}
+}
+
+func TestFindIconURLsFromHTMLDocument_CaseInsensitiveRel(t *testing.T) {
+	html := `<!DOCTYPE html>
+<html>
+<head>
+	<link rel="ICON" href="/favicon1.ico">
+	<link rel="Icon" href="/favicon2.ico">
+	<link rel="SHORTCUT ICON" href="/favicon3.ico">
+	<link rel="Shortcut Icon" href="/favicon4.ico">
+	<link rel="ICON SHORTCUT" href="/favicon5.ico">
+	<link rel="Icon Shortcut" href="/favicon6.ico">
+</head>
+</html>`
+
+	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	expected := []string{
+		"/favicon1.ico",
+		"/favicon2.ico",
+		"/favicon3.ico",
+		"/favicon4.ico",
+		"/favicon5.ico",
+		"/favicon6.ico",
+	}
+
+	if len(iconURLs) != len(expected) {
+		t.Fatalf("Expected %d icon URLs, got %d", len(expected), len(iconURLs))
+	}
+
+	for i, expectedURL := range expected {
+		if iconURLs[i] != expectedURL {
+			t.Errorf("Expected icon URL %d to be %q, got %q", i, expectedURL, iconURLs[i])
+		}
+	}
+}
+
+func TestFindIconURLsFromHTMLDocument_NoIcons(t *testing.T) {
+	html := `<!DOCTYPE html>
+<html>
+<head>
+	<title>No Icons Here</title>
+	<link rel="stylesheet" href="/style.css">
+	<link rel="canonical" href="https://example.com">
+</head>
+</html>`
+
+	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(iconURLs) != 0 {
+		t.Fatalf("Expected 0 icon URLs, got %d: %v", len(iconURLs), iconURLs)
+	}
+}
+
+func TestFindIconURLsFromHTMLDocument_EmptyHref(t *testing.T) {
+	html := `<!DOCTYPE html>
+<html>
+<head>
+	<link rel="icon" href="">
+	<link rel="icon" href="   ">
+	<link rel="icon">
+	<link rel="shortcut icon" href="/valid-icon.ico">
+</head>
+</html>`
+
+	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	expected := []string{"/valid-icon.ico"}
+
+	if len(iconURLs) != len(expected) {
+		t.Fatalf("Expected %d icon URLs, got %d", len(expected), len(iconURLs))
+	}
+
+	if iconURLs[0] != expected[0] {
+		t.Errorf("Expected icon URL to be %q, got %q", expected[0], iconURLs[0])
+	}
+}
+
+func TestFindIconURLsFromHTMLDocument_DataURLs(t *testing.T) {
+	html := `<!DOCTYPE html>
+<html>
+<head>
+	<link rel="icon" href="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAGAhGAQ+QAAAABJRU5ErkJggg==">
+	<link rel="shortcut icon" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg'></svg>">
+	<link rel="icon" href="/regular-icon.ico">
+</head>
+</html>`
+
+	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// The function processes queries in order: rel="icon", then rel="shortcut icon", etc.
+	// So both rel="icon" links are found first, then the rel="shortcut icon" link
+	expected := []string{
+		"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAGAhGAQ+QAAAABJRU5ErkJggg==",
+		"/regular-icon.ico",
+		"data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg'></svg>",
+	}
+
+	if len(iconURLs) != len(expected) {
+		t.Fatalf("Expected %d icon URLs, got %d", len(expected), len(iconURLs))
+	}
+
+	for i, expectedURL := range expected {
+		if iconURLs[i] != expectedURL {
+			t.Errorf("Expected icon URL %d to be %q, got %q", i, expectedURL, iconURLs[i])
+		}
+	}
+}
+
+func TestFindIconURLsFromHTMLDocument_RelativeAndAbsoluteURLs(t *testing.T) {
+	html := `<!DOCTYPE html>
+<html>
+<head>
+	<link rel="icon" href="/absolute-path.ico">
+	<link rel="icon" href="relative-path.ico">
+	<link rel="icon" href="../parent-dir.ico">
+	<link rel="icon" href="https://example.com/external.ico">
+	<link rel="icon" href="//cdn.example.com/protocol-relative.ico">
+</head>
+</html>`
+
+	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	expected := []string{
+		"/absolute-path.ico",
+		"relative-path.ico",
+		"../parent-dir.ico",
+		"https://example.com/external.ico",
+		"//cdn.example.com/protocol-relative.ico",
+	}
+
+	if len(iconURLs) != len(expected) {
+		t.Fatalf("Expected %d icon URLs, got %d", len(expected), len(iconURLs))
+	}
+
+	for i, expectedURL := range expected {
+		if iconURLs[i] != expectedURL {
+			t.Errorf("Expected icon URL %d to be %q, got %q", i, expectedURL, iconURLs[i])
+		}
+	}
+}
+
+func TestFindIconURLsFromHTMLDocument_InvalidHTML(t *testing.T) {
+	html := `<!DOCTYPE html>
+<html>
+<head>
+	<link rel="icon" href="/valid-before-error.ico">
+	<link rel="icon" href="/unclosed-tag.ico"
+	<link rel="shortcut icon" href="/valid-after-error.ico">
+</head>
+</html>`
+
+	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// goquery should handle malformed HTML gracefully
+	if len(iconURLs) == 0 {
+		t.Fatal("Expected to find some icon URLs even with malformed HTML")
+	}
+
+	// Should at least find the valid ones
+	foundValidIcon := false
+	for _, url := range iconURLs {
+		if url == "/valid-before-error.ico" || url == "/valid-after-error.ico" {
+			foundValidIcon = true
+			break
+		}
+	}
+
+	if !foundValidIcon {
+		t.Errorf("Expected to find at least one valid icon URL, got: %v", iconURLs)
+	}
+}
+
+func TestFindIconURLsFromHTMLDocument_EmptyDocument(t *testing.T) {
+	iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(""), "text/html")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(iconURLs) != 0 {
+		t.Fatalf("Expected 0 icon URLs from empty document, got %d", len(iconURLs))
 	}
 }