|
|
@@ -1880,152 +1880,6 @@ func TestTransformMisusedDivsIntoParagraphs(t *testing.T) {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-func TestTransformMisusedDivsIntoParagraphsRegexPattern(t *testing.T) {
|
|
|
- // Test the regex pattern directly to ensure it matches the expected elements
|
|
|
- testCases := []struct {
|
|
|
- name string
|
|
|
- html string
|
|
|
- shouldMatch bool
|
|
|
- description string
|
|
|
- }{
|
|
|
- {
|
|
|
- name: "anchor tag",
|
|
|
- html: `<a href="#">link</a>`,
|
|
|
- shouldMatch: true,
|
|
|
- description: "should match anchor tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "blockquote tag",
|
|
|
- html: `<blockquote>quote</blockquote>`,
|
|
|
- shouldMatch: true,
|
|
|
- description: "should match blockquote tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "dl tag",
|
|
|
- html: `<dl><dt>term</dt></dl>`,
|
|
|
- shouldMatch: true,
|
|
|
- description: "should match dl tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "div tag",
|
|
|
- html: `<div>content</div>`,
|
|
|
- shouldMatch: true,
|
|
|
- description: "should match div tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "img tag",
|
|
|
- html: `<img src="test.jpg">`,
|
|
|
- shouldMatch: true,
|
|
|
- description: "should match img tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "ol tag",
|
|
|
- html: `<ol><li>item</li></ol>`,
|
|
|
- shouldMatch: true,
|
|
|
- description: "should match ol tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "p tag",
|
|
|
- html: `<p>paragraph</p>`,
|
|
|
- shouldMatch: true,
|
|
|
- description: "should match p tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "pre tag",
|
|
|
- html: `<pre>code</pre>`,
|
|
|
- shouldMatch: true,
|
|
|
- description: "should match pre tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "table tag",
|
|
|
- html: `<table><tr></tr></table>`,
|
|
|
- shouldMatch: true,
|
|
|
- description: "should match table tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "ul tag",
|
|
|
- html: `<ul><li>item</li></ul>`,
|
|
|
- shouldMatch: true,
|
|
|
- description: "should match ul tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "self-closing anchor",
|
|
|
- html: `<a/>`,
|
|
|
- shouldMatch: true,
|
|
|
- description: "should match self-closing anchor tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "tag with attributes",
|
|
|
- html: `<a href="#" class="link">text</a>`,
|
|
|
- shouldMatch: true,
|
|
|
- description: "should match tags with attributes",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "uppercase tags",
|
|
|
- html: `<A href="#">link</A>`,
|
|
|
- shouldMatch: true,
|
|
|
- description: "should be case insensitive",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "mixed case tags",
|
|
|
- html: `<Img src="test.jpg">`,
|
|
|
- shouldMatch: true,
|
|
|
- description: "should match mixed case tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "span tag",
|
|
|
- html: `<span>text</span>`,
|
|
|
- shouldMatch: false,
|
|
|
- description: "should NOT match span tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "em tag",
|
|
|
- html: `<em>emphasis</em>`,
|
|
|
- shouldMatch: false,
|
|
|
- description: "should NOT match em tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "strong tag",
|
|
|
- html: `<strong>bold</strong>`,
|
|
|
- shouldMatch: false,
|
|
|
- description: "should NOT match strong tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "i tag",
|
|
|
- html: `<i>italic</i>`,
|
|
|
- shouldMatch: false,
|
|
|
- description: "should NOT match i tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "b tag",
|
|
|
- html: `<b>bold</b>`,
|
|
|
- shouldMatch: false,
|
|
|
- description: "should NOT match b tags",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "plain text",
|
|
|
- html: `just plain text`,
|
|
|
- shouldMatch: false,
|
|
|
- description: "should NOT match plain text",
|
|
|
- },
|
|
|
- {
|
|
|
- name: "empty string",
|
|
|
- html: ``,
|
|
|
- shouldMatch: false,
|
|
|
- description: "should NOT match empty string",
|
|
|
- },
|
|
|
- }
|
|
|
-
|
|
|
- for _, tc := range testCases {
|
|
|
- t.Run(tc.name, func(t *testing.T) {
|
|
|
- result := divToPElementsRegexp.MatchString(tc.html)
|
|
|
- if result != tc.shouldMatch {
|
|
|
- t.Errorf("%s\nHTML: %s\nExpected match: %v, Got: %v", tc.description, tc.html, tc.shouldMatch, result)
|
|
|
- }
|
|
|
- })
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
func TestTransformMisusedDivsIntoParagraphsEdgeCases(t *testing.T) {
|
|
|
t.Run("document with no divs", func(t *testing.T) {
|
|
|
html := `<html><body><p>No divs here</p><span>Just other elements</span></body></html>`
|