Просмотр исходного кода

refactor(sanitizer): split common stripping code into iterator function

gudvinr 5 дней назад
Родитель
Commit
863d6039ad
1 изменённый файл: 29 добавлено и 12 удалено
  1. 29 12
      internal/reader/sanitizer/strip_tags.go

+ 29 - 12
internal/reader/sanitizer/strip_tags.go

@@ -14,22 +14,39 @@ import (
 // StripTags removes all HTML/XML tags from the input string.
 // This function must *only* be used for cosmetic purposes, not to prevent code injections like XSS.
 func StripTags(input string) string {
-	tokenizer := html.NewTokenizer(strings.NewReader(input))
-	var buffer strings.Builder
+	dst := &strings.Builder{}
+	src := strings.NewReader(input)
 
-	for {
-		if tokenizer.Next() == html.ErrorToken {
-			err := tokenizer.Err()
-			if errors.Is(err, io.EOF) {
-				return buffer.String()
-			}
+	err := stripIter(src, func(text string) bool {
+		dst.WriteString(text)
+		return true
+	})
+	if err != nil {
+		return ""
+	}
 
-			return ""
-		}
+	return dst.String()
+}
 
+// stripIter iterates over the input [io.Reader] and calls the yield function for each [html.TextToken].
+// Other kinds of [html.TokenType] are skipped.
+func stripIter(src io.Reader, yield func(string) bool) error {
+	tokenizer := html.NewTokenizer(src)
+
+	for tokenizer.Next() != html.ErrorToken {
 		token := tokenizer.Token()
-		if token.Type == html.TextToken {
-			buffer.WriteString(token.Data)
+		if token.Type != html.TextToken {
+			continue
 		}
+
+		if !yield(token.Data) {
+			break
+		}
+	}
+
+	if err := tokenizer.Err(); !errors.Is(err, io.EOF) {
+		return err
 	}
+
+	return nil
 }