|
|
@@ -14,22 +14,39 @@ import (
|
|
|
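// StripTags removes all HTML/XML tags from the input string and returns the remaining text, or an empty string if the tokenizer reports an error other than end of input.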
|
|
|
// This function must *only* be used for cosmetic purposes, not to prevent code injections like XSS.
|
|
|
func StripTags(input string) string {
|
|
|
- tokenizer := html.NewTokenizer(strings.NewReader(input))
|
|
|
- var buffer strings.Builder
|
|
|
+ dst := &strings.Builder{}
|
|
|
+ src := strings.NewReader(input)
|
|
|
|
|
|
- for {
|
|
|
- if tokenizer.Next() == html.ErrorToken {
|
|
|
- err := tokenizer.Err()
|
|
|
- if errors.Is(err, io.EOF) {
|
|
|
- return buffer.String()
|
|
|
- }
|
|
|
+ err := stripIter(src, func(text string) bool {
|
|
|
+ dst.WriteString(text)
|
|
|
+ return true
|
|
|
+ })
|
|
|
+ if err != nil {
|
|
|
+ return ""
|
|
|
+ }
|
|
|
|
|
|
- return ""
|
|
|
- }
|
|
|
+ return dst.String()
|
|
|
+}
|
|
|
|
|
|
+// stripIter iterates over the input [io.Reader] and calls the yield function for each [html.TextToken].
|
|
|
+// Other kinds of [html.TokenType] are skipped. Iteration stops early when yield returns false; a tokenizer error other than [io.EOF] is returned.
|
|
|
+func stripIter(src io.Reader, yield func(string) bool) error {
|
|
|
+ tokenizer := html.NewTokenizer(src)
|
|
|
+
|
|
|
+ for tokenizer.Next() != html.ErrorToken {
|
|
|
token := tokenizer.Token()
|
|
|
- if token.Type == html.TextToken {
|
|
|
- buffer.WriteString(token.Data)
|
|
|
+ if token.Type != html.TextToken {
|
|
|
+ continue
|
|
|
}
|
|
|
+
|
|
|
+ if !yield(token.Data) {
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if err := tokenizer.Err(); !errors.Is(err, io.EOF) {
|
|
|
+ return err
|
|
|
}
|
|
|
+
|
|
|
+ return nil
|
|
|
}
|