فهرست منبع

refactor(xml): improve the performances of `NewXMLDecoder`

- Invert a condition to make the code more readable
- Extract the encoding directly from the slice of bytes instead of converting
  it to string first.
Julien Voisin 1 سال پیش
والد
کامیت
b193bc212a
1فایلهای تغییر یافته به همراه13 افزوده شده و 16 حذف شده
  1. 13 16
      internal/reader/xml/decoder.go

+ 13 - 16
internal/reader/xml/decoder.go

@@ -17,15 +17,15 @@ import (
 func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder {
 	var decoder *xml.Decoder
 	buffer, _ := io.ReadAll(data)
-	enc := procInst("encoding", string(buffer))
-	if enc != "" && enc != "utf-8" && enc != "UTF-8" && !strings.EqualFold(enc, "utf-8") {
-		// filter invalid chars later within decoder.CharsetReader
-		data.Seek(0, io.SeekStart)
-		decoder = xml.NewDecoder(data)
-	} else {
+	enc := getEncoding(buffer)
+	if enc == "" || strings.EqualFold(enc, "utf-8") {
 		// filter invalid chars now, since decoder.CharsetReader not called for utf-8 content
 		filteredBytes := bytes.Map(filterValidXMLChar, buffer)
 		decoder = xml.NewDecoder(bytes.NewReader(filteredBytes))
+	} else {
+		// filter invalid chars later within decoder.CharsetReader
+		data.Seek(0, io.SeekStart)
+		decoder = xml.NewDecoder(data)
 	}
 
 	decoder.Entity = xml.HTMLEntity
@@ -60,27 +60,24 @@ func filterValidXMLChar(r rune) rune {
 	return -1
 }
 
-// This function is copied from encoding/xml package,
-// procInst parses the `param="..."` or `param='...'`
-// value out of the provided string, returning "" if not found.
-func procInst(param, s string) string {
+// This function is copied from encoding/xml's procInst and adapted for []bytes instead of string
+func getEncoding(b []byte) string {
 	// TODO: this parsing is somewhat lame and not exact.
 	// It works for all actual cases, though.
-	param += "="
-	idx := strings.Index(s, param)
+	idx := bytes.Index(b, []byte("encoding="))
 	if idx == -1 {
 		return ""
 	}
-	v := s[idx+len(param):]
-	if v == "" {
+	v := b[idx+len("encoding="):]
+	if len(v) == 0 {
 		return ""
 	}
 	if v[0] != '\'' && v[0] != '"' {
 		return ""
 	}
-	idx = strings.IndexRune(v[1:], rune(v[0]))
+	idx = bytes.IndexRune(v[1:], rune(v[0]))
 	if idx == -1 {
 		return ""
 	}
-	return v[1 : idx+1]
+	return string(v[1 : idx+1])
 }