|
|
@@ -0,0 +1,50 @@
|
|
|
+// Copyright 2019 Frédéric Guillot. All rights reserved.
|
|
|
+// Use of this source code is governed by the Apache 2.0
|
|
|
+// license that can be found in the LICENSE file.
|
|
|
+
|
|
|
+package xml // import "miniflux.app/reader/xml"
|
|
|
+
|
|
|
+import (
|
|
|
+ "bytes"
|
|
|
+ "encoding/xml"
|
|
|
+ "fmt"
|
|
|
+ "io"
|
|
|
+ "io/ioutil"
|
|
|
+
|
|
|
+ "miniflux.app/reader/encoding"
|
|
|
+)
|
|
|
+
|
|
|
+// NewDecoder returns a XML decoder that filters illegal characters.
|
|
|
+func NewDecoder(data io.Reader) *xml.Decoder {
|
|
|
+ decoder := xml.NewDecoder(data)
|
|
|
+ decoder.Entity = xml.HTMLEntity
|
|
|
+ decoder.Strict = false
|
|
|
+ decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
|
|
|
+ utf8Reader, err := encoding.CharsetReader(charset, input)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ rawData, err := ioutil.ReadAll(utf8Reader)
|
|
|
+ if err != nil {
|
|
|
+ return nil, fmt.Errorf("Unable to read data: %q", err)
|
|
|
+ }
|
|
|
+ filteredBytes := bytes.Map(filterValidXMLChar, rawData)
|
|
|
+ return bytes.NewReader(filteredBytes), nil
|
|
|
+ }
|
|
|
+
|
|
|
+ return decoder
|
|
|
+}
|
|
|
+
|
|
|
+// This function is copied from encoding/xml package,
|
|
|
+// and is used to check if all the characters are legal.
|
|
|
+func filterValidXMLChar(r rune) rune {
|
|
|
+ if r == 0x09 ||
|
|
|
+ r == 0x0A ||
|
|
|
+ r == 0x0D ||
|
|
|
+ r >= 0x20 && r <= 0xD7FF ||
|
|
|
+ r >= 0xE000 && r <= 0xFFFD ||
|
|
|
+ r >= 0x10000 && r <= 0x10FFFF {
|
|
|
+ return r
|
|
|
+ }
|
|
|
+ return -1
|
|
|
+}
|