Просмотр исходного кода

Use runes instead of bytes to truncate JSON feed titles

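This fix avoids breaking Unicode strings: truncating by byte index can cut a multi-byte UTF-8 character in half, leaving an invalid byte sequence.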

It solves this error:

pq: invalid byte sequence for encoding "UTF8": 0xf0 0x9f 0x9a 0x2e
Jan-Lukas Else 4 лет назад
Родитель
Коммит
20cd023c07
2 изменённых файла: 40 добавлений и 2 удаления
  1. 5 2
      reader/json/json.go
  2. 35 0
      reader/json/parser_test.go

+ 5 - 2
reader/json/json.go

@@ -182,8 +182,11 @@ func getAuthor(author jsonAuthor) string {
 func truncate(str string) string {
 	max := 100
 	str = strings.TrimSpace(str)
-	if len(str) > max {
-		return str[:max] + "..."
+
+	// Convert to runes to be safe with unicode
+	runes := []rune(str)
+	if len(runes) > max {
+		return string(runes[:max]) + "…"
 	}
 
 	return str

+ 35 - 0
reader/json/parser_test.go

@@ -407,6 +407,41 @@ func TestParseTruncateItemTitle(t *testing.T) {
 	if len(feed.Entries[0].Title) != 103 {
 		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
 	}
+
+	if len([]rune(feed.Entries[0].Title)) != 101 {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
+}
+
+func TestParseTruncateItemTitleUnicode(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"title": "My Example Feed",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"items": [
+			{
+				"title": "I’m riding my electric bike and came across this castle. It’s called “Schloss Richmond”. 🚴‍♂️"
+			}
+		]
+	}`
+
+	feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if len(feed.Entries[0].Title) != 110 {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
+
+	if len([]rune(feed.Entries[0].Title)) != 93 {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
 }
 
 func TestParseItemTitleWithXMLTags(t *testing.T) {