Optimize JSON string encoding using a lookup table
benchstat old new name old time/op new time/op delta AppendString/MultiBytesFirst-8 77.9ns ± 5% 70.2ns ± 1% -9.88% (p=0.008 n=5+5) AppendString/MultiBytesMiddle-8 64.2ns ± 1% 56.3ns ± 5% -12.19% (p=0.008 n=5+5) AppendString/MultiBytesLast-8 51.2ns ± 2% 45.2ns ± 4% -11.65% (p=0.008 n=5+5) AppendString/NoEncoding-8 36.2ns ± 4% 34.0ns ± 6% ~ (p=0.087 n=5+5) AppendString/EncodingFirst-8 67.7ns ± 2% 59.4ns ± 2% -12.26% (p=0.008 n=5+5) AppendString/EncodingMiddle-8 56.5ns ± 2% 50.6ns ± 5% -10.54% (p=0.008 n=5+5) AppendString/EncodingLast-8 41.3ns ± 1% 39.6ns ± 5% -4.11% (p=0.024 n=5+5) AppendBytes/MultiBytesLast-8 53.5ns ± 6% 45.6ns ± 4% -14.79% (p=0.008 n=5+5) AppendBytes/NoEncoding-8 36.3ns ± 3% 28.6ns ± 3% -21.10% (p=0.008 n=5+5) AppendBytes/EncodingFirst-8 67.3ns ± 4% 62.1ns ± 4% -7.75% (p=0.008 n=5+5) AppendBytes/EncodingMiddle-8 59.2ns ± 7% 51.0ns ± 6% -13.85% (p=0.008 n=5+5) AppendBytes/EncodingLast-8 43.7ns ± 6% 34.4ns ± 2% -21.32% (p=0.008 n=5+5) AppendBytes/MultiBytesFirst-8 77.7ns ± 2% 71.2ns ± 3% -8.37% (p=0.008 n=5+5) AppendBytes/MultiBytesMiddle-8 63.6ns ± 3% 57.8ns ± 5% -9.12% (p=0.008 n=5+5)
This commit is contained in:
parent
5250a1ba2d
commit
4ea03de40d
85
internal/json/bytes.go
Normal file
85
internal/json/bytes.go
Normal file
@ -0,0 +1,85 @@
|
||||
package json
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// AppendBytes is a mirror of appendString with []byte arg
|
||||
func AppendBytes(dst, s []byte) []byte {
|
||||
dst = append(dst, '"')
|
||||
for i := 0; i < len(s); i++ {
|
||||
if !noEscapeTable[s[i]] {
|
||||
dst = appendBytesComplex(dst, s, i)
|
||||
return append(dst, '"')
|
||||
}
|
||||
}
|
||||
dst = append(dst, s...)
|
||||
return append(dst, '"')
|
||||
}
|
||||
|
||||
// AppendHex encodes the input bytes to a hex string and appends
|
||||
// the encoded string to the input byte slice.
|
||||
//
|
||||
// The operation loops though each byte and encodes it as hex using
|
||||
// the hex lookup table.
|
||||
func AppendHex(dst, s []byte) []byte {
|
||||
dst = append(dst, '"')
|
||||
for _, v := range s {
|
||||
dst = append(dst, hex[v>>4], hex[v&0x0f])
|
||||
}
|
||||
return append(dst, '"')
|
||||
}
|
||||
|
||||
// appendBytesComplex is a mirror of the appendStringComplex
|
||||
// with []byte arg
|
||||
func appendBytesComplex(dst, s []byte, i int) []byte {
|
||||
start := 0
|
||||
for i < len(s) {
|
||||
b := s[i]
|
||||
if b >= utf8.RuneSelf {
|
||||
r, size := utf8.DecodeRune(s[i:])
|
||||
if r == utf8.RuneError && size == 1 {
|
||||
if start < i {
|
||||
dst = append(dst, s[start:i]...)
|
||||
}
|
||||
dst = append(dst, `\ufffd`...)
|
||||
i += size
|
||||
start = i
|
||||
continue
|
||||
}
|
||||
i += size
|
||||
continue
|
||||
}
|
||||
if noEscapeTable[b] {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
// We encountered a character that needs to be encoded.
|
||||
// Let's append the previous simple characters to the byte slice
|
||||
// and switch our operation to read and encode the remainder
|
||||
// characters byte-by-byte.
|
||||
if start < i {
|
||||
dst = append(dst, s[start:i]...)
|
||||
}
|
||||
switch b {
|
||||
case '"', '\\':
|
||||
dst = append(dst, '\\', b)
|
||||
case '\b':
|
||||
dst = append(dst, '\\', 'b')
|
||||
case '\f':
|
||||
dst = append(dst, '\\', 'f')
|
||||
case '\n':
|
||||
dst = append(dst, '\\', 'n')
|
||||
case '\r':
|
||||
dst = append(dst, '\\', 'r')
|
||||
case '\t':
|
||||
dst = append(dst, '\\', 't')
|
||||
default:
|
||||
dst = append(dst, '\\', 'u', '0', '0', hex[b>>4], hex[b&0xF])
|
||||
}
|
||||
i++
|
||||
start = i
|
||||
}
|
||||
if start < len(s) {
|
||||
dst = append(dst, s[start:]...)
|
||||
}
|
||||
return dst
|
||||
}
|
82
internal/json/bytes_test.go
Normal file
82
internal/json/bytes_test.go
Normal file
@ -0,0 +1,82 @@
|
||||
package json
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
func TestAppendBytes(t *testing.T) {
|
||||
for _, tt := range encodeStringTests {
|
||||
b := AppendBytes([]byte{}, []byte(tt.in))
|
||||
if got, want := string(b), tt.out; got != want {
|
||||
t.Errorf("appendBytes(%q) = %#q, want %#q", tt.in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAppendHex(t *testing.T) {
|
||||
for _, tt := range encodeHexTests {
|
||||
b := AppendHex([]byte{}, []byte{tt.in})
|
||||
if got, want := string(b), tt.out; got != want {
|
||||
t.Errorf("appendHex(%x) = %s, want %s", tt.in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestStringBytes(t *testing.T) {
|
||||
t.Parallel()
|
||||
// Test that encodeState.stringBytes and encodeState.string use the same encoding.
|
||||
var r []rune
|
||||
for i := '\u0000'; i <= unicode.MaxRune; i++ {
|
||||
r = append(r, i)
|
||||
}
|
||||
s := string(r) + "\xff\xff\xffhello" // some invalid UTF-8 too
|
||||
|
||||
enc := string(AppendString([]byte{}, s))
|
||||
encBytes := string(AppendBytes([]byte{}, []byte(s)))
|
||||
|
||||
if enc != encBytes {
|
||||
i := 0
|
||||
for i < len(enc) && i < len(encBytes) && enc[i] == encBytes[i] {
|
||||
i++
|
||||
}
|
||||
enc = enc[i:]
|
||||
encBytes = encBytes[i:]
|
||||
i = 0
|
||||
for i < len(enc) && i < len(encBytes) && enc[len(enc)-i-1] == encBytes[len(encBytes)-i-1] {
|
||||
i++
|
||||
}
|
||||
enc = enc[:len(enc)-i]
|
||||
encBytes = encBytes[:len(encBytes)-i]
|
||||
|
||||
if len(enc) > 20 {
|
||||
enc = enc[:20] + "..."
|
||||
}
|
||||
if len(encBytes) > 20 {
|
||||
encBytes = encBytes[:20] + "..."
|
||||
}
|
||||
|
||||
t.Errorf("encodings differ at %#q vs %#q", enc, encBytes)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAppendBytes(b *testing.B) {
|
||||
tests := map[string]string{
|
||||
"NoEncoding": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||
"EncodingFirst": `"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||
"EncodingMiddle": `aaaaaaaaaaaaaaaaaaaaaaaaa"aaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||
"EncodingLast": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"`,
|
||||
"MultiBytesFirst": `❤️aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||
"MultiBytesMiddle": `aaaaaaaaaaaaaaaaaaaaaaaaa❤️aaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||
"MultiBytesLast": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa❤️`,
|
||||
}
|
||||
for name, str := range tests {
|
||||
byt := []byte(str)
|
||||
b.Run(name, func(b *testing.B) {
|
||||
buf := make([]byte, 0, 100)
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = AppendBytes(buf, byt)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
@ -4,6 +4,14 @@ import "unicode/utf8"
|
||||
|
||||
const hex = "0123456789abcdef"
|
||||
|
||||
var noEscapeTable = [256]bool{}
|
||||
|
||||
func init() {
|
||||
for i := 0; i <= 0x7e; i++ {
|
||||
noEscapeTable[i] = i >= 0x20 && i != '\\' && i != '"'
|
||||
}
|
||||
}
|
||||
|
||||
// AppendStrings encodes the input strings to json and
|
||||
// appends the encoded string list to the input byte slice.
|
||||
func AppendStrings(dst []byte, vals []string) []byte {
|
||||
@ -38,7 +46,7 @@ func AppendString(dst []byte, s string) []byte {
|
||||
// Check if the character needs encoding. Control characters, slashes,
|
||||
// and the double quote need json encoding. Bytes above the ascii
|
||||
// boundary needs utf8 encoding.
|
||||
if s[i] < 0x20 || s[i] > 0x7e || s[i] == '\\' || s[i] == '"' {
|
||||
if !noEscapeTable[s[i]] {
|
||||
// We encountered a character that needs to be encoded. Switch
|
||||
// to complex version of the algorithm.
|
||||
dst = appendStringComplex(dst, s, i)
|
||||
@ -76,89 +84,7 @@ func appendStringComplex(dst []byte, s string, i int) []byte {
|
||||
i += size
|
||||
continue
|
||||
}
|
||||
if b >= 0x20 && b <= 0x7e && b != '\\' && b != '"' {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
// We encountered a character that needs to be encoded.
|
||||
// Let's append the previous simple characters to the byte slice
|
||||
// and switch our operation to read and encode the remainder
|
||||
// characters byte-by-byte.
|
||||
if start < i {
|
||||
dst = append(dst, s[start:i]...)
|
||||
}
|
||||
switch b {
|
||||
case '"', '\\':
|
||||
dst = append(dst, '\\', b)
|
||||
case '\b':
|
||||
dst = append(dst, '\\', 'b')
|
||||
case '\f':
|
||||
dst = append(dst, '\\', 'f')
|
||||
case '\n':
|
||||
dst = append(dst, '\\', 'n')
|
||||
case '\r':
|
||||
dst = append(dst, '\\', 'r')
|
||||
case '\t':
|
||||
dst = append(dst, '\\', 't')
|
||||
default:
|
||||
dst = append(dst, '\\', 'u', '0', '0', hex[b>>4], hex[b&0xF])
|
||||
}
|
||||
i++
|
||||
start = i
|
||||
}
|
||||
if start < len(s) {
|
||||
dst = append(dst, s[start:]...)
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// AppendBytes is a mirror of appendString with []byte arg
|
||||
func AppendBytes(dst, s []byte) []byte {
|
||||
dst = append(dst, '"')
|
||||
for i := 0; i < len(s); i++ {
|
||||
if s[i] < 0x20 || s[i] > 0x7e || s[i] == '\\' || s[i] == '"' {
|
||||
dst = appendBytesComplex(dst, s, i)
|
||||
return append(dst, '"')
|
||||
}
|
||||
}
|
||||
dst = append(dst, s...)
|
||||
return append(dst, '"')
|
||||
}
|
||||
|
||||
// AppendHex encodes the input bytes to a hex string and appends
|
||||
// the encoded string to the input byte slice.
|
||||
//
|
||||
// The operation loops though each byte and encodes it as hex using
|
||||
// the hex lookup table.
|
||||
func AppendHex(dst, s []byte) []byte {
|
||||
dst = append(dst, '"')
|
||||
for _, v := range s {
|
||||
dst = append(dst, hex[v>>4], hex[v&0x0f])
|
||||
}
|
||||
return append(dst, '"')
|
||||
}
|
||||
|
||||
// appendBytesComplex is a mirror of the appendStringComplex
|
||||
// with []byte arg
|
||||
func appendBytesComplex(dst, s []byte, i int) []byte {
|
||||
start := 0
|
||||
for i < len(s) {
|
||||
b := s[i]
|
||||
if b >= utf8.RuneSelf {
|
||||
r, size := utf8.DecodeRune(s[i:])
|
||||
if r == utf8.RuneError && size == 1 {
|
||||
if start < i {
|
||||
dst = append(dst, s[start:i]...)
|
||||
}
|
||||
dst = append(dst, `\ufffd`...)
|
||||
i += size
|
||||
start = i
|
||||
continue
|
||||
}
|
||||
i += size
|
||||
continue
|
||||
}
|
||||
if b >= 0x20 && b <= 0x7e && b != '\\' && b != '"' {
|
||||
if noEscapeTable[b] {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
@ -2,7 +2,6 @@ package json
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
var encodeStringTests = []struct {
|
||||
@ -73,61 +72,6 @@ func TestAppendString(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestAppendBytes(t *testing.T) {
|
||||
for _, tt := range encodeStringTests {
|
||||
b := AppendBytes([]byte{}, []byte(tt.in))
|
||||
if got, want := string(b), tt.out; got != want {
|
||||
t.Errorf("appendBytes(%q) = %#q, want %#q", tt.in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAppendHex(t *testing.T) {
|
||||
for _, tt := range encodeHexTests {
|
||||
b := AppendHex([]byte{}, []byte{tt.in})
|
||||
if got, want := string(b), tt.out; got != want {
|
||||
t.Errorf("appendHex(%x) = %s, want %s", tt.in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestStringBytes(t *testing.T) {
|
||||
t.Parallel()
|
||||
// Test that encodeState.stringBytes and encodeState.string use the same encoding.
|
||||
var r []rune
|
||||
for i := '\u0000'; i <= unicode.MaxRune; i++ {
|
||||
r = append(r, i)
|
||||
}
|
||||
s := string(r) + "\xff\xff\xffhello" // some invalid UTF-8 too
|
||||
|
||||
enc := string(AppendString([]byte{}, s))
|
||||
encBytes := string(AppendBytes([]byte{}, []byte(s)))
|
||||
|
||||
if enc != encBytes {
|
||||
i := 0
|
||||
for i < len(enc) && i < len(encBytes) && enc[i] == encBytes[i] {
|
||||
i++
|
||||
}
|
||||
enc = enc[i:]
|
||||
encBytes = encBytes[i:]
|
||||
i = 0
|
||||
for i < len(enc) && i < len(encBytes) && enc[len(enc)-i-1] == encBytes[len(encBytes)-i-1] {
|
||||
i++
|
||||
}
|
||||
enc = enc[:len(enc)-i]
|
||||
encBytes = encBytes[:len(encBytes)-i]
|
||||
|
||||
if len(enc) > 20 {
|
||||
enc = enc[:20] + "..."
|
||||
}
|
||||
if len(encBytes) > 20 {
|
||||
encBytes = encBytes[:20] + "..."
|
||||
}
|
||||
|
||||
t.Errorf("encodings differ at %#q vs %#q", enc, encBytes)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAppendString(b *testing.B) {
|
||||
tests := map[string]string{
|
||||
"NoEncoding": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||
@ -147,24 +91,3 @@ func BenchmarkAppendString(b *testing.B) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAppendBytes(b *testing.B) {
|
||||
tests := map[string]string{
|
||||
"NoEncoding": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||
"EncodingFirst": `"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||
"EncodingMiddle": `aaaaaaaaaaaaaaaaaaaaaaaaa"aaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||
"EncodingLast": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"`,
|
||||
"MultiBytesFirst": `❤️aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||
"MultiBytesMiddle": `aaaaaaaaaaaaaaaaaaaaaaaaa❤️aaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||
"MultiBytesLast": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa❤️`,
|
||||
}
|
||||
for name, str := range tests {
|
||||
byt := []byte(str)
|
||||
b.Run(name, func(b *testing.B) {
|
||||
buf := make([]byte, 0, 100)
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = AppendBytes(buf, byt)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user