From 4ea03de40d7ee400dfb3e97488df8f00b2a363b0 Mon Sep 17 00:00:00 2001
From: Olivier Poitrey <rs@rhapsodyk.net>
Date: Fri, 23 Mar 2018 02:45:05 -0700
Subject: [PATCH] Optimize JSON string encoding using a lookup table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

benchstat old new
name                             old time/op    new time/op    delta
AppendString/MultiBytesFirst-8     77.9ns ± 5%    70.2ns ± 1%   -9.88%  (p=0.008 n=5+5)
AppendString/MultiBytesMiddle-8    64.2ns ± 1%    56.3ns ± 5%  -12.19%  (p=0.008 n=5+5)
AppendString/MultiBytesLast-8      51.2ns ± 2%    45.2ns ± 4%  -11.65%  (p=0.008 n=5+5)
AppendString/NoEncoding-8          36.2ns ± 4%    34.0ns ± 6%     ~     (p=0.087 n=5+5)
AppendString/EncodingFirst-8       67.7ns ± 2%    59.4ns ± 2%  -12.26%  (p=0.008 n=5+5)
AppendString/EncodingMiddle-8      56.5ns ± 2%    50.6ns ± 5%  -10.54%  (p=0.008 n=5+5)
AppendString/EncodingLast-8        41.3ns ± 1%    39.6ns ± 5%   -4.11%  (p=0.024 n=5+5)
AppendBytes/MultiBytesLast-8       53.5ns ± 6%    45.6ns ± 4%  -14.79%  (p=0.008 n=5+5)
AppendBytes/NoEncoding-8           36.3ns ± 3%    28.6ns ± 3%  -21.10%  (p=0.008 n=5+5)
AppendBytes/EncodingFirst-8        67.3ns ± 4%    62.1ns ± 4%   -7.75%  (p=0.008 n=5+5)
AppendBytes/EncodingMiddle-8       59.2ns ± 7%    51.0ns ± 6%  -13.85%  (p=0.008 n=5+5)
AppendBytes/EncodingLast-8         43.7ns ± 6%    34.4ns ± 2%  -21.32%  (p=0.008 n=5+5)
AppendBytes/MultiBytesFirst-8      77.7ns ± 2%    71.2ns ± 3%   -8.37%  (p=0.008 n=5+5)
AppendBytes/MultiBytesMiddle-8     63.6ns ± 3%    57.8ns ± 5%   -9.12%  (p=0.008 n=5+5)
---
 internal/json/bytes.go       | 85 ++++++++++++++++++++++++++++++++
 internal/json/bytes_test.go  | 82 +++++++++++++++++++++++++++++++
 internal/json/string.go      | 94 ++++--------------------------------
 internal/json/string_test.go | 77 -----------------------------
 4 files changed, 177 insertions(+), 161 deletions(-)
 create mode 100644 internal/json/bytes.go
 create mode 100644 internal/json/bytes_test.go

diff --git a/internal/json/bytes.go b/internal/json/bytes.go
new file mode 100644
index 0000000..8f7d5fe
--- /dev/null
+++ b/internal/json/bytes.go
@@ -0,0 +1,85 @@
+package json
+
+import "unicode/utf8"
+
+// AppendBytes is a mirror of AppendString with a []byte arg.
+func AppendBytes(dst, s []byte) []byte {
+	dst = append(dst, '"')
+	for i := 0; i < len(s); i++ {
+		if !noEscapeTable[s[i]] {
+			dst = appendBytesComplex(dst, s, i)
+			return append(dst, '"')
+		}
+	}
+	dst = append(dst, s...)
+	return append(dst, '"')
+}
+
+// AppendHex encodes the input bytes to a hex string and appends
+// the encoded string to the input byte slice.
+//
+// The operation loops through each byte and encodes it as hex using
+// the hex lookup table.
+func AppendHex(dst, s []byte) []byte {
+	dst = append(dst, '"')
+	for _, v := range s {
+		dst = append(dst, hex[v>>4], hex[v&0x0f])
+	}
+	return append(dst, '"')
+}
+
+// appendBytesComplex is a mirror of the appendStringComplex
+// with []byte arg
+func appendBytesComplex(dst, s []byte, i int) []byte {
+	start := 0
+	for i < len(s) {
+		b := s[i]
+		if b >= utf8.RuneSelf {
+			r, size := utf8.DecodeRune(s[i:])
+			if r == utf8.RuneError && size == 1 {
+				if start < i {
+					dst = append(dst, s[start:i]...)
+				}
+				dst = append(dst, `\ufffd`...)
+				i += size
+				start = i
+				continue
+			}
+			i += size
+			continue
+		}
+		if noEscapeTable[b] {
+			i++
+			continue
+		}
+		// We encountered a character that needs to be encoded.
+		// Let's append the previous simple characters to the byte slice
+		// and switch our operation to read and encode the remainder
+		// characters byte-by-byte.
+		if start < i {
+			dst = append(dst, s[start:i]...)
+		}
+		switch b {
+		case '"', '\\':
+			dst = append(dst, '\\', b)
+		case '\b':
+			dst = append(dst, '\\', 'b')
+		case '\f':
+			dst = append(dst, '\\', 'f')
+		case '\n':
+			dst = append(dst, '\\', 'n')
+		case '\r':
+			dst = append(dst, '\\', 'r')
+		case '\t':
+			dst = append(dst, '\\', 't')
+		default:
+			dst = append(dst, '\\', 'u', '0', '0', hex[b>>4], hex[b&0xF])
+		}
+		i++
+		start = i
+	}
+	if start < len(s) {
+		dst = append(dst, s[start:]...)
+	}
+	return dst
+}
diff --git a/internal/json/bytes_test.go b/internal/json/bytes_test.go
new file mode 100644
index 0000000..e33c1e0
--- /dev/null
+++ b/internal/json/bytes_test.go
@@ -0,0 +1,82 @@
+package json
+
+import (
+	"testing"
+	"unicode"
+)
+
+func TestAppendBytes(t *testing.T) {
+	for _, tt := range encodeStringTests {
+		b := AppendBytes([]byte{}, []byte(tt.in))
+		if got, want := string(b), tt.out; got != want {
+			t.Errorf("appendBytes(%q) = %#q, want %#q", tt.in, got, want)
+		}
+	}
+}
+
+func TestAppendHex(t *testing.T) {
+	for _, tt := range encodeHexTests {
+		b := AppendHex([]byte{}, []byte{tt.in})
+		if got, want := string(b), tt.out; got != want {
+			t.Errorf("appendHex(%x) = %s, want %s", tt.in, got, want)
+		}
+	}
+}
+
+func TestStringBytes(t *testing.T) {
+	t.Parallel()
+	// Test that AppendString and AppendBytes use the same encoding.
+	var r []rune
+	for i := '\u0000'; i <= unicode.MaxRune; i++ {
+		r = append(r, i)
+	}
+	s := string(r) + "\xff\xff\xffhello" // some invalid UTF-8 too
+
+	enc := string(AppendString([]byte{}, s))
+	encBytes := string(AppendBytes([]byte{}, []byte(s)))
+
+	if enc != encBytes {
+		i := 0
+		for i < len(enc) && i < len(encBytes) && enc[i] == encBytes[i] {
+			i++
+		}
+		enc = enc[i:]
+		encBytes = encBytes[i:]
+		i = 0
+		for i < len(enc) && i < len(encBytes) && enc[len(enc)-i-1] == encBytes[len(encBytes)-i-1] {
+			i++
+		}
+		enc = enc[:len(enc)-i]
+		encBytes = encBytes[:len(encBytes)-i]
+
+		if len(enc) > 20 {
+			enc = enc[:20] + "..."
+		}
+		if len(encBytes) > 20 {
+			encBytes = encBytes[:20] + "..."
+		}
+
+		t.Errorf("encodings differ at %#q vs %#q", enc, encBytes)
+	}
+}
+
+func BenchmarkAppendBytes(b *testing.B) {
+	tests := map[string]string{
+		"NoEncoding":       `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
+		"EncodingFirst":    `"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
+		"EncodingMiddle":   `aaaaaaaaaaaaaaaaaaaaaaaaa"aaaaaaaaaaaaaaaaaaaaaaaa`,
+		"EncodingLast":     `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"`,
+		"MultiBytesFirst":  `❤️aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
+		"MultiBytesMiddle": `aaaaaaaaaaaaaaaaaaaaaaaaa❤️aaaaaaaaaaaaaaaaaaaaaaaa`,
+		"MultiBytesLast":   `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa❤️`,
+	}
+	for name, str := range tests {
+		byt := []byte(str)
+		b.Run(name, func(b *testing.B) {
+			buf := make([]byte, 0, 100)
+			for i := 0; i < b.N; i++ {
+				_ = AppendBytes(buf, byt)
+			}
+		})
+	}
+}
diff --git a/internal/json/string.go b/internal/json/string.go
index 7f85ad6..bb606f0 100644
--- a/internal/json/string.go
+++ b/internal/json/string.go
@@ -4,6 +4,14 @@ import "unicode/utf8"
 
 const hex = "0123456789abcdef"
 
+var noEscapeTable = [256]bool{}
+
+func init() {
+	for i := 0; i <= 0x7e; i++ {
+		noEscapeTable[i] = i >= 0x20 && i != '\\' && i != '"'
+	}
+}
+
 // AppendStrings encodes the input strings to json and
 // appends the encoded string list to the input byte slice.
 func AppendStrings(dst []byte, vals []string) []byte {
@@ -38,7 +46,7 @@ func AppendString(dst []byte, s string) []byte {
 		// Check if the character needs encoding. Control characters, slashes,
 		// and the double quote need json encoding. Bytes above the ascii
 		// boundary needs utf8 encoding.
-		if s[i] < 0x20 || s[i] > 0x7e || s[i] == '\\' || s[i] == '"' {
+		if !noEscapeTable[s[i]] {
 			// We encountered a character that needs to be encoded. Switch
 			// to complex version of the algorithm.
 			dst = appendStringComplex(dst, s, i)
@@ -76,89 +84,7 @@ func appendStringComplex(dst []byte, s string, i int) []byte {
 			i += size
 			continue
 		}
-		if b >= 0x20 && b <= 0x7e && b != '\\' && b != '"' {
-			i++
-			continue
-		}
-		// We encountered a character that needs to be encoded.
-		// Let's append the previous simple characters to the byte slice
-		// and switch our operation to read and encode the remainder
-		// characters byte-by-byte.
-		if start < i {
-			dst = append(dst, s[start:i]...)
-		}
-		switch b {
-		case '"', '\\':
-			dst = append(dst, '\\', b)
-		case '\b':
-			dst = append(dst, '\\', 'b')
-		case '\f':
-			dst = append(dst, '\\', 'f')
-		case '\n':
-			dst = append(dst, '\\', 'n')
-		case '\r':
-			dst = append(dst, '\\', 'r')
-		case '\t':
-			dst = append(dst, '\\', 't')
-		default:
-			dst = append(dst, '\\', 'u', '0', '0', hex[b>>4], hex[b&0xF])
-		}
-		i++
-		start = i
-	}
-	if start < len(s) {
-		dst = append(dst, s[start:]...)
-	}
-	return dst
-}
-
-// AppendBytes is a mirror of appendString with []byte arg
-func AppendBytes(dst, s []byte) []byte {
-	dst = append(dst, '"')
-	for i := 0; i < len(s); i++ {
-		if s[i] < 0x20 || s[i] > 0x7e || s[i] == '\\' || s[i] == '"' {
-			dst = appendBytesComplex(dst, s, i)
-			return append(dst, '"')
-		}
-	}
-	dst = append(dst, s...)
-	return append(dst, '"')
-}
-
-// AppendHex encodes the input bytes to a hex string and appends
-// the encoded string to the input byte slice.
-//
-// The operation loops though each byte and encodes it as hex using
-// the hex lookup table.
-func AppendHex(dst, s []byte) []byte {
-	dst = append(dst, '"')
-	for _, v := range s {
-		dst = append(dst, hex[v>>4], hex[v&0x0f])
-	}
-	return append(dst, '"')
-}
-
-// appendBytesComplex is a mirror of the appendStringComplex
-// with []byte arg
-func appendBytesComplex(dst, s []byte, i int) []byte {
-	start := 0
-	for i < len(s) {
-		b := s[i]
-		if b >= utf8.RuneSelf {
-			r, size := utf8.DecodeRune(s[i:])
-			if r == utf8.RuneError && size == 1 {
-				if start < i {
-					dst = append(dst, s[start:i]...)
-				}
-				dst = append(dst, `\ufffd`...)
-				i += size
-				start = i
-				continue
-			}
-			i += size
-			continue
-		}
-		if b >= 0x20 && b <= 0x7e && b != '\\' && b != '"' {
+		if noEscapeTable[b] {
 			i++
 			continue
 		}
diff --git a/internal/json/string_test.go b/internal/json/string_test.go
index 0d5fc6c..a30b124 100644
--- a/internal/json/string_test.go
+++ b/internal/json/string_test.go
@@ -2,7 +2,6 @@ package json
 
 import (
 	"testing"
-	"unicode"
 )
 
 var encodeStringTests = []struct {
@@ -73,61 +72,6 @@ func TestAppendString(t *testing.T) {
 	}
 }
 
-func TestAppendBytes(t *testing.T) {
-	for _, tt := range encodeStringTests {
-		b := AppendBytes([]byte{}, []byte(tt.in))
-		if got, want := string(b), tt.out; got != want {
-			t.Errorf("appendBytes(%q) = %#q, want %#q", tt.in, got, want)
-		}
-	}
-}
-
-func TestAppendHex(t *testing.T) {
-	for _, tt := range encodeHexTests {
-		b := AppendHex([]byte{}, []byte{tt.in})
-		if got, want := string(b), tt.out; got != want {
-			t.Errorf("appendHex(%x) = %s, want %s", tt.in, got, want)
-		}
-	}
-}
-
-func TestStringBytes(t *testing.T) {
-	t.Parallel()
-	// Test that encodeState.stringBytes and encodeState.string use the same encoding.
-	var r []rune
-	for i := '\u0000'; i <= unicode.MaxRune; i++ {
-		r = append(r, i)
-	}
-	s := string(r) + "\xff\xff\xffhello" // some invalid UTF-8 too
-
-	enc := string(AppendString([]byte{}, s))
-	encBytes := string(AppendBytes([]byte{}, []byte(s)))
-
-	if enc != encBytes {
-		i := 0
-		for i < len(enc) && i < len(encBytes) && enc[i] == encBytes[i] {
-			i++
-		}
-		enc = enc[i:]
-		encBytes = encBytes[i:]
-		i = 0
-		for i < len(enc) && i < len(encBytes) && enc[len(enc)-i-1] == encBytes[len(encBytes)-i-1] {
-			i++
-		}
-		enc = enc[:len(enc)-i]
-		encBytes = encBytes[:len(encBytes)-i]
-
-		if len(enc) > 20 {
-			enc = enc[:20] + "..."
-		}
-		if len(encBytes) > 20 {
-			encBytes = encBytes[:20] + "..."
-		}
-
-		t.Errorf("encodings differ at %#q vs %#q", enc, encBytes)
-	}
-}
-
 func BenchmarkAppendString(b *testing.B) {
 	tests := map[string]string{
 		"NoEncoding":       `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
@@ -147,24 +91,3 @@ func BenchmarkAppendString(b *testing.B) {
 		})
 	}
 }
-
-func BenchmarkAppendBytes(b *testing.B) {
-	tests := map[string]string{
-		"NoEncoding":       `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
-		"EncodingFirst":    `"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
-		"EncodingMiddle":   `aaaaaaaaaaaaaaaaaaaaaaaaa"aaaaaaaaaaaaaaaaaaaaaaaa`,
-		"EncodingLast":     `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"`,
-		"MultiBytesFirst":  `❤️aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
-		"MultiBytesMiddle": `aaaaaaaaaaaaaaaaaaaaaaaaa❤️aaaaaaaaaaaaaaaaaaaaaaaa`,
-		"MultiBytesLast":   `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa❤️`,
-	}
-	for name, str := range tests {
-		byt := []byte(str)
-		b.Run(name, func(b *testing.B) {
-			buf := make([]byte, 0, 100)
-			for i := 0; i < b.N; i++ {
-				_ = AppendBytes(buf, byt)
-			}
-		})
-	}
-}