Optimize JSON encoding even further

The last optimization targeted JSON strings with no characters to encode. This version focuses on strings that contain some characters to encode, applying the same trick to the substrings that do not need encoding.

benchmark               old ns/op    new ns/op    delta
.../NoEncoding-8        60.2         51.3         -14.78%
.../EncodingFirst-8     140          116          -17.14%
.../EncodingMiddle-8    112          86.4         -22.86%
.../EncodingLast-8      62.8         61.1         -2.71%
.../MultiBytesFirst-8   164          129          -21.34%
.../MultiBytesMiddle-8  133          96.9         -27.14%
.../MultiBytesLast-8    81.9         73.5         -10.26%
2017-06-24 20:55:52 -07:00 · 2017-06-24 20:55:52 -07:00 · fde564e937
commit fde564e937
parent 274f2e4c61
3 changed files with 78 additions and 54 deletions
--- a/README.md
+++ b/README.md
@ -257,9 +257,9 @@ All operations are allocation free (those numbers *include* JSON encoding):
 ```
 BenchmarkLogEmpty-8        100000000    19.1 ns/op	   0 B/op       0 allocs/op
 BenchmarkDisabled-8        500000000     4.07 ns/op	   0 B/op       0 allocs/op
-BenchmarkInfo-8            30000000	    46.3 ns/op	   0 B/op       0 allocs/op
-BenchmarkContextFields-8   30000000	    47.1 ns/op	   0 B/op       0 allocs/op
-BenchmarkLogFields-8       10000000	   186 ns/op	   0 B/op       0 allocs/op
+BenchmarkInfo-8            30000000	    42.5 ns/op	   0 B/op       0 allocs/op
+BenchmarkContextFields-8   30000000	    44.9 ns/op	   0 B/op       0 allocs/op
+BenchmarkLogFields-8       10000000	   184 ns/op	   0 B/op       0 allocs/op
 ```

 Using Uber's zap [comparison benchmark](https://github.com/uber-go/zap#performance):
--- a/json.go
+++ b/json.go
@ -21,14 +21,55 @@ func appendJSONString(dst []byte, s string) []byte {
 		// Check if the character needs encoding. Control characters, slashes,
 		// and the double quote need json encoding. Bytes above the ascii
 		// boundary need utf8 encoding.
-		if s[i] < ' ' || s[i] == '\\' || s[i] == '"' || s[i] > 126 {
-			// We encountered a character that needs to be encoded. Let's
-			// append the previous simple characters to the byte slice
+		if s[i] < 0x20 || s[i] > 0x7e || s[i] == '\\' || s[i] == '"' {
+			// We encountered a character that needs to be encoded. Switch
+			// to complex version of the algorithm.
+			dst = appendJSONStringComplex(dst, s, i)
+			return append(dst, '"')
+		}
+	}
+	// The string has no need for encoding and therefore is directly
+	// appended to the byte slice.
+	dst = append(dst, s...)
+	// End with a double quote
+	return append(dst, '"')
+}
+
+// appendJSONStringComplex is used by appendJSONString to take over an in
+// progress JSON string encoding that encountered a character that needs
+// to be encoded.
+func appendJSONStringComplex(dst []byte, s string, i int) []byte {
+	start := 0
+	for i < len(s) {
+		b := s[i]
+		if b >= utf8.RuneSelf {
+			r, size := utf8.DecodeRuneInString(s[i:])
+			if r == utf8.RuneError && size == 1 {
+				// In case of error, first append previous simple characters to
+				// the byte slice if any and append a replacement character code
+				// in place of the invalid sequence.
+				if start < i {
+					dst = append(dst, s[start:i]...)
+				}
+				dst = append(dst, `\ufffd`...)
+				i += size
+				start = i
+				continue
+			}
+			i += size
+			continue
+		}
+		if b >= 0x20 && b <= 0x7e && b != '\\' && b != '"' {
+			i++
+			continue
+		}
+		// We encountered a character that needs to be encoded.
+		// Let's append the previous simple characters to the byte slice
 		// and switch our operation to read and encode the remainder
 		// characters byte-by-byte.
-			dst = append(dst, s[:i]...)
-			for i < len(s) {
-				if b := s[i]; b < utf8.RuneSelf {
+		if start < i {
+			dst = append(dst, s[start:i]...)
+		}
 		switch b {
 		case '"', '\\':
 			dst = append(dst, '\\', b)
@ -43,32 +84,13 @@ func appendJSONString(dst []byte, s string) []byte {
 		case '\t':
 			dst = append(dst, '\\', 't')
 		default:
-						if b >= 0x20 {
-							dst = append(dst, b)
-						} else {
-							dst = append(dst, '\\', 'u', '0', '0',
-								hex[b>>4], hex[b&0xF])
-						}
+			dst = append(dst, '\\', 'u', '0', '0', hex[b>>4], hex[b&0xF])
 		}
 		i++
-					continue
+		start = i
 	}
-				r, size := utf8.DecodeRuneInString(s[i:])
-				if r == utf8.RuneError && size == 1 {
-					dst = append(dst, `\ufffd`...)
-					i++
-					continue
+	if start < len(s) {
+		dst = append(dst, s[start:]...)
 	}
-				dst = append(dst, s[i:i+size]...)
-				i += size
-			}
-			// End with a double quote
-			return append(dst, '"')
-		}
-	}
-	// The string has no need for encoding an therefore is directly
-	// appended to the byte slice.
-	dst = append(dst, s...)
-	// End with a double quote
-	return append(dst, '"')
+	return dst
 }
--- a/json_test.go
+++ b/json_test.go
@ -43,6 +43,8 @@ func TestAppendJSONString(t *testing.T) {
 		{"\x1d", `"\u001d"`},
 		{"\x1e", `"\u001e"`},
 		{"\x1f", `"\u001f"`},
+		{"✭", `"✭"`},
+		{"foo\xc2\x7fbar", `"foo\ufffd\u007fbar"`}, // invalid sequence
 		{"ascii", `"ascii"`},
 		{"\"a", `"\"a"`},
 		{"\x1fa", `"\u001fa"`},
@ -65,9 +67,9 @@ func BenchmarkAppendJSONString(b *testing.B) {
 		"EncodingFirst":    `"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
 		"EncodingMiddle":   `aaaaaaaaaaaaaaaaaaaaaaaaa"aaaaaaaaaaaaaaaaaaaaaaaa`,
 		"EncodingLast":     `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"`,
-		"RuneFirst":      `❤️aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
-		"RuneMiddle":     `aaaaaaaaaaaaaaaaaaaaaaaaa❤️aaaaaaaaaaaaaaaaaaaaaaaa`,
-		"RuneLast":       `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa❤️`,
+		"MultiBytesFirst":  `❤️aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
+		"MultiBytesMiddle": `aaaaaaaaaaaaaaaaaaaaaaaaa❤️aaaaaaaaaaaaaaaaaaaaaaaa`,
+		"MultiBytesLast":   `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa❤️`,
 	}
 	for name, str := range tests {
 		b.Run(name, func(b *testing.B) {