From f8aa7a1962d4028c8d2e68123d412c4deebbab5b Mon Sep 17 00:00:00 2001 From: Josh Baker Date: Fri, 23 Jun 2017 20:28:33 -0700 Subject: [PATCH] Optimistically expect simple strings for json (#6) Performance update to appendJSONString so that it now checks if the input is a simple string that contains no json delimiters, control characters, or unicode. If simple then the operation is only three appends. [double-quote, string, double-quote]. If a non-simple character is encountered then all of the previous characters are appended and the operation falls back to the original method for the remaining characters. Before: BenchmarkLogEmpty-8 100000000 17.1 ns/op BenchmarkDisabled-8 500000000 4.12 ns/op BenchmarkInfo-8 20000000 101 ns/op BenchmarkContextFields-8 20000000 105 ns/op BenchmarkLogFields-8 5000000 281 ns/op After: BenchmarkLogEmpty-8 100000000 16.7 ns/op BenchmarkDisabled-8 500000000 3.79 ns/op BenchmarkInfo-8 30000000 44.8 ns/op BenchmarkContextFields-8 30000000 67.5 ns/op BenchmarkLogFields-8 10000000 197 ns/op --- json.go | 90 ++++++++++++++++++++++++++++++++++------------------ json_test.go | 2 ++ 2 files changed, 62 insertions(+), 30 deletions(-) diff --git a/json.go b/json.go index 2b78542..8ff43a4 100644 --- a/json.go +++ b/json.go @@ -4,41 +4,71 @@ import "unicode/utf8" const hex = "0123456789abcdef" +// appendJSONString encodes the input string to json and appends +// the encoded string to the input byte slice. +// +// The operation loops through each byte in the string looking +// for characters that need json or utf8 encoding. If the string +// does not need encoding, then the string is appended in its +// entirety to the byte slice. +// If we encounter a byte that does need encoding, switch up +// the operation and perform a byte-by-byte read-encode-append. func appendJSONString(dst []byte, s string) []byte { + // Start with a double quote. 
dst = append(dst, '"') - for i := 0; i < len(s); { - if b := s[i]; b < utf8.RuneSelf { - switch b { - case '"', '\\': - dst = append(dst, '\\', b) - case '\b': - dst = append(dst, '\\', 'b') - case '\f': - dst = append(dst, '\\', 'f') - case '\n': - dst = append(dst, '\\', 'n') - case '\r': - dst = append(dst, '\\', 'r') - case '\t': - dst = append(dst, '\\', 't') - default: - if b >= 0x20 { - dst = append(dst, b) - } else { - dst = append(dst, '\\', 'u', '0', '0', hex[b>>4], hex[b&0xF]) + // Loop through each character in the string. + for i := 0; i < len(s); i++ { + // Check if the character needs encoding. Control characters, backslashes, + // and the double quote need json encoding. Bytes above the ascii + // boundary need utf8 encoding. + if s[i] < ' ' || s[i] == '\\' || s[i] == '"' || s[i] > 126 { + // We encountered a character that needs to be encoded. Let's + // append the previous simple characters to the byte slice + // and switch our operation to read and encode the remaining + // characters byte-by-byte. + dst = append(dst, s[:i]...) + for i < len(s) { + if b := s[i]; b < utf8.RuneSelf { + switch b { + case '"', '\\': + dst = append(dst, '\\', b) + case '\b': + dst = append(dst, '\\', 'b') + case '\f': + dst = append(dst, '\\', 'f') + case '\n': + dst = append(dst, '\\', 'n') + case '\r': + dst = append(dst, '\\', 'r') + case '\t': + dst = append(dst, '\\', 't') + default: + if b >= 0x20 { + dst = append(dst, b) + } else { + dst = append(dst, '\\', 'u', '0', '0', + hex[b>>4], hex[b&0xF]) + } + } + i++ + continue } + r, size := utf8.DecodeRuneInString(s[i:]) + if r == utf8.RuneError && size == 1 { + dst = append(dst, `\ufffd`...) + i++ + continue + } + dst = append(dst, s[i:i+size]...) + i += size } - i++ - continue + // End with a double quote + return append(dst, '"') } - r, size := utf8.DecodeRuneInString(s[i:]) - if r == utf8.RuneError && size == 1 { - dst = append(dst, `\ufffd`...) - i++ - continue - } - dst = append(dst, s[i:i+size]...) 
- i += size } + // The string has no need for encoding and therefore is directly + // appended to the byte slice. + dst = append(dst, s...) + // End with a double quote return append(dst, '"') } diff --git a/json_test.go b/json_test.go index fe12dee..4881b62 100644 --- a/json_test.go +++ b/json_test.go @@ -40,6 +40,8 @@ func TestAppendJSONString(t *testing.T) { {"\x1d", `"\u001d"`}, {"\x1e", `"\u001e"`}, {"\x1f", `"\u001f"`}, + {"ascii", `"ascii"`}, + {"emoji \u2764\ufe0f!", `"emoji ❤️!"`}, } for _, tt := range encodeStringTests {