Skip to content
Snippets Groups Projects
jsonstring.go 5.1 KiB
Newer Older
  • Learn to ignore specific revisions
  • Volker Schukai's avatar
    Volker Schukai committed
    package logfmt
    
    import (
    	"bytes"
    	"io"
    	"strconv"
    	"sync"
    	"unicode"
    	"unicode/utf16"
    	"unicode/utf8"
    )
    
    // Taken from Go's encoding/json and modified for use here.
    
    // Copyright 2010 The Go Authors. All rights reserved.
    // Use of this source code is governed by a BSD-style
    // license that can be found in the LICENSE file.
    
    // hex holds the lowercase hexadecimal digits. The quoting functions index it
    // with a nibble value to emit the two low digits of a \u00XX escape.
    var hex = "0123456789abcdef"
    
    // bufferPool recycles the scratch buffers used while building quoted output,
    // so that quoting a value does not allocate a fresh bytes.Buffer each time.
    var bufferPool = sync.Pool{
    	New: func() interface{} {
    		return &bytes.Buffer{}
    	},
    }
    
    // getBuffer returns a buffer from bufferPool. Buffers are reset before they
    // are put back, so the returned buffer is empty.
    func getBuffer() *bytes.Buffer {
    	return bufferPool.Get().(*bytes.Buffer)
    }
    
    // poolBuffer resets buf and hands it back to bufferPool for reuse. The caller
    // must not use buf afterwards.
    //
    // Buffers whose capacity has grown beyond maxPooledCap are reset but not
    // pooled: otherwise a single unusually large value would keep its oversized
    // backing array alive in the pool for as long as the buffer keeps being
    // reused.
    func poolBuffer(buf *bytes.Buffer) {
    	const maxPooledCap = 64 << 10 // 64 KiB
    
    	buf.Reset()
    	if buf.Cap() > maxPooledCap {
    		return
    	}
    	bufferPool.Put(buf)
    }
    
    // writeQuotedString writes s to w as a double-quoted string and returns the
    // result of the single underlying w.Write call. Backslash and double quote
    // are backslash-escaped; newline, carriage return and tab become \n, \r and
    // \t; every other byte below 0x20 becomes \u00XX; and anything that decodes
    // as utf8.RuneError becomes \ufffd. All other bytes are copied verbatim.
    //
    // NOTE: keep in sync with writeQuotedBytes below.
    func writeQuotedString(w io.Writer, s string) (int, error) {
    	out := getBuffer()
    	out.WriteByte('"')
    
    	// s[pending:pos] is the run of bytes already scanned that can be copied
    	// through unchanged but has not been written to out yet.
    	pending, pos := 0, 0
    	for pos < len(s) {
    		ch, width := s[pos], 1
    		if ch >= utf8.RuneSelf {
    			var r rune
    			if r, width = utf8.DecodeRuneInString(s[pos:]); r != utf8.RuneError {
    				pos += width
    				continue
    			}
    		} else if ch >= 0x20 && ch != '\\' && ch != '"' {
    			pos++
    			continue
    		}
    
    		// s[pos:pos+width] must be replaced by an escape; flush the
    		// verbatim run in front of it first.
    		out.WriteString(s[pending:pos])
    		switch {
    		case ch >= utf8.RuneSelf:
    			out.WriteString(`\ufffd`)
    		case ch == '\\' || ch == '"':
    			out.WriteByte('\\')
    			out.WriteByte(ch)
    		case ch == '\n':
    			out.WriteString(`\n`)
    		case ch == '\r':
    			out.WriteString(`\r`)
    		case ch == '\t':
    			out.WriteString(`\t`)
    		default:
    			// Remaining control bytes below 0x20.
    			out.WriteString(`\u00`)
    			out.WriteByte(hex[ch>>4])
    			out.WriteByte(hex[ch&0xF])
    		}
    		pos += width
    		pending = pos
    	}
    	out.WriteString(s[pending:])
    	out.WriteByte('"')
    
    	written, err := w.Write(out.Bytes())
    	poolBuffer(out)
    	return written, err
    }
    
    // writeQuotedBytes writes s to w as a double-quoted string and returns the
    // result of the single underlying w.Write call. Backslash and double quote
    // are backslash-escaped; newline, carriage return and tab become \n, \r and
    // \t; every other byte below 0x20 becomes \u00XX; and anything that decodes
    // as utf8.RuneError becomes \ufffd. All other bytes are copied verbatim.
    //
    // NOTE: keep in sync with writeQuotedString above.
    func writeQuotedBytes(w io.Writer, s []byte) (int, error) {
    	out := getBuffer()
    	out.WriteByte('"')
    
    	// s[pending:pos] is the run of bytes already scanned that can be copied
    	// through unchanged but has not been written to out yet.
    	pending, pos := 0, 0
    	for pos < len(s) {
    		ch, width := s[pos], 1
    		if ch >= utf8.RuneSelf {
    			var r rune
    			if r, width = utf8.DecodeRune(s[pos:]); r != utf8.RuneError {
    				pos += width
    				continue
    			}
    		} else if ch >= 0x20 && ch != '\\' && ch != '"' {
    			pos++
    			continue
    		}
    
    		// s[pos:pos+width] must be replaced by an escape; flush the
    		// verbatim run in front of it first.
    		out.Write(s[pending:pos])
    		switch {
    		case ch >= utf8.RuneSelf:
    			out.WriteString(`\ufffd`)
    		case ch == '\\' || ch == '"':
    			out.WriteByte('\\')
    			out.WriteByte(ch)
    		case ch == '\n':
    			out.WriteString(`\n`)
    		case ch == '\r':
    			out.WriteString(`\r`)
    		case ch == '\t':
    			out.WriteString(`\t`)
    		default:
    			// Remaining control bytes below 0x20.
    			out.WriteString(`\u00`)
    			out.WriteByte(hex[ch>>4])
    			out.WriteByte(hex[ch&0xF])
    		}
    		pos += width
    		pending = pos
    	}
    	out.Write(s[pending:])
    	out.WriteByte('"')
    
    	written, err := w.Write(out.Bytes())
    	poolBuffer(out)
    	return written, err
    }
    
    // getu4 parses a \uXXXX escape at the start of s and returns the value of
    // its four hexadecimal digits. It returns -1 when s is shorter than six
    // bytes, does not begin with a backslash followed by 'u', or the four
    // digits fail to parse as hexadecimal.
    func getu4(s []byte) rune {
    	if len(s) >= 6 && s[0] == '\\' && s[1] == 'u' {
    		if v, err := strconv.ParseUint(string(s[2:6]), 16, 64); err == nil {
    			return rune(v)
    		}
    	}
    	return -1
    }
    
    // unquoteBytes decodes the double-quoted, backslash-escaped string in s and
    // returns its contents.
    //
    // ok is false (and t is nil) when s is not wrapped in double quotes, ends in
    // the middle of an escape, contains an unrecognized escape or a \u escape
    // that getu4 rejects, or contains an unescaped double quote or a byte below
    // 0x20.
    //
    // When the contents need no unescaping, t is a sub-slice of s rather than a
    // copy. Otherwise t is a newly allocated slice in which escapes are decoded,
    // invalid UTF-8 is replaced by U+FFFD, and a \u surrogate that is not part of
    // a valid pair is replaced by U+FFFD.
    func unquoteBytes(s []byte) (t []byte, ok bool) {
    	// The input must be wrapped in double quotes; otherwise fail with the
    	// zero results (nil, false).
    	if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
    		return
    	}
    	s = s[1 : len(s)-1]
    
    	// Check for unusual characters. If there are none,
    	// then no unquoting is needed, so return a slice of the
    	// original bytes.
    	r := 0
    	for r < len(s) {
    		c := s[r]
    		if c == '\\' || c == '"' || c < ' ' {
    			break
    		}
    		if c < utf8.RuneSelf {
    			r++
    			continue
    		}
    		rr, size := utf8.DecodeRune(s[r:])
    		if rr == utf8.RuneError {
    			break
    		}
    		r += size
    	}
    	if r == len(s) {
    		return s, true
    	}
    
    	// Slow path: s[:r] is known to be clean, so copy it as is and decode the
    	// rest into b. r is the read offset into s, w the write offset into b.
    	b := make([]byte, len(s)+2*utf8.UTFMax)
    	w := copy(b, s[0:r])
    	for r < len(s) {
    		// Out of room?  Can only happen if s is full of
    		// malformed UTF-8 and we're replacing each
    		// byte with RuneError.
    		// Growing here keeps at least 2*utf8.UTFMax bytes free in b before
    		// the next element is decoded.
    		if w >= len(b)-2*utf8.UTFMax {
    			nb := make([]byte, (len(b)+utf8.UTFMax)*2)
    			copy(nb, b[0:w])
    			b = nb
    		}
    		switch c := s[r]; {
    		case c == '\\':
    			r++
    			// A trailing backslash with nothing after it is invalid.
    			if r >= len(s) {
    				return
    			}
    			switch s[r] {
    			default:
    				// Unrecognized escape character.
    				return
    			case '"', '\\', '/', '\'':
    				// These escapes stand for the character itself.
    				b[w] = s[r]
    				r++
    				w++
    			case 'b':
    				b[w] = '\b'
    				r++
    				w++
    			case 'f':
    				b[w] = '\f'
    				r++
    				w++
    			case 'n':
    				b[w] = '\n'
    				r++
    				w++
    			case 'r':
    				b[w] = '\r'
    				r++
    				w++
    			case 't':
    				b[w] = '\t'
    				r++
    				w++
    			case 'u':
    				// Step back onto the backslash so getu4 sees the whole
    				// \uXXXX sequence.
    				r--
    				rr := getu4(s[r:])
    				if rr < 0 {
    					return
    				}
    				r += 6
    				if utf16.IsSurrogate(rr) {
    					// Try to combine with a second \uXXXX escape that
    					// immediately follows.
    					rr1 := getu4(s[r:])
    					if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
    						// A valid pair; consume.
    						r += 6
    						w += utf8.EncodeRune(b[w:], dec)
    						// Leaves the inner switch only; the loop continues.
    						break
    					}
    					// Invalid surrogate; fall back to replacement rune.
    					rr = unicode.ReplacementChar
    				}
    				w += utf8.EncodeRune(b[w:], rr)
    			}
    
    		// Quote, control characters are invalid.
    		case c == '"', c < ' ':
    			return
    
    		// ASCII
    		case c < utf8.RuneSelf:
    			b[w] = c
    			r++
    			w++
    
    		// Coerce to well-formed UTF-8.
    		default:
    			// An undecodable byte comes back as utf8.RuneError and is
    			// re-encoded as such.
    			rr, size := utf8.DecodeRune(s[r:])
    			r += size
    			w += utf8.EncodeRune(b[w:], rr)
    		}
    	}
    	return b[0:w], true
    }