go-dirty.git

ref: master

./string.go


  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package dirty

import (
	"strconv"
	"unicode/utf8"
)

// String is a text value; getType classifies it as ElemString.
type String string

// isElement is an empty marker method.
// NOTE(review): presumably it exists so String satisfies the package's
// element interface — confirm against the interface declaration.
func (String) isElement() {}

// getType reports the element kind of a String, which is always ElemString.
func (String) getType() ElementType { return ElemString }

// String returns the text enclosed in typographic single quotes (‘…’).
func (s String) String() string {
	const (
		openQuote  = "‘"
		closeQuote = "’"
	)
	return openQuote + string(s) + closeQuote
}

// parseString decodes the escape sequences in the raw text of a string token
// and returns a copy of the token that holds the decoded text.
//
// Recognised escapes are \n, \r, \t, \', \\, \uXXXX (four digits) and
// \UXXXXXXXX (eight digits); the digits are handed to parseUnicode.
//
// On failure the zero token is returned together with an error:
//   - NewInvalidCharError for an unescaped rune below 0x20, 0x7f, or in the
//     range 0x80-0x9f;
//   - NewEscapeError for an unknown escape letter, or — with the rune that
//     opened it — for an escape that is cut short by the end of the text;
//   - whatever parseUnicode returns for a rejected \u / \U payload.
func parseString(t token) (token, error) {
	var (
		// pending is 0 in plain text, '\\' directly after a backslash, and
		// 'u' / 'U' while the digits of a Unicode escape are being collected.
		pending rune
		want    int    // number of digits the current Unicode escape needs
		ucode   []rune // digits collected so far; a rune slice so len counts characters, not bytes
	)
	// A byte buffer avoids the quadratic cost of += on a string; the input
	// length is only a capacity hint.
	out := make([]byte, 0, len(t.t))

	for _, r := range t.t {
		switch pending {
		case 0:
			switch {
			case r < 0x20 || r == 0x7f || (r >= 0x80 && r <= 0x9f):
				return token{}, NewInvalidCharError(r)
			case r == '\\':
				pending = '\\'
			default:
				out = append(out, string(r)...)
			}
		case '\\':
			switch r {
			case 'n':
				out = append(out, '\n')
			case 'r':
				out = append(out, '\r')
			case 't':
				out = append(out, '\t')
			case '\'':
				out = append(out, '\'')
			case '\\':
				out = append(out, '\\')
			case 'u':
				pending, want = 'u', 4
				continue
			case 'U':
				pending, want = 'U', 8
				continue
			default:
				return token{}, NewEscapeError(r)
			}
			pending = 0
		default: // inside \u or \U
			ucode = append(ucode, r)
			if len(ucode) < want {
				continue
			}
			char, err := parseUnicode(string(ucode))
			if err != nil {
				return token{}, err
			}
			out = append(out, char...)
			ucode = ucode[:0]
			pending = 0
		}
	}

	// The text ended in the middle of an escape: report it instead of
	// silently dropping the unfinished sequence.
	if pending != 0 {
		return token{}, NewEscapeError(pending)
	}

	t.t = string(out)
	return t, nil
}

// parseUnicode converts the hexadecimal digits of a \u or \U escape into the
// UTF-8 encoding of the code point they denote.
//
// ucode must consist solely of hexadecimal digits. The error from
// strconv.ParseUint is returned when it does not (this includes the empty
// string and a leading sign). InvalidCodepointError is returned when the
// number is not a valid Unicode scalar value, i.e. it is a surrogate half
// (U+D800-U+DFFF) or lies above U+10FFFF.
func parseUnicode(ucode string) (string, error) {
	// ParseUint, unlike ParseInt, rejects "+"/"-" prefixes, so a negative
	// code point can never reach the conversion below.
	codepoint, err := strconv.ParseUint(ucode, 16, 64)
	if err != nil {
		return "", err
	}
	// Compare against MaxRune before narrowing to rune so that large values
	// cannot wrap around into the valid range.
	if codepoint > utf8.MaxRune || !utf8.ValidRune(rune(codepoint)) {
		return "", InvalidCodepointError{ucode}
	}
	// The built-in conversion produces the shortest UTF-8 form for every
	// valid rune, including the boundary values U+007F, U+07FF and U+FFFF.
	return string(rune(codepoint)), nil
}