ref: master
./string.go
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
package dirty

import (
	"strconv"
	"strings"
	"unicode/utf8"
)

// String is an element whose value is a Go string.
type String string

// isElement is a marker method with an empty body.
func (String) isElement() {}

// getType reports ElemString as the element type of every String.
func (String) getType() ElementType { return ElemString }

// String returns the value wrapped in typographic single quotes (‘ and ’).
func (s String) String() string { return "‘" + string(s) + "’" }

// parseString decodes the escape sequences in t.t and returns a copy of t
// whose text is the decoded string.
//
// Recognized escapes are \n, \', \r, \t, \\, \uXXXX (exactly four hex digits)
// and \UXXXXXXXX (exactly eight hex digits). Outside of escapes, C0 control
// characters, DEL and C1 control characters are rejected with
// NewInvalidCharError. An unknown escape letter yields NewEscapeError, a
// malformed or invalid unicode escape yields the error from parseUnicode, and
// a unicode escape cut short by the end of the token yields
// InvalidCodepointError. On any error the returned token is the zero token.
func parseString(t token) (token, error) {
	var (
		result strings.Builder
		ucode  []rune // digits collected so far for the current \u or \U escape
		mode   rune   // 0: plain text, '\\': just after a backslash, 'u'/'U': inside a unicode escape
	)
	result.Grow(len(t.t))

	for _, r := range t.t {
		switch mode {
		case 0:
			if r < 0x20 || r == 0x7f || (r >= 0x80 && r <= 0x9f) {
				return token{}, NewInvalidCharError(r)
			}
			if r == '\\' {
				mode = '\\'
				continue
			}
			result.WriteRune(r)

		case '\\':
			switch r {
			case 'n':
				result.WriteByte('\n')
			case '\'':
				result.WriteByte('\'')
			case 'r':
				result.WriteByte('\r')
			case 't':
				result.WriteByte('\t')
			case '\\':
				result.WriteByte('\\')
			case 'u', 'U':
				mode = r
				continue
			default:
				return token{}, NewEscapeError(r)
			}
			mode = 0

		case 'u', 'U':
			// Count runes, not bytes: a multi-byte character inside the
			// escape must still terminate it after the expected number of
			// characters so that parseUnicode can reject it, instead of the
			// escape silently swallowing the remainder of the string.
			ucode = append(ucode, r)
			want := 4
			if mode == 'U' {
				want = 8
			}
			if len(ucode) < want {
				continue
			}
			char, err := parseUnicode(string(ucode))
			if err != nil {
				return token{}, err
			}
			result.WriteString(char)
			ucode = ucode[:0]
			mode = 0
		}
	}

	// A \u or \U escape that ran into the end of the token is incomplete and
	// must not be dropped silently.
	if mode == 'u' || mode == 'U' {
		return token{}, InvalidCodepointError{string(ucode)}
	}
	// NOTE(review): a token ending in a lone backslash is still ignored here,
	// as before; presumably the tokenizer cannot produce one — confirm.

	t.t = result.String()
	return t, nil
}

// parseUnicode converts the hexadecimal digits of a \u or \U escape into the
// UTF-8 string for that code point.
//
// It returns the strconv error when ucode is not an unsigned hexadecimal
// number, and InvalidCodepointError when the value is not a valid Unicode
// scalar value (a surrogate half or anything above U+10FFFF).
func parseUnicode(ucode string) (string, error) {
	// ParseUint rejects a leading sign, which ParseInt would accept.
	codepoint, err := strconv.ParseUint(ucode, 16, 32)
	if err != nil {
		return "", err
	}
	if codepoint > utf8.MaxRune || !utf8.ValidRune(rune(codepoint)) {
		return "", InvalidCodepointError{ucode}
	}
	// Converting a valid rune to string produces its UTF-8 encoding directly,
	// so no manual byte assembly is needed.
	return string(rune(codepoint)), nil
}