Author: Adam <git@apiote.xyz>
structs, arrays, slices, and maps of string
array.go | 76 ++++++++++ const.go | 59 ++++++++ errors.go | 95 +++++++++++++ example.drt | 27 +++ go.mod | 3 main.go | 72 ++++++++++ main_test.go | 386 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ number.go | 180 +++++++++++++++++++++++++ string.go | 123 +++++++++++++++++ struct.go | 177 ++++++++++++++++++++++++ tokeniser.go | 270 +++++++++++++++++++++++++++++++++++++
diff --git a/array.go b/array.go new file mode 100644 index 0000000000000000000000000000000000000000..76f9f224ab6a0fbb7e850096cca5d28815a7e872 --- /dev/null +++ b/array.go @@ -0,0 +1,76 @@ +package dirty + +import ( + "bufio" +) + +type Array []Element + +func (Array) isElement() {} +func (Array) getType() ElementType { + return ElemArray +} + +type Element interface { + isElement() + getType() ElementType +} + +type ElementType int + +const ( + ElemArray ElementType = iota + ElemString + ElemConst + ElemInt + ElemFloat +) + +func loadArray(r *bufio.Reader) ([]Element, error) { + topArray := []Element{} + for { + t, err := nextToken(r) + if err != nil { + return []Element{}, err + } + //debugf("in LoadArray got %+v\n", t) + + switch t.ttype { + case LBRACKET: + //debugf("in LoadArray loading array\n") + array, err := loadArray(r) + if err != nil { + return []Element{}, err + } + topArray = append(topArray, Array(array)) + case RBRACKET: + //debugf("in LoadArray closing\n") + return topArray, nil + // todo atoms + case COMMENT: + continue + case STRING: + //debugf("in LoadArray adding string\n") + topArray = append(topArray, String(t.t)) + case STRING_RAW: + //debugf("in LoadArray adding raw string\n") + topArray = append(topArray, String(t.t)) + case CONST: + //debugf("in LoadArray adding const\n") + topArray = append(topArray, NewConst(t.t)) + case NUMBER: + //debugf("in LoadArray adding number %+v, %d\n", t, *t.i) + if t.i != nil { + topArray = append(topArray, Int(*t.i)) + } + case FLOAT: + //debugf("in LoadArray adding float %+v, %f\n", t, *t.f) + if t.f != nil { + topArray = append(topArray, Float(*t.f)) + } + default: + //debugln("loadArray") + return []Element{}, NewSyntaxError(t, []token{}) + } + } +} diff --git a/const.go b/const.go new file mode 100644 index 0000000000000000000000000000000000000000..1712b7fc43a3e70830b957080846c8b5cd41fbc9 --- /dev/null +++ b/const.go @@ -0,0 +1,59 @@ +package dirty + +import ( + "fmt" +) + +type Const int + +const ( + 
TRUE Const = iota + FALSE + NULL +) + +func NewConst(s string) Const { + if s == "true" { + return TRUE + } + if s == "false" { + return FALSE + } + if s == "null" { + return NULL + } + panic("invalid const " + s) +} + +func (Const) isElement() {} +func (Const) getType() ElementType { + return ElemConst +} +func (c Const) String() string { + if c == TRUE { + return "true" + } + if c == FALSE { + return "false" + } + if c == NULL { + return "null" + } + panic(fmt.Sprintf("invalid const %d", c)) +} +func (c Const) Bool() bool { + if c == TRUE { + return true + } else if c == FALSE { + return false + } else { + panic("Const is not bool") + } +} + +func parseConst(t token) (token, error) { + if t.t != "true" && t.t != "false" && t.t != "null" { + return token{}, NewConstError(t.t) + } + return t, nil +} diff --git a/errors.go b/errors.go new file mode 100644 index 0000000000000000000000000000000000000000..fc568d94db43b1ceb7317b827594eb0ae602bd1a --- /dev/null +++ b/errors.go @@ -0,0 +1,95 @@ +package dirty + +import ( + "fmt" +) + +type SyntaxError struct { + found token + expected []token +} + +func NewSyntaxError(found token, expected []token) SyntaxError { + return SyntaxError{found: found, expected: expected} +} +func (se SyntaxError) Error() string { + return fmt.Sprintf("expected %v; got %v\n", se.expected, se.found) +} + +type UnterminatedError struct { + ttype string + t string +} + +func NewUnterminatedError(ttype string, t string) UnterminatedError { + return UnterminatedError{ttype: ttype, t: t} +} +func (e UnterminatedError) Error() string { + return fmt.Sprintf("unterminated %s ‘%s’\n", e.ttype, e.t) +} + +type InvalidCharError struct { + r rune +} + +func NewInvalidCharError(r rune) InvalidCharError { + return InvalidCharError{r: r} +} +func (e InvalidCharError) Error() string { + return fmt.Sprintf("invalid character ‘%d’\n", e.r) +} + +type CommaError struct { + s string +} + +func NewCommaError(s string) CommaError { + return CommaError{s: s} +} +func 
(e CommaError) Error() string { + return fmt.Sprintf("comma in wrong place in ‘%s’\n", e.s) +} + +type InvalidCodepointError struct { + c string +} + +func NewInvalidCodepointError(c string) InvalidCodepointError { + return InvalidCodepointError{c: c} +} +func (e InvalidCodepointError) Error() string { + return fmt.Sprintf("invalid codepoint ‘%s’\n", e.c) +} + +type ConstError struct { + t string +} + +func NewConstError(t string) ConstError { + return ConstError{t: t} +} +func (e ConstError) Error() string { + return fmt.Sprintf("malformed const ‘%s’\n", e.t) +} + +type EscapeError struct { + char rune +} + +func NewEscapeError(char rune) EscapeError { + return EscapeError{char: char} +} +func (e EscapeError) Error() string { + return fmt.Sprintf("invalid escape sequence \\%v\n", e.char) +} + +type RawStringError struct { + s string +} + +func NewRawStringError(s string) RawStringError { + return RawStringError{s: s} +} +func (e RawStringError) Error() string { + return e.s +} diff --git a/example.drt b/example.drt new file mode 100644 index 0000000000000000000000000000000000000000..b91ea27c2ce4c6b707d552842542a756cf649cde --- /dev/null +++ b/example.drt @@ -0,0 +1,27 @@ +( + ('map' # array of pairs + ( + ('π' 3.14) + ('e' 2.73) + ('h' 10,000·) # dozenal = 12^4 in decimal + ('one' 1) # decimal integer + ('positivity' true) + ('negativity' false) + ('i don’t know' null) + ('256' 0xff) + ('7' 0b111) + (7 'is not the same') + #(0b111 'but this would be') + ('execute' 0o755) + ('ice point' 16↋7·6) + ('dozenal digits' (1· 2· 3· 4· 5· 6· 7· 8· 9· ↊· ↋·)) + ('dozenal negative' -10·1) + ('escaped' 'i won\'t say but i will \tit\nand this will be in new line') + ('scientific' (7e27 7e-27)) + ('cold' -10) + ('imagine' (10+8i 8-10i ↊·+8·i 8·-↊·i)) + ) + ) + ('array' (1 2 3 'some text')) + ('mapception' (('one' 1) ('two' 2))) +) diff --git a/go.mod b/go.mod new file mode 100644 index 0000000000000000000000000000000000000000..749b4f08537b66b335b13c24c1854a20a3dbe207 --- 
/dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module git.apiote.xyz/me/dirty + +go 1.16 diff --git a/main.go b/main.go new file mode 100644 index 0000000000000000000000000000000000000000..13a26554b376c2c710bc95c9541ab8dc3000c18a --- /dev/null +++ b/main.go @@ -0,0 +1,72 @@ +package dirty + +import ( + "bufio" + "fmt" + "io" + "reflect" +) + +const DEBUG bool = true // build -X + +func debugf(format string, a ...interface{}) { + if DEBUG { + fmt.Printf(format, a...) + } +} +func debugln(a ...interface{}) { + if DEBUG { + fmt.Println(a...) + } +} + +// todo func LoadArray() +func Load(r io.Reader) (Array, error) { + scanner := bufio.NewReader(r) + array := []Element{} + comment := token{ttype: COMMENT} + lbrack := token{ttype: LBRACKET} + eof := token{ttype: EOF} + expected := lbrack + for { + t, err := nextToken(scanner) + //debugf("in Load got %+v\n", t) + if err != nil { + return []Element{}, err + } + if t == comment { + continue + } else if t == lbrack { + if expected != lbrack { + //debugln("expected lbrac") + return []Element{}, NewSyntaxError(t, []token{expected}) + } + //debugf("in Load loading array\n") + array, err = loadArray(scanner) + if err != nil { + return Array{}, err + } + expected = eof + } else if t == eof { + if expected != eof { + //debugln("expected eof") + return []Element{}, NewSyntaxError(t, []token{expected}) + } + //debugf("in Load eofing\n") + return array, nil + } else { + //debugln("garbage") + return []Element{}, NewSyntaxError(t, []token{expected}) + } + } +} + +// todo func Load() +func LoadStruct(r io.Reader, s interface{}) error { + array, err := Load(r) + if err != nil { + return err + } + v := reflect.ValueOf(s).Elem() + return convertStruct(array, v) +} diff --git a/main_test.go b/main_test.go new file mode 100644 index 0000000000000000000000000000000000000000..684ff94a74e3e024930d77cce40328c0b2c2b398 --- /dev/null +++ b/main_test.go @@ -0,0 +1,386 @@ +package dirty + +import ( + "strings" + "testing" + + "fmt" +) + +func 
TestLoadEmptyArray(t *testing.T) { + r := strings.NewReader("()") + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if len(arr) != 0 { + t.Fatalf("expected empty array, got %+v\n", arr) + } +} + +func TestLoadEmptyArrayInArray(t *testing.T) { + r := strings.NewReader("(())") + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && len(arr[0].(Array)) == 0) { + t.Fatalf("expected empty array in array, got %+v\n", arr) + } +} + +func TestLoadEmptyArrayWithComment(t *testing.T) { + r := strings.NewReader(` + ( + # this is a comment + ) + `) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if len(arr) != 0 { + t.Fatalf("expected empty array, got %+v\n", arr) + } +} + +func TestLoadArrayWithString(t *testing.T) { + r := strings.NewReader(`('this is a string')`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(String) == "this is a string") { + t.Fatalf("expected [‘this is a string’], got %+v\n", arr) + } +} + +func TestEscapeTab(t *testing.T) { + r := strings.NewReader(`('tab\t')`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(String) == "tab\t") { + t.Fatalf("expected [‘tab\t’], got %+v\n", arr) + } +} + +func TestEscapeNewline(t *testing.T) { + r := strings.NewReader(`('nl\n')`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(String) == "nl\n") { + t.Fatalf("expected [‘nl\n’], got %+v\n", arr) + } +} + +func TestEscapeBackslash(t *testing.T) { + r := strings.NewReader(`('bs\\')`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(String) == "bs\\") { + t.Fatalf("expected [‘bs\\’], got %+v\n", arr) + } +} + +func TestEscapeCarret(t *testing.T) { + r := strings.NewReader(`('\rcarret')`) + + arr, err := 
Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(String) == "\rcarret") { + t.Fatalf("expected [‘\rcarret’], got %+v\n", arr) + } +} + +func TestEscapeApos(t *testing.T) { + r := strings.NewReader(`('\'')`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(String) == "'") { + t.Fatalf("expected [‘'’], got %+v\n", arr) + } +} + +func TestTrue(t *testing.T) { + r := strings.NewReader(`(true)`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(Const) == TRUE) { + t.Fatalf("expected [true], got %+v\n", arr) + } +} + +func TestFalse(t *testing.T) { + r := strings.NewReader(`(false)`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(Const) == FALSE) { + t.Fatalf("expected [false], got %+v\n", arr) + } +} + +func TestNull(t *testing.T) { + r := strings.NewReader(`(null)`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(Const) == NULL) { + t.Fatalf("expected [null], got %+v\n", arr) + } +} + +func TestU1B(t *testing.T) { + r := strings.NewReader(`('\u0040')`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(String) == "@") { + t.Fatalf("expected [‘@’], got %+v\n", arr) + } +} + +func TestU2B(t *testing.T) { + r := strings.NewReader(`('\u00E5')`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(String) == "å") { + t.Fatalf("expected [‘å’], got %+v\n", arr) + } +} + +func TestU3B(t *testing.T) { + r := strings.NewReader(`('\u2042')`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(String) == "⁂") { + t.Fatalf("expected [‘⁂’], got %+v\n", arr) + } +} + +func TestU4B(t *testing.T) { + r := 
strings.NewReader(`('\U000200D1')`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(String) == "𠃑") { + t.Fatalf("expected [‘𠃑’], got %+v\n", arr) + } +} + +func Test0b(t *testing.T) { + r := strings.NewReader(`(0b01)`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(Int) == 1) { + t.Fatalf("expected [1], got %+v\n", arr) + } +} + +func Test0bCommaMiddle(t *testing.T) { + r := strings.NewReader(`(0b01,00000000)`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(Int) == 0b100000000) { + t.Fatalf("expected [256], got %+v\n", arr) + } +} + +func Test0bCommaBegin(t *testing.T) { + r := strings.NewReader(`(0b,00000000)`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(Int) == 0b100000000) { + t.Fatalf("expected [256], got %+v\n", arr) + } +} + +func Test0bCommas(t *testing.T) { + r := strings.NewReader(`(0b01,,00000000)`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(Int) == 0b10000000000000000) { + t.Fatalf("expected [65536], got %+v\n", arr) + } +} + +func Test0o(t *testing.T) { + r := strings.NewReader(`(0o77)`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(Int) == 0o77) { + t.Fatalf("expected [63], got %+v\n", arr) + } +} + +func Test0x(t *testing.T) { + r := strings.NewReader(`(0xff)`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(Int) == 0xff) { + t.Fatalf("expected [255], got %+v\n", arr) + } +} + +func Test0(t *testing.T) { + r := strings.NewReader(`(0)`) + + arr, err := Load(r) + if err != nil { + t.Fatalf("got error: %v\n", err) + } + if !(len(arr) == 1 && arr[0].(Int) == 0) { + t.Fatalf("expected [0], got 
%+v\n", arr) + } +} + +// todo test numbers +func TestShowMap(t *testing.T) { + r := strings.NewReader(`(1 10 10. 10.5 ↊· ↊·6 1e2 -1 -10 -10.5 -↊· -↊·6 -1e2 1e-2 -1e-2)`) + + arr, err := Load(r) + fmt.Println(arr) + if err != nil { + t.Fatalf("got error: %v\n", err) + } +} + +// todo test raw string +func TestShowRawString(t *testing.T) { + r := strings.NewReader("(`\n" + + " a\n" + + " `sql`\n" + + "`)") + + arr, err := Load(r) + fmt.Println(arr) + if err != nil { + t.Fatalf("got error: %v\n", err) + } +} + +func TestShowLoadStruct(t *testing.T) { + /*a := "" + b := 5*/ + r := strings.NewReader(`( + ('a' 'sss') + ('b' 42) + ('c' 3.14) + ('d' null) + ('e' true) + ('f' false) + ('g' 5) + )`) + s := &struct { + A string + B int + C float32 + D *int + E bool + F bool + G *int + }{} + LoadStruct(r, s) + fmt.Println(*s.G) + + r = strings.NewReader(`( + ('a' ( + ('b' true) + )) + )`) + LoadStruct(r, &struct { + A struct { + B bool + C bool + } + }{}) + r = strings.NewReader(`( + ('a' 'sss') + ('b' 42) + ('c' 3.14) + )`) + LoadStruct(r, &struct { + z string + }{}) + /*LoadStruct(nil, &[]int{}) + LoadStruct(nil, &map[string]int{}) + LoadStruct(nil, &a) + LoadStruct(nil, &b)*/ +} + +func TestShowLoadMap(t *testing.T) { + r := strings.NewReader(`( + ('a' 11) + ('b' 42) + ('c' 3) + )`) + m := map[string]int64{} + LoadStruct(r, &m) +} + +func TestShowLoadArray(t *testing.T) { + r := strings.NewReader(`(11 22 31)`) + s := [3]int64{} + LoadStruct(r, &s) +} + +func TestShowLoadSlice(t *testing.T) { + r := strings.NewReader(`(11 22 31)`) + s := []int64{} + LoadStruct(r, &s) +} +//todo escapeError, unterminatedError, invalidCharError, invalidCodepointError, constError, commaError +//todo syntax errors diff --git a/number.go b/number.go new file mode 100644 index 0000000000000000000000000000000000000000..0ebd92eccba900079e8821bb9b6de6c46214c718 --- /dev/null +++ b/number.go @@ -0,0 +1,180 @@ +package dirty + +import ( + "math" + "strconv" + "strings" +) + +type Int int + +func (Int) 
isElement() {} +func (Int) getType() ElementType { + return ElemInt +} +func (i Int) String() string { + return strconv.FormatInt(int64(i), 10) +} + +type Float float64 + +func (Float) isElement() {} +func (Float) getType() ElementType { + return ElemFloat +} +func (i Float) String() string { + return strconv.FormatFloat(float64(i), 'f', -1, 64) +} + +func convertDozenal(d string) string { + result := "" + for _, c := range d { + if c == '↊' { + result += "a" + } else if c == '↋' { + result += "b" + } else if c == '·' { + result += "." + } else { + result += string(c) + } + } + return result +} + +func parseNumber(t token) (token, error) { + l := len(t.t) + if (in('.', []rune(t.t)) && t.t[l-1] != '.') || (in('·', []rune(t.t)) && t.t[l-1] != '·') { + return parseFloat(t) + } else if in('e', []rune(t.t)) && t.t[0] != '0' { + return parseEngFloat(t) + } else { + return parseInt(t) + } +} + +func parseFloat(t token) (token, error) { + t.ttype = FLOAT + if in('·', []rune(t.t)) { + number := strings.SplitN(convertDozenal(t.t), ".", 2) + whole, err := strconv.ParseInt(number[0], 12, 64) + if err != nil { + return token{}, err + } + fraction, err := strconv.ParseUint(number[1], 12, 64) + if err != nil { + return token{}, err + } + + t.t = "" + w := float64(whole) + if fraction != 0 { + var sign float64 + if w < 0 { + sign = -1.0 + } else { + sign = 1.0 + } + l := math.Floor(math.Log10((float64(fraction))/math.Log10(12)) + 1) + w += (float64(fraction) / math.Pow(12, l)) * sign + } + t.f = &w + return t, nil + } else { + f, err := strconv.ParseFloat(t.t, 64) + t.t = "" + t.f = &f + return t, err + } +} + +func parseEngFloat(t token) (token, error) { + t.ttype = FLOAT + number := strings.SplitN(t.t, "e", 2) + a, err := strconv.ParseFloat(number[0], 64) + if err != nil { + return token{}, err + } + b, err := strconv.ParseInt(number[1], 10, 64) + if err != nil { + return token{}, err + } + num := a * math.Pow10(int(b)) + t.t = "" + t.f = &num + return t, nil +} + +func 
parseComma(num string, z int) (string, error) { + num2 := "" + prevIsComma := false + lastComma := -1 + zeroes := "" + for i := 0; i < z; i++ { + zeroes += "0" + } + for i, d := range num { + if d == ',' { + if prevIsComma { + num2 += zeroes + } else if !((i-lastComma) == z+1 || lastComma == -1) { + return "", NewCommaError(num) + } + lastComma = i + } else { + num2 += string(d) + } + prevIsComma = d == ',' + } + return num2, nil +} + +func parseInt(t token) (token, error) { + num2 := t.t + base := 10 + + if t.t[0] == '0' && t.t[1] == 'b' { + num := t.t[2:] + if num[0] == ',' { + num = "1" + num + } + + var err error = nil + num2, err = parseComma(num, 8) + if err != nil { + return token{}, err + } + + num2 = "0b" + num2 + base = 0 + } + if t.t[0] == '0' && t.t[1] == 'o' { + num2 = t.t + base = 0 + } + if t.t[0] == '0' && t.t[1] == 'x' { + num2 = t.t + base = 0 + } + if in('·', []rune(t.t)) { + num, err := parseComma(t.t, 3) // 4? + if err != nil { + return token{}, err + } + num2 = convertDozenal(num) + num2 = num2[:len(num2)-1] + base = 12 + } + if in('.', []rune(t.t)) { + num2 = t.t[:len(t.t)-1] + var err error = nil + parseComma(num2, 3) + if err != nil { + return token{}, err + } + base = 10 + } + result, err := strconv.ParseInt(num2, base, 64) + t.i = &result + return t, err +} diff --git a/string.go b/string.go new file mode 100644 index 0000000000000000000000000000000000000000..f951eb0e8f8341be44abd531ac5a82fd64d25131 --- /dev/null +++ b/string.go @@ -0,0 +1,123 @@ +package dirty + +import ( + "strconv" + "unicode/utf8" +) + +type String string + +func (String) isElement() {} +func (String) getType() ElementType { + return ElemString +} +func (s String) String() string { + return "‘" + string(s) + "’" +} + +func parseString(t token) (token, error) { + result := "" + ucode := "" + mode := 0 + for _, r := range t.t { + if mode == 0 { + if r < 0x20 || r == 0x7f || (r >= 0x80 && r <= 0x9f) { + return token{}, NewInvalidCharError(rune(r)) + } + if r == '\\' { + 
mode = '\\' + continue + } + result += string(rune(r)) + } else if mode == '\\' { + switch r { + case 'n': + result += "\n" + mode = 0 + case '\'': + result += "'" + mode = 0 + case 'r': + result += "\r" + mode = 0 + case 't': + result += "\t" + mode = 0 + case '\\': + result += "\\" + mode = 0 + case 'u': + mode = 'u' + case 'U': + mode = 'U' + default: + return token{}, NewEscapeError(r) + } + } else if mode == 'u' { + ucode += string(rune(r)) + if len(ucode) == 4 { + mode = 0 + char, err := parseUnicode(ucode) + ucode = "" + if err != nil { + return token{}, err + } + result += char + } + } else if mode == 'U' { + ucode += string(rune(r)) + if len(ucode) == 8 { + mode = 0 + char, err := parseUnicode(ucode) + ucode = "" + if err != nil { + return token{}, err + } + result += char + } + } + } + t.t = result + return t, nil +} + +func parseUnicode(ucode string) (string, error) { + var ( + b []byte + r rune + ) + codepoint, err := strconv.ParseInt(ucode, 16, 64) + if err != nil { + return "", err + } + switch { + case codepoint < 0x7f: + b = []byte{byte(codepoint)} + // todo check r, s for error + r, _ = utf8.DecodeRune(b) + case codepoint < 0x7ff: + b = []byte{ + byte((codepoint>>6)&0b00011111 | 0b11000000), + byte(codepoint&0b00111111 | 0b10000000), + } + r, _ = utf8.DecodeRune(b) + case codepoint < 0xffff: + b = []byte{ + byte((codepoint>>12)&0b00001111 | 0b11100000), + byte((codepoint>>6)&0b00111111 | 0b10000000), + byte(codepoint&0b00111111 | 0b10000000), + } + r, _ = utf8.DecodeRune(b) + case codepoint < 0x1fffff: + b = []byte{ + byte((codepoint>>18)&0b00000111 | 0b11110000), + byte((codepoint>>12)&0b00111111 | 0b10000000), + byte((codepoint>>6)&0b00111111 | 0b10000000), + byte(codepoint&0b00111111 | 0b10000000), + } + r, _ = utf8.DecodeRune(b) + default: + return "", InvalidCodepointError{ucode} + } + return string(r), nil +} diff --git a/struct.go b/struct.go new file mode 100644 index 
0000000000000000000000000000000000000000..4370ce0b933c85ae94a927bb02f0920ff0ddf112 --- /dev/null +++ b/struct.go @@ -0,0 +1,177 @@ +package dirty + +import ( + "fmt" + "reflect" + "unicode" +) + +func getStructPair(e Element) Array { + if e.getType() != ElemArray && len(e.(Array)) != 2 { + // todo error: not a pair + fmt.Println("not a pair") + return nil + } + return e.(Array) +} + +func getStructFieldName(pair Array) string { + k := reflect.TypeOf(pair[0]).Kind() + if k != reflect.String { + // todo error: name not string + fmt.Println("name not string") + return "" + } + fieldName := string(pair[0].(String)) + runes := []rune(fieldName) + runes[0] = unicode.ToUpper(runes[0]) + fieldName = string(runes) + return fieldName +} + +func getStructMapKey(pair Array, mk reflect.Kind) reflect.Value { + sk := reflect.ValueOf(pair[0]).Kind() + if sk != mk { + fmt.Printf("pair[0] (%v) not map key (%v)\n", sk, mk) + return reflect.ValueOf(nil) + } + if sk == reflect.String { + return reflect.ValueOf(string(pair[0].(String))) + } + // fixme + if sk == reflect.Int64 { + return reflect.ValueOf(int(pair[0].(Int))) + } + if sk == reflect.Bool { + return reflect.ValueOf(pair[0].(Const).Bool()) + } + if sk == reflect.Float64 { + return reflect.ValueOf(float64(pair[0].(Float))) + } + return reflect.ValueOf(nil) +} + +func setStructValue(value Element, f reflect.Value) error { + switch value.getType() { + case ElemArray: + err := convertStruct(value.(Array), f) + if err != nil { + return err + } + case ElemConst: + if value == NULL && f.Kind() == reflect.Ptr { + // do nothing; default value is nil + } else if (value == TRUE || value == FALSE) && f.Kind() == reflect.Bool { + f.SetBool(value.(Const).Bool()) + } else { + // todo error: type mismatch + fmt.Println("type mismatch") + return nil + } + case ElemFloat: + if f.Kind() == reflect.Float64 || f.Kind() == reflect.Float32 { + f.SetFloat(float64(value.(Float))) + } else { + // todo error: type mismatch + fmt.Println("type mismatch") 
+ return nil + } + case ElemInt: + if f.Kind() == reflect.Int || f.Kind() == reflect.Int8 || f.Kind() == reflect.Int16 || f.Kind() == reflect.Int32 || f.Kind() == reflect.Int64 { + f.SetInt(int64(value.(Int))) + } else { + // todo error: type mismatch + fmt.Println("type mismatch") + return nil + } + case ElemString: + if f.Kind() == reflect.String { + f.SetString(string(value.(String))) + } else { + // todo error: type mismatch + fmt.Println("type mismatch") + return nil + } + default: + // todo error: unknown type + fmt.Println("unknown type") + return nil + } + return nil +} + +func convertStruct(array Array, s reflect.Value) error { + kind := s.Kind() + if kind == reflect.Struct { + for _, e := range array { + pair := getStructPair(e) + fieldName := getStructFieldName(pair) + + f := s.FieldByName(fieldName) + if !f.IsValid() { + // todo error no such field + fmt.Println("no such field", fieldName) + return nil + } + if f.Kind() == reflect.Ptr && pair[1] != NULL { + f.Set(reflect.New(f.Type().Elem())) + f = f.Elem() + } + setStructValue(pair[1], f) + } + fmt.Printf("%+v\n", s) + return nil + } + if kind == reflect.Slice { + elemType := s.Type().Elem() + if s.Len() != 0 { + // todo error slice not empty + fmt.Println("slice len != 0") + return nil + } + capacity := s.Cap() + s.SetLen(capacity) + s2 := s + for i, e := range array { + if i < capacity { + f := s2.Index(i) + setStructValue(e, f) + } else { + f := reflect.New(elemType).Elem() + setStructValue(e, f) + s2 = reflect.Append(s2, f) + } + } + s.Set(s2) + fmt.Printf("%+v\n", s) + return nil + } + if kind == reflect.Array { + if s.Len() != len(array) { + // todo error array len not Array len + fmt.Println("array len not length of Array") + return nil + } + for i, e := range array { + f := s.Index(i) + setStructValue(e, f) + } + fmt.Printf("%+v\n", s) + return nil + } + if kind == reflect.Map { + keyKind := s.Type().Key().Kind() + valueType := s.Type().Elem() + for _, e := range array { + pair := 
getStructPair(e) + k := getStructMapKey(pair, keyKind) + v := reflect.New(valueType).Elem() + setStructValue(pair[1], v) + s.SetMapIndex(k, v) + } + fmt.Printf("%+v\n", s) + return nil + } + fmt.Println(kind) + return nil +} diff --git a/tokeniser.go b/tokeniser.go new file mode 100644 index 0000000000000000000000000000000000000000..30b8c54627e720b2c99febbc9eeb96623d89a913 --- /dev/null +++ b/tokeniser.go @@ -0,0 +1,270 @@ +package dirty + +import ( + "bufio" + "fmt" + "io" +) + +// todo use reader.Peek() +var skipped rune = 0 + +type tokenType int + +const ( + UNKNOWN tokenType = iota + LBRACKET + RBRACKET + STRING + STRING_RAW + NUMBER + FLOAT + CONST + COMMENT + EOF +) + +type token struct { + ttype tokenType + t string + i *int64 + f *float64 +} + +func nextToken(reader *bufio.Reader) (token, error) { + t, finished, e := nextToken_initial(reader) + if finished || e != nil { + return t, e + } + t, e = nextToken_rest(reader, t) + return t, e +} +func nextToken_initial(reader *bufio.Reader) (token, bool, error) { + var ( + r rune = 0 + err error = nil + t token + ) + +initialTokenLoop: + for { + if skipped != 0 { + r = skipped + skipped = 0 + } else { + r, _, err = reader.ReadRune() + } + //debugf("%c\n", r) + if err != nil { + if err == io.EOF { + t := token{ttype: EOF} + return t, true, nil + } + return token{}, true, fmt.Errorf("while reading: %w", err) + } + switch { + case r == '(': + return token{ttype: LBRACKET}, true, nil + case r == ')': + return token{ttype: RBRACKET}, true, nil + case r == '#': + t = token{ttype: COMMENT} + break initialTokenLoop + case r == '`': + t = token{ttype: STRING_RAW} + break initialTokenLoop + case r == '\'': + t = token{ttype: STRING} + break initialTokenLoop + case r == 't': + t = token{ttype: CONST, t: "t"} + break initialTokenLoop + case r == 'f': + t = token{ttype: CONST, t: "f"} + break initialTokenLoop + case r == 'n': + t = token{ttype: CONST, t: "n"} + break initialTokenLoop + case in(r, []rune{'1', '2', '3', '4', 
'5', '6', '7', '8', '9', '↊', '↋', '-', '.', '·', ','}): + t = token{ttype: NUMBER, t: string(r)} + break initialTokenLoop + case r == '0': + r, _, err = reader.ReadRune() + //debugf("%c\n", r) + if err != nil { + if err == io.EOF { + t := token{ttype: EOF} + return t, true, nil + } + return token{}, true, fmt.Errorf("while reading: %w", err) + } + switch r { + case 'b': + t = token{ttype: NUMBER, t: "0b"} + break initialTokenLoop + case 'o': + t = token{ttype: NUMBER, t: "0o"} + break initialTokenLoop + case 'x': + t = token{ttype: NUMBER, t: "0x"} + break initialTokenLoop + default: + skipped = r + var zero int64 = 0 + return token{ttype: NUMBER, i: &zero}, true, nil + } + case in(r, []rune{' ', '\t', '\n', '\r'}): + continue + default: + return token{}, true, nil + } + } + return t, false, err +} + +func nextToken_rest(reader *bufio.Reader, t token) (token, error) { + var ( + r rune + err error = nil + escaping bool = false + stringRawIndent string = "" + stringRawIndentSkip string = "" + stringRawState int = 0 // todo enum + ) + +tokenLoop: + for { + if skipped != 0 { + r = skipped + skipped = 0 + } else { + r, _, err = reader.ReadRune() + } + //debugf("%c\n", r) + // todo line, column + if err != nil { + if err == io.EOF { + if t.ttype == STRING || t.ttype == STRING_RAW { + return token{}, NewUnterminatedError("string", t.t) + } else { + return token{ttype: EOF}, nil + } + } + return token{}, fmt.Errorf("while reading: %w", err) + } + + switch t.ttype { + case COMMENT: + if r != '\n' { + t.t += string(r) + } else { + break tokenLoop + } + case STRING: + if !escaping && r == '\'' { + t, err = parseString(t) + if err != nil { + return token{}, err + } + break tokenLoop + } else if r == '\n' { + return token{}, NewUnterminatedError("string", t.t) + } else { + t.t += string(r) + } + if escaping { + escaping = false + } else if r == '\\' { + escaping = true + } + case STRING_RAW: + if stringRawState == 0 { + if r != '\n' { + return token{}, 
NewRawStringError("missing new line after opening `") + } else { + stringRawState = 1 + continue + } + } + if stringRawState == 1 { + if r == ' ' || r == '\t' { + stringRawIndent += string(r) + } else { + stringRawState = 2 + t.t += string(r) + } + continue + } + if stringRawState == 2 { + // fixme assumes lines ending with \n; get to end of line + if r == '\n' { + stringRawState = 3 + stringRawIndentSkip = "" + } + t.t += string(r) + continue + } + if stringRawState == 3 { + if len(stringRawIndentSkip) == 0 && r == '`' { + break tokenLoop + } + if len(stringRawIndentSkip) < len(stringRawIndent) { + stringRawIndentSkip += string(r) + } else { + if stringRawIndent != stringRawIndentSkip { + // todo convert whitespace to escape codes + return token{}, NewRawStringError("Indent ‘" + stringRawIndent + "’ does not begin with ‘" + stringRawIndentSkip + "’") + } + skipped = r + stringRawState = 2 + } + } + case CONST: + if t.t[0] == 't' && in(r, []rune{'r', 'u', 'e'}) && len(t.t) < 4 { + t.t += string(r) + continue + } + if t.t[0] == 'f' && in(r, []rune{'a', 'l', 's', 'e'}) && len(t.t) < 5 { + t.t += string(r) + continue + } + if in(r, []rune{'u', 'l'}) && len(t.t) < 4 { + t.t += string(r) + continue + } + skipped = r + t, err = parseConst(t) + break tokenLoop + case NUMBER: + if t.t[0] == '0' && t.t[1] == 'b' && in(r, []rune{'0', '1', ','}) { + t.t += string(r) + continue + } + if t.t[0] == '0' && t.t[1] == 'o' && in(r, []rune{'0', '1', '2', '3', '4', '5', '6', '7'}) { + t.t += string(r) + continue + } + if t.t[0] == '0' && t.t[1] == 'x' && in(r, []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F'}) { + t.t += string(r) + continue + } + if in(r, []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '↊', '↋', ',', '.', '·', 'e', '-'}) { + t.t += string(r) + continue + } + skipped = r + t, err = parseNumber(t) // todo errors that are not CommaError <- NumberError + break tokenLoop + } + } + return t, err +} 
// in reports whether the rune c occurs anywhere in expected.
//
// The slice is scanned front to back and the scan stops at the first match.
// A nil or empty expected slice never matches, so the result is false.
func in(c rune, expected []rune) bool {
	found := false
	for idx := 0; idx < len(expected) && !found; idx++ {
		found = expected[idx] == c
	}
	return found
}