json.um
fn parse
Parses the JSON from the `inp` string. On success, returns either a `[]any` or `map[str]any`. On error, returns `[]Error`.
fn parse*(inp: str): any {
import (
"std.um" // change me for your std.um location
)
// Token kinds produced by lex_next and stored in token.t.
// Only tok_opener is given an explicit value; the names after it rely on the
// compiler numbering them consecutively (presumably 2, 3, ... -- confirm
// against the Umka rules for constant declarations).
const (
// '{'
tok_opener = 1
// '}'
tok_closer
// ':'
tok_colon
// double-quoted string
tok_str
// number without a '.'
tok_int
// number containing a '.'
tok_real
// any other bare word; parse_val accepts "true", "false" and "null"
tok_const
// ','
tok_separator
// '['
tok_lopener
// ']'
tok_lcloser
)
// A single problem reported while parsing. parse returns a []Error when at
// least one of these was recorded.
type Error* = struct {
// human-readable description of the problem
message: str
// value of the lexer's line counter when the problem was recorded
lno: int
}
// Lexer and parser state shared by all lex_* and parse_* methods.
// NOTE: parse builds this struct with a positional literal, so the field
// order must not change.
type lexer = struct {
// the text being parsed
inp: str
// length of inp, cached by parse
len: int
// index of the next character to read from inp
pos: int
// line counter; parse starts it at 1 and lex_next bumps it on '\n'
lineno: int
// every problem recorded so far through parser_error
errors: []Error
}
// A single lexical token produced by lex_next.
// NOTE: lex_next builds tokens with positional literals, so the field order
// must not change.
type token = struct {
// token kind, one of the tok_* constants (0 for the empty end-of-input token)
t: int
// value of the lexer's pos field captured when the token was built
pos: int
// token text: the punctuation itself, the string body, the number or the word
value: str
}
// Reads the character at the current position and steps past it.
// Once the whole input has been consumed it returns '\0' and leaves the
// position untouched.
fn (l: ^lexer) get(): char {
    if l.pos < l.len {
        c := l.inp[l.pos]
        l.pos++
        return c
    }
    return '\0'
}
// Lexes the body of a string literal. Must be called with the position just
// past the opening '"'; on return the position is just past the closing '"'.
//
// Returns the raw text between the quotes. Escape sequences are NOT decoded:
// the backslash and the character after it are copied through verbatim.
//
// If the input ends before a closing quote is found, an "unterminated string"
// error is recorded and whatever was collected so far is returned.
fn (l: ^lexer) lex_str(): str {
    const STEP = 64
    out := make([]char, STEP)
    buflen := STEP
    n := 0           // number of characters stored in out
    escaped := false // true when the previous character was an unescaped '\'
    for true {
        if l.pos >= l.len {
            // parser_error is declared further down in the file, so the error
            // is appended directly instead of going through that helper.
            l.errors = append(l.errors, Error{"unterminated string", l.lineno})
            break
        }
        c := l.inp[l.pos]
        l.pos++
        if c == '"' && !escaped {
            break
        }
        // Track the escape state explicitly: looking only at the previous
        // character would treat the closing quote of "a\\" as escaped.
        escaped = (c == '\\') && !escaped
        // Grow one slot early so at least one zero char always follows the text.
        if n + 1 >= buflen {
            out = append(out, make([]char, STEP * 4))
            buflen += STEP * 4
        }
        out[n] = c
        n++
    }
    return out
}
// Reports whether inp can be part of a number: a decimal digit or a '.'.
fn is_num(inp: char): bool {
    if inp == '.' {
        return true
    }
    return inp >= '0' && inp <= '9'
}
// Collects consecutive number characters (digits and '.') starting at the
// current position.
//
// Returns the collected text and whether a '.' was seen. The first character
// that is not part of the number is consumed as well, except at end of input,
// where get() does not advance.
fn (l: ^lexer) lex_num(): (str, bool) {
    digits := ""
    saw_dot := false
    c := l.get()
    for is_num(c) {
        if c == '.' {
            saw_dot = true
        }
        digits += c
        c = l.get()
    }
    return digits, saw_dot
}
// Lexes a bare word (such as true, false or null). The caller has already
// consumed the word's first character, so the position is stepped back by one
// before scanning.
//
// Scanning stops at whitespace, ',', '}' or ']', or at the end of the input.
// The delimiter itself is left unread. Returns the word's text.
fn (l: ^lexer) lex_space(): str {
    out := ""
    l.pos--
    // Index the input directly instead of using get(): get() keeps returning
    // '\0' at end of input, which is not a delimiter and would loop forever.
    for l.pos < l.len {
        c := l.inp[l.pos]
        if c == ' ' || c == '}' || c == ']' || c == '\n' || c == '\r' || c == ',' || c == '\t' {
            break
        }
        out += c
        l.pos++
    }
    return out
}
// Skips whitespace (counting newlines in l.lineno) and lexes the next token.
//
// Returns the token and true, or an empty token and false once the end of
// the input has been reached.
fn (l: ^lexer) lex_next(): (token, bool) {
    for l.pos < l.len && (l.inp[l.pos] == ' ' || l.inp[l.pos] == '\n' || l.inp[l.pos] == '\r' || l.inp[l.pos] == '\t') {
        if l.inp[l.pos] == '\n' {
            l.lineno++
        }
        l.pos++
    }

    switch l.get() {
    case '{':
        return token{tok_opener, l.pos, "{"}, true
    case '}':
        return token{tok_closer, l.pos, "}"}, true
    case '[':
        return token{tok_lopener, l.pos, "["}, true
    case ']':
        return token{tok_lcloser, l.pos, "]"}, true
    case '"': // are ' or ` strings allowed?
        return token{tok_str, l.pos, l.lex_str()}, true
    case '\0':
        // get() returns '\0' when the input is exhausted.
        return token{}, false
    case ':':
        return token{tok_colon, l.pos, str(l.inp[l.pos-1])}, true
    case ',':
        return token{tok_separator, l.pos, str(l.inp[l.pos-1])}, true
    default:
        first := l.inp[l.pos-1]
        if first == '-' || is_num(first) {
            rest_start := l.pos
            val, is_real := l.lex_num()
            // lex_num consumes the character that ends the number, but not at
            // end of input. Reposition from the length of the collected text
            // instead of stepping back blindly, which at end of input would
            // make the last digit be lexed again and again.
            l.pos = rest_start + len(val)
            t := tok_int
            if is_real { t = tok_real }
            return token{t, l.pos, first + val}, true
        }
        val := l.lex_space()
        return token{tok_const, l.pos, val}, true
    }
    return token{}, false
}
// Records a problem with the given message, tagged with the current line
// counter, in the lexer's error list.
fn (l: ^lexer) parser_error(msg: str) {
    err := Error{msg, l.lineno}
    l.errors = append(l.errors, err)
}
// Forward declarations: parse_val calls both of these, and their bodies
// (which in turn call parse_val) are defined after it.
fn (l: ^lexer) parse_object(): map[str]any
fn (l: ^lexer) parse_array(): []any
// Turns an already-lexed token into a value.
//
// Strings are returned as-is, numbers are converted with std::atoi or
// std::atof, '{' and '[' start a nested object or array, and the words
// true, false and null map to their values. Any other word records
// "unknown constant", and any other token kind records
// "unsupported json feature"; both cases return null.
fn (l: ^lexer) parse_val(t: token): any {
    kind := t.t
    if kind == tok_str { return t.value }
    if kind == tok_int { return std::atoi(t.value) }
    if kind == tok_real { return std::atof(t.value) }
    if kind == tok_opener { return l.parse_object() }
    if kind == tok_lopener { return l.parse_array() }

    if kind != tok_const {
        l.parser_error("unsupported json feature")
        return null
    }

    word := t.value
    if word == "true" { return true }
    if word == "false" { return false }
    if word != "null" {
        l.parser_error("unknown constant")
    }
    return null
}
// Parses the members of an object. Must be called after the opening '{' has
// been consumed; reads up to and including the matching '}'.
//
// Returns the key/value pairs collected so far, even when a problem was
// recorded. Problems recorded:
//   - "object key must be a string" when a member does not start with a string
//   - "missing colon" when the key is not followed by ':'
//   - "missing comma" when two members are not separated by ','
//   - "unexpected end of input inside object" when the input ends before '}'
fn (l: ^lexer) parse_object(): map[str]any {
    // Allocate the map up front so that members can be stored into it.
    out := make(map[str]any)

    t, stay := l.lex_next()
    for stay && t.t != tok_closer {
        if t.t != tok_str {
            l.parser_error("object key must be a string")
            return out
        }
        key := t.value

        colon := l.lex_next().item0
        if colon.t != tok_colon {
            l.parser_error("missing colon")
            return out
        }

        first := l.lex_next().item0
        out[key] = l.parse_val(first)

        t, stay = l.lex_next()
        if !stay || t.t == tok_closer {
            break
        }
        if t.t == tok_separator {
            t, stay = l.lex_next()
        } else {
            // Keep t: it is most likely the next key, so carry on with it
            // instead of throwing the token away.
            l.parser_error("missing comma")
        }
    }

    if !stay {
        l.parser_error("unexpected end of input inside object")
    }
    return out
}
// Parses the elements of an array. Must be called after the opening '[' has
// been consumed; reads up to and including the matching ']'.
//
// Returns the elements collected so far, even when a problem was recorded.
// Problems recorded:
//   - "array elements are not separated correctly" when an element is
//     followed by something other than ',' or ']'
//   - "unexpected end of input inside array" when the input ends before ']'
fn (l: ^lexer) parse_array(): []any {
    out := []any{}
    for true {
        t, stay := l.lex_next()
        if !stay {
            l.parser_error("unexpected end of input inside array")
            break
        }
        if t.t == tok_lcloser {
            break
        }
        out = append(out, l.parse_val(t))

        t, stay = l.lex_next()
        if !stay {
            l.parser_error("unexpected end of input inside array")
            break
        }
        if t.t == tok_lcloser {
            break
        }
        if t.t != tok_separator {
            l.parser_error("array elements are not separated correctly")
            break
        }
    }
    return out
}
//~~fn parse
// Parses the JSON from the `inp` string. On success, returns either a
// `[]any` or `map[str]any`. On error, returns `[]Error`.
fn parse*(inp: str): any {
//~~
    // The lexer starts at offset 0, on line 1, with an empty error list.
    l := lexer{inp, len(inp), 0, 1, {}}
    t := l.lex_next().item0

    var out: any
    switch (t.t) {
    case tok_opener:
        out = l.parse_object()
    case tok_lopener:
        out = l.parse_array()
    default:
        l.parser_error("top level type can only be an object or an array")
    }

    // Only whitespace may follow the top level value. The check is skipped
    // when errors were already recorded, because the position is then not
    // guaranteed to be at the end of the value.
    if len(l.errors) == 0 {
        for l.pos < l.len && (l.inp[l.pos] == ' ' || l.inp[l.pos] == '\n' || l.inp[l.pos] == '\r' || l.inp[l.pos] == '\t') {
            if l.inp[l.pos] == '\n' {
                l.lineno++
            }
            l.pos++
        }
        if l.pos < l.len {
            l.parser_error("unexpected data after the top level value")
        }
    }

    if len(l.errors) > 0 {
        return l.errors
    }
    return out
}
// Demo: parses one well-formed object and one with a missing comma, and
// prints whatever parse returns for each.
fn main() {
    well_formed := "{ \"a\": 1.2, \"b\": 34 }"
    missing_comma := "{ \"a\": 1.2 \"b\": 34 }"
    printf("%v\n", parse(well_formed))
    printf("%v\n", parse(missing_comma))
}