{ open Token;; open Common;; exception Lex_err of (string * Common.pos);; let fail lexbuf s = let p = lexbuf.Lexing.lex_start_p in let pos = (p.Lexing.pos_fname, p.Lexing.pos_lnum , (p.Lexing.pos_cnum) - (p.Lexing.pos_bol)) in raise (Lex_err (s, pos)) ;; let bump_line p = { p with Lexing.pos_lnum = p.Lexing.pos_lnum + 1; Lexing.pos_bol = p.Lexing.pos_cnum } ;; let keyword_table = Hashtbl.create 100 let _ = List.iter (fun (kwd, tok) -> Common.htab_put keyword_table kwd tok) [ ("mod", MOD); ("use", USE); ("meta", META); ("auth", AUTH); ("syntax", SYNTAX); ("if", IF); ("else", ELSE); ("while", WHILE); ("do", DO); ("alt", ALT); ("case", CASE); ("for", FOR); ("each", EACH); ("put", PUT); ("ret", RET); ("be", BE); ("fail", FAIL); ("drop", DROP); ("type", TYPE); ("check", CHECK); ("claim", CLAIM); ("prove", PROVE); ("io", IO); ("state", STATE); ("unsafe", UNSAFE); ("native", NATIVE); ("mutable", MUTABLE); ("auto", AUTO); ("fn", FN); ("iter", ITER); ("import", IMPORT); ("export", EXPORT); ("let", LET); ("log", LOG); ("spawn", SPAWN); ("thread", THREAD); ("yield", YIELD); ("join", JOIN); ("bool", BOOL); ("int", INT); ("uint", UINT); ("char", CHAR); ("str", STR); ("rec", REC); ("tup", TUP); ("tag", TAG); ("vec", VEC); ("any", ANY); ("obj", OBJ); ("port", PORT); ("chan", CHAN); ("task", TASK); ("true", LIT_BOOL true); ("false", LIT_BOOL false); ("in", IN); ("as", AS); ("with", WITH); ("bind", BIND); ("u8", MACH TY_u8); ("u16", MACH TY_u16); ("u32", MACH TY_u32); ("u64", MACH TY_u64); ("i8", MACH TY_i8); ("i16", MACH TY_i16); ("i32", MACH TY_i32); ("i64", MACH TY_i64); ("f32", MACH TY_f32); ("f64", MACH TY_f64) ] ;; } let hexdig = ['0'-'9' 'a'-'f' 'A'-'F'] let bin = "0b" ['0' '1']['0' '1' '_']* let hex = "0x" hexdig ['0'-'9' 'a'-'f' 'A'-'F' '_']* let dec = ['0'-'9']+ let exp = ['e''E']['-''+']? dec let flo = (dec '.' dec (exp?)) | (dec exp) let ws = [ ' ' '\t' '\r' ] let id = ['a'-'z' 'A'-'Z' '_']['a'-'z' 'A'-'Z' '0'-'9' '_']* rule token = parse ws+ { token lexbuf } | '\n' { lexbuf.Lexing.lex_curr_p <- (bump_line lexbuf.Lexing.lex_curr_p); token lexbuf } | "//" [^'\n']* { token lexbuf } | '+' { PLUS } | '-' { MINUS } | '*' { STAR } | '/' { SLASH } | '%' { PERCENT } | '=' { EQ } | '<' { LT } | "<=" { LE } | "==" { EQEQ } | "!=" { NE } | ">=" { GE } | '>' { GT } | '!' { NOT } | '&' { AND } | "&&" { ANDAND } | '|' { OR } | "||" { OROR } | "<<" { LSL } | ">>" { LSR } | ">>>" { ASR } | '~' { TILDE } | '{' { LBRACE } | '_' (dec as n) { IDX (int_of_string n) } | '_' { UNDERSCORE } | '}' { RBRACE } | "+=" { OPEQ (PLUS) } | "-=" { OPEQ (MINUS) } | "*=" { OPEQ (STAR) } | "/=" { OPEQ (SLASH) } | "%=" { OPEQ (PERCENT) } | "&=" { OPEQ (AND) } | "|=" { OPEQ (OR) } | "<<=" { OPEQ (LSL) } | ">>=" { OPEQ (LSR) } | ">>>=" { OPEQ (ASR) } | "^=" { OPEQ (CARET) } | '#' { POUND } | '@' { AT } | '^' { CARET } | '.' { DOT } | ',' { COMMA } | ';' { SEMI } | ':' { COLON } | "<-" { LARROW } | "<|" { SEND } | "->" { RARROW } | '(' { LPAREN } | ')' { RPAREN } | '[' { LBRACKET } | ']' { RBRACKET } | id as i { try Hashtbl.find keyword_table i with Not_found -> IDENT (i) } | bin as n { LIT_INT (Int64.of_string n, n) } | hex as n { LIT_INT (Int64.of_string n, n) } | dec as n { LIT_INT (Int64.of_string n, n) } | flo as n { LIT_FLO n } | '\'' { char lexbuf } | '"' { let buf = Buffer.create 32 in str buf lexbuf } | eof { EOF } and str buf = parse _ as ch { match ch with '"' -> LIT_STR (Buffer.contents buf) | '\\' -> str_escape buf lexbuf | _ -> Buffer.add_char buf ch; let c = Char.code ch in if bounds 0 c 0x7f then str buf lexbuf else if ((c land 0b1110_0000) == 0b1100_0000) then ext_str 1 buf lexbuf else if ((c land 0b1111_0000) == 0b1110_0000) then ext_str 2 buf lexbuf else if ((c land 0b1111_1000) == 0b1111_0000) then ext_str 3 buf lexbuf else if ((c land 0b1111_1100) == 0b1111_1000) then ext_str 4 buf lexbuf else if ((c land 0b1111_1110) == 0b1111_1100) then ext_str 5 buf lexbuf else fail lexbuf "bad initial utf-8 byte" } and str_escape buf = parse 'x' ((hexdig hexdig) as h) | 'u' ((hexdig hexdig hexdig hexdig) as h) | 'U' ((hexdig hexdig hexdig hexdig hexdig hexdig hexdig hexdig) as h) { Buffer.add_string buf (char_as_utf8 (int_of_string ("0x" ^ h))); str buf lexbuf } | 'n' { Buffer.add_char buf '\n'; str buf lexbuf } | 'r' { Buffer.add_char buf '\r'; str buf lexbuf } | 't' { Buffer.add_char buf '\t'; str buf lexbuf } | '\\' { Buffer.add_char buf '\\'; str buf lexbuf } | '"' { Buffer.add_char buf '"'; str buf lexbuf } | _ as c { fail lexbuf ("bad escape: \\" ^ (Char.escaped c)) } and ext_str n buf = parse _ as ch { let c = Char.code ch in if ((c land 0b1100_0000) == (0b1000_0000)) then begin Buffer.add_char buf ch; if n = 1 then str buf lexbuf else ext_str (n-1) buf lexbuf end else fail lexbuf "bad trailing utf-8 byte" } and char = parse '\\' { char_escape lexbuf } | _ as c { let c = Char.code c in if bounds 0 c 0x7f then end_char c lexbuf else if ((c land 0b1110_0000) == 0b1100_0000) then ext_char 1 (c land 0b0001_1111) lexbuf else if ((c land 0b1111_0000) == 0b1110_0000) then ext_char 2 (c land 0b0000_1111) lexbuf else if ((c land 0b1111_1000) == 0b1111_0000) then ext_char 3 (c land 0b0000_0111) lexbuf else if ((c land 0b1111_1100) == 0b1111_1000) then ext_char 4 (c land 0b0000_0011) lexbuf else if ((c land 0b1111_1110) == 0b1111_1100) then ext_char 5 (c land 0b0000_0001) lexbuf else fail lexbuf "bad initial utf-8 byte" } and char_escape = parse 'x' ((hexdig hexdig) as h) | 'u' ((hexdig hexdig hexdig hexdig) as h) | 'U' ((hexdig hexdig hexdig hexdig hexdig hexdig hexdig hexdig) as h) { end_char (int_of_string ("0x" ^ h)) lexbuf } | 'n' { end_char (Char.code '\n') lexbuf } | 'r' { end_char (Char.code '\r') lexbuf } | 't' { end_char (Char.code '\t') lexbuf } | '\\' { end_char (Char.code '\\') lexbuf } | '\'' { end_char (Char.code '\'') lexbuf } | _ as c { fail lexbuf ("bad escape: \\" ^ (Char.escaped c)) } and ext_char n accum = parse _ as c { let c = Char.code c in if ((c land 0b1100_0000) == (0b1000_0000)) then let accum = (accum lsl 6) lor (c land 0b0011_1111) in if n = 1 then end_char accum lexbuf else ext_char (n-1) accum lexbuf else fail lexbuf "bad trailing utf-8 byte" } and end_char accum = parse '\'' { LIT_CHAR accum } and bracequote buf depth = parse '\\' '{' { Buffer.add_char buf '{'; bracequote buf depth lexbuf } | '{' { Buffer.add_char buf '{'; bracequote buf (depth+1) lexbuf } | '\\' '}' { Buffer.add_char buf '}'; bracequote buf depth lexbuf } | '}' { if depth = 1 then BRACEQUOTE (Buffer.contents buf) else begin Buffer.add_char buf '}'; bracequote buf (depth-1) lexbuf end } | '\\' [^'{' '}'] { let s = Lexing.lexeme lexbuf in Buffer.add_string buf s; bracequote buf depth lexbuf } | [^'\\' '{' '}']+ { let s = Lexing.lexeme lexbuf in Buffer.add_string buf s; bracequote buf depth lexbuf }