/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Interpreter and compiler for a simple imperative language. Written May 2006 by Markus Triska (triska@gmx.at) Public domain code. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ :- use_module(library(assoc)). % interpreter run(Tree) :- env_new(Env), interpret(Tree, Env, _). interpret(print(P), Env, Env) :- eval(P, Env, Value), format("~w\n", [Value]). interpret(sequence(A,B), Env0, Env) :- interpret(A, Env0, Env1), ( A = return(_) -> Env = Env1 ; interpret(B, Env1, Env) ). interpret(call(Name, Arg), Env0, Env0) :- eval(Arg, Env0, ArgVal), env_func_body(Env0, Name, ArgName, Body), env_clear_variables(Env0, Env1), env_put_var(ArgName, ArgVal, Env1, Env2), interpret(Body, Env2, _). interpret(function(Name,Arg,Body), Env0, Env) :- env_put_func(Name, Arg, Body, Env0, Env). interpret(if(Cond,Then,Else), Env0, Env) :- eval(Cond, Env0, Value), ( Value =\= 0 -> interpret(Then, Env0, Env) ; interpret(Else, Env0, Env) ). interpret(assign(Var, Expr), Env0, Env) :- eval(Expr, Env0, Value), env_put_var(Var, Value, Env0, Env). interpret(while(Cond, Body), Env0, Env) :- eval(Cond, Env0, Value), ( Value =\= 0 -> interpret(Body, Env0, Env1), interpret(while(Cond, Body), Env1, Env) ; Env = Env0 ). interpret(return(Expr), Env0, Value) :- eval(Expr, Env0, Value). interpret(nop, Env, Env). eval(bin(Op,A,B), Env, Value) :- eval(A, Env, VA), eval(B, Env, VB), eval_(Op, VA, VB, Value). eval(v(V), Env, Value) :- env_get_var(Env, V, Value). eval(n(N), _, N). eval(call(Name, Arg), Env0, Value) :- eval(Arg, Env0, ArgVal), env_func_body(Env0, Name, ArgName, Body), env_clear_variables(Env0, Env1), env_put_var(ArgName, ArgVal, Env1, Env2), interpret(Body, Env2, Value). eval_(+, A, B, V) :- V is A + B. eval_(-, A, B, V) :- V is A - B. eval_(*, A, B, V) :- V is A * B. eval_(/, A, B, V) :- V is A // B. eval_(=, A, B, V) :- (A =:= B -> V = 1 ; V = 0). eval_(>, A, B, V) :- ( A > B -> V = 1 ; V = 0). eval_(<, A, B, V) :- ( A < B -> V = 1 ; V = 0). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% asm_intcode(Asm, Intcode) :- maplist(asm_intcode_, Asm, Intcode0), flatten(Intcode0, Intcode). asm_intcode_([halt], [0]). asm_intcode_([alloc,A], [1,A]). asm_intcode_([pushc,C], [2,C]). asm_intcode_([pushv,V], [3,V]). asm_intcode_([pop,V], [4,V]). asm_intcode_([add], [5]). asm_intcode_([sub], [6]). asm_intcode_([mul], [7]). asm_intcode_([div], [8]). asm_intcode_([jmp,Adr], [9,Adr]). asm_intcode_([jne,Adr], [10,Adr]). asm_intcode_([jge,Adr], [11,Adr]). asm_intcode_([jle,Adr], [12,Adr]). asm_intcode_([call,Adr], [13,Adr]). asm_intcode_([print], [14]). asm_intcode_([ret], [15]). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% compile(Tree, Asm) :- gen_state(S0), compile(Tree, S0, S), state_instrs(S, Asm). gen_state(s([],[],[],0)). state_instrs(s(Is0,Fs,_,_), Is) :- reverse([[halt]|Is0], Is1), resolve_calls(Is1, Fs, Is). resolve_calls([], _, []). resolve_calls([I0|Is0], Fs, [I|Is]) :- ( I0 = [call,Name] -> memberchk(Name-Adr, Fs), I = [call,Adr] ; I = I0 ), resolve_calls(Is0, Fs, Is). emit(Code, s(Is0,Fs,Vs,PC0), s([Code|Is0],Fs,Vs,PC)) :- length(Code, L), PC is PC0 + L. start_function(Name, Arg, s(Is0,Fs,_,PC0), s(Is0,[Name-PC0|Fs],[Arg-0],PC0)). num_variables(Num, S, S) :- S = s(_,_,Vs,_), length(Vs, Num0), Num is Num0 - 1. % don't count parameter variable_offset(Name, Offset, s(Is0,Fs0,Vs0,PC0), s(Is0,Fs0,Vs,PC0)) :- ( memberchk(Name-Offset, Vs0) -> Vs = Vs0 ; Vs0 = [_-Curr|_], Offset is Curr + 1, Vs = [Name-Offset|Vs0] ). current_pc(PC, s(Is,Fs,Vs,PC), s(Is,Fs,Vs,PC)). compile(nop) --> []. compile(print(P)) --> compile(P), emit([print]). compile(sequence(A,B)) --> compile(A), compile(B). compile(call(Name, Arg)) --> compile(Arg), emit([call,Name]). compile(function(Name,Arg,Body)) --> emit([jmp,Skip]), start_function(Name, Arg), emit([alloc,NumVars]), compile(Body), num_variables(NumVars), current_pc(Skip). compile(if(Cond,Then,Else)) --> { Cond = bin(Op,A,B) }, compile(A), compile(B), { condition(Op, Instr) }, emit([Instr,Adr1]), compile(Then), emit([jmp,Adr2]), current_pc(Adr1), compile(Else), current_pc(Adr2). compile(assign(Var,Expr)) --> variable_offset(Var, Offset), compile(Expr), emit([pop,Offset]). compile(while(Cond,Body)) --> current_pc(Head), { Cond = bin(Op,A,B) }, compile(A), compile(B), { condition(Op, Instr) }, emit([Instr,Break]), compile(Body), emit([jmp,Head]), current_pc(Break). compile(return(Expr)) --> compile(Expr), emit([ret]). compile(bin(Op,A,B)) --> compile(A), compile(B), { op_vminstr(Op,VI) }, emit([VI]). compile(n(N)) --> emit([pushc,N]). compile(v(V)) --> variable_offset(V, O), emit([pushv,O]). op_vminstr(+, add). op_vminstr(-, sub). op_vminstr(*, mul). op_vminstr(/, div). condition(=, jne). condition(<, jge). condition(>, jle). display_asm([], _). display_asm([Cmd|Cmds], N0) :- ( N0 < 10 -> format(" ") ; true ), format("~w: ", [N0]), length(Cmd, L), ( L = 1 -> format("~w\n", Cmd) ; format("~w ~w\n", Cmd) ), N1 is N0 + L, display_asm(Cmds, N1). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% env_new(E-E) :- empty_assoc(E). env_put_func(Name, Arg, Body, Vars0-Funcs0, Vars0-Funcs) :- put_assoc(Name, Funcs0, Arg-Body, Funcs). env_func_body(_-Funcs, Name, ArgName, Body) :- get_assoc(Name, Funcs, ArgName-Body). env_put_var(Name, Value, Vars0-Funcs0, Vars-Funcs0) :- put_assoc(Name, Vars0, Value, Vars). env_get_var(Vars-_, Name, Value) :- get_assoc(Name, Vars, Value). env_clear_variables(_-Funcs0, E-Funcs0) :- empty_assoc(E). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % lexical analysis - split input sequence into tokens lex_analysis(Chars, Tokens) :- phrase(tokens(Tokens), Chars). tokens(Ts) --> whitespace, tokens(Ts). tokens([T|Ts]) --> tok(T), !, % single solution: longest input match tokens(Ts). tokens([]) --> "". tok('{') --> "{". tok('}') --> "}". tok(';') --> ";". tok(',') --> ",". tok('(') --> "(". tok(')') --> ")". tok(rop(=)) --> "==". tok(rop(<)) --> "<". tok(rop(>)) --> ">". tok(aop(+)) --> "+". tok(aop(-)) --> "-". tok(mop(*)) --> "*". tok(mop(/)) --> "/". tok(=) --> "=". tok(ID_or_KW) --> ident(Cs), { name(I, Cs), ( keyword(I) -> ID_or_KW = I ; ID_or_KW = id(I) ) }. tok(num(N)) --> number(Cs), { name(N, Cs) }. ident([C|Cs]) --> letter(C), identr(Cs). identr([C|Cs]) --> letter(C), identr(Cs). identr([C|Cs]) --> digit(C), identr(Cs). identr([]) --> []. number([C|Cs]) --> digit(C), number(Cs). number([C]) --> digit(C). letter(C) --> [C], { between(0'A, 0'Z, C) ; between(0'a, 0'z, C)}. digit(C) --> [C], { between(0'0, 0'9, C) }. whitespace --> [C], {C =< 0' }. % close ' for syntax highlighting keyword(K) :- memberchk(K, [if,else,while,return,print]). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % syntax analysis - generate abstract syntax tree (AST) from tokens parse(Chars, Tree) :- lex_analysis(Chars, Tokens), syn_analysis(Tokens, Tree). syn_analysis(Tokens, Tree) :- phrase(program(Tree), Tokens). program(nop) --> []. program(P) --> func_or_print(FP), program_r(FP, P). program_r(P, P) --> []. program_r(P0, sequence(P0, P1)) --> func_or_print(FP), program_r(FP, P1). func_or_print(F) --> func(F). func_or_print(print(P)) --> stm(print(P)). func(function(Name,Arg,Body)) --> [id(Name)], ['('], [id(Arg)], [')'], block_(Body). stms(S) --> stm(S1), stmr(S1, S). stms(nop) --> []. stmr(S1, sequence(S1, S)) --> stm(S2), stmr(S2, S). stmr(S, S) --> []. stm(call(Name, Arg)) --> [id(Name)], ['('], exp(Arg), [')'], [';']. stm(assign(Id, E)) --> [id(Id)], ['='], exp(E), [';']. stm(if(Cond,S1,S2)) --> [if], cond(Cond), stm(S1), [else], stm(S2). stm(while(Cond, S)) --> [while], cond(Cond), stm(S). stm(return(E)) --> [return], exp(E), [';']. stm(print(E)) --> [print], exp(E), [';']. stm(S) --> block_(S). stm(nop) --> [';']. block_(S) --> ['{'], stms(S), ['}']. cond(bin(Op,A,B)) --> ['('], exp(A), [rop(Op)], exp(B), [')']. exp(E) --> term(E1), expr(E1, E). expr(E1, E) --> [aop(Op)], term(E2), expr(bin(Op, E1, E2), E). expr(E, E) --> []. term(E) --> factor(E1), termr(E1, E). termr(E1, E) --> [mop(Op)], factor(E2), termr(bin(Op, E1, E2), E). termr(E, E) --> []. factor(n(N)) --> [num(N)]. factor(v(Id)) --> [id(Id)]. factor(call(Name, Arg)) --> [id(Name)], ['('], exp(Arg), [')']. factor(E) --> ['('], exp(E), [')']. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % AST type definition is_program(nop). is_program(sequence(A,B)) :- ( (A = print(E), is_exp(E)) ; is_function(A) ), is_program(B). is_function(function(Name,Arg,Body)) :- atom(Name), atom(Arg), is_stm(Body). is_stm(print(E)) :- is_exp(E). is_stm(sequence(S1,S2)) :- is_stm(S1), is_stm(S2). is_stm(call(Name, Arg)) :- atom(Name), is_exp(Arg). is_stm(if(Cond,Then,Else)) :- is_exp(Cond), is_stm(Then), is_stm(Else). is_stm(while(Cond,Body)) :- is_exp(Cond), is_stm(Body). is_stm(return(E)) :- is_exp(E). is_stm(nop). is_stm(assign(Id, E)) :- atom(Id), is_exp(E). is_exp(n(N)) :- number(N). is_exp(v(V)) :- atom(V). is_exp(call(Id, E)) :- atom(Id), is_exp(E). is_exp(bin(Op,E1,E2)) :- member(Op, [=,#,>,<,+,-,*,/]), is_exp(E1), is_exp(E2). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% read_chars(Cs) :- ( at_end_of_stream -> Cs = [] ; Cs = [C|Rest], get_code(C), read_chars(Rest) ). doit :- read_chars(Chars), ( lex_analysis(Chars, Tokens) -> format("\n\ntokens:\n\n~w\n", [Tokens]), ( syn_analysis(Tokens, Tree) -> %is_program(Tree), % type check format("\nAST:\n\n~w\n", [Tree]), compile(Tree, Asm), nl,nl, format("VM code:\n\n"), display_asm(Asm, 0), asm_intcode(Asm, Intcode), format("\nintcode:\n\n~w\n\n", [Intcode]), format("program output:\n\n"), run(Tree) ; format("syntax error\n") ) ; format("lexical error") ).