/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Markov chain demonstration. Written Apr. 2nd 2007 by Markus Triska (markus.triska@gmx.at) Public domain code. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Main predicates: markov(+File, +Order, -Chain): Estimate transition probabilities of a Markov chain of order Order from file contents of File. generate(+Chain, +N, -Ls): Generate a random list of N atoms following the transition probabilities of Markov chain Chain. chain_entropy(+Chain, ?Entropy): Determine Chain's entropy. For example, generate 100 tokens from a Markov chain of order 8 estimated from this file: %?- markov('markov.pl',8,C), generate(C,100,Ls), maplist(write,Ls). %@% . %@% log_2s([_-W|Ws], S0, S) :- %@% S1 is S0 + W, %@% sumlist(Logs, E). %@% chain_entropy([_|Rest], A, E) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ markov(File, Order, mc(Order,A)) :- open(File, read, Stream), empty_assoc(A0), length(Firsts, Order), read_list(Firsts, Stream), append(Firsts, X, Xs), build(Stream, Xs-X, A0, A). read_list([], _). read_list([C|Cs], Stream) :- get_char(Stream, C), read_list(Cs, Stream). /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Generate a stream of tokens from the trained chain. The stream can run into "dead ends", i.e., paths that don't lead to concrete tokens, since they weren't encountered during the estimation phase. In that case, a new sequence is started like the initial one: choosing a random path of the chain, taking weights into account. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ initial_sequence(mc(Order,A), Ss) :- length(Ss, Order), initial_(Ss, A). initial_([], _). initial_([S|Ss], A) :- assoc_to_list(A, As0), maplist(without_child, As0, As), choose_weighted(As, S), get_assoc(S, A, nc(_,Child)), initial_(Ss, Child). without_child(A-nc(N,_), A-N). generate(Chain, N, Ls) :- length(Ls, N), initial_sequence(Chain, Ss), append(Ss, Y, Ys), Chain = mc(_,A), generate(Ls, Chain, Ys-Y, A). generate([], _, _, _) :- !. generate(Ls, Chain, Path0, Assoc) :- copy_term(Path0, Cs0-[]), ( generate_(Cs0, Assoc, L) -> Path0 = [_|Ps0]-Tail0, Tail0 = [L|Tail], Ls = [L|Rest] ; initial_sequence(Chain, Ss), append(Ss, Tail, Ps0), Ls = Rest ), generate(Rest, Chain, Ps0-Tail, Assoc). generate_([], Assoc, Elem) :- assoc_to_list(Assoc, List), choose_weighted(List, Elem). generate_([C|Cs], Assoc, Gen) :- get_assoc(C, Assoc, nc(_,Child)), generate_(Cs, Child, Gen). /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Choose a random element from a list of pairs E-W, respecting weights. %?- choose_weighted([a-2, b-1], S). %@% S = a - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ swap(A-B, B-A). choose_weighted(Ls0, Elem) :- maplist(swap, Ls0, Ls1), keysort(Ls1, Sorted), upper(Sorted, 0, Upper), X is random(Upper), choose(Sorted, 0, X, Elem). upper([], U, U). upper([N-_|Ns], U0, U) :- U1 is U0 + N, upper(Ns, U1, U). choose([N-G|NGs], A0, X, Gen) :- A1 is N + A0, A2 is A1 - 1, ( between(A0, A2, X) -> Gen = G ; choose(NGs, A1, X, Gen) ). /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Build a nested association list used to compute conditional probabilities. Internal nodes are again association lists, associating atoms with number of occurrences along this path and further maps of the same structure. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ build(Stream, Path0, A0, A) :- get_char(Stream, Char), ( Char == end_of_file -> A = A0 ; copy_term(Path0, Cs0-[]), insert_path(Cs0, Char, A0, A1), Path0 = [_|Ps0]-Tail0, Tail0 = [Char|Tail], build(Stream, Ps0-Tail, A1, A) ). insert_path([], Char, A0, A) :- ( get_assoc(Char, A0, N0) -> N is N0 + 1 ; N is 1 ), put_assoc(Char, A0, N, A). insert_path([P|Ps], Char, A0, A) :- ( get_assoc(P, A0, nc(N0,Child0)) -> N is N0 + 1 ; empty_assoc(Child0), N is 1 ), insert_path(Ps, Char, Child0, Child), put_assoc(P, A0, nc(N,Child), A). /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Entropy of a given Markov chain. For order 2 chain: H(S) = -Sum_i p_i * Sum_j p_i(j) * Sum_k p_{i,j}(k) * log_2 p_{i,j}(k) Where p_i(j) is the probability of j given previous character i. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ chain_entropy(mc(Order,A), E) :- length(Ls, Order), chain_entropy(Ls, A, E0), E is -E0. log_2s([], _, []). log_2s([_-W|Ws], S, [L|Ls]) :- P is W/S, L0 is log(P)/log(2), % binary logarithm L is P*L0, log_2s(Ws, S, Ls). sum_weights([], S, S). sum_weights([_-W|Ws], S0, S) :- S1 is S0 + W, sum_weights(Ws, S1, S). chain_entropy([], A, E) :- assoc_to_list(A, As), sum_weights(As, 0, S), log_2s(As, S, Logs0), sort(Logs0, Logs), sumlist(Logs, E). chain_entropy([_|Rest], A, E) :- assoc_to_list(A, As0), maplist(without_child, As0, As), sum_weights(As, 0, W), maplist(entropy_(Rest, W), As0, Ss0), sort(Ss0, Ss), sumlist(Ss, E). entropy_(Rest, W, _-nc(N,Child), S) :- P is N / W, chain_entropy(Rest, Child, E), S is P * E.