# Library RelationAlgebra.ka_completeness

# ka_completeness: completeness of Kleene algebra

We mostly follow Dexter Kozen's proof: A completeness theorem for Kleene algebras and the algebra of regular events. Information and Computation, 110(2):366-390, May 1994.Require Import kleene sums matrix_ext lset boolean lang nfa regex bmx rmx normalisation.

Require dfa.

Set Implicit Arguments.

Unset Printing Implicit Defensive.

locally prevent simpl from unfolding relation algebra projections

Local Arguments lattice.car {_}: simpl never.

Local Arguments lattice.weq {_} _ _: simpl never.

Local Arguments lattice.leq {_} _ _: simpl never.

Local Arguments lattice.cup {_} _ _: simpl never.

Local Arguments lattice.bot {_}: simpl never.

Local Arguments monoid.mor {_} _ _: simpl never.

Local Arguments monoid.one {_} _: simpl never.

Local Arguments monoid.dot {_} _ _ _ _ _: simpl never.

Local Arguments monoid.str {_} _ _: simpl never.

Local Arguments lattice.weq {_} _ _: simpl never.

Local Arguments lattice.leq {_} _ _: simpl never.

Local Arguments lattice.cup {_} _ _: simpl never.

Local Arguments lattice.bot {_}: simpl never.

Local Arguments monoid.mor {_} _ _: simpl never.

Local Arguments monoid.one {_} _: simpl never.

Local Arguments monoid.dot {_} _ _ _ _ _: simpl never.

Local Arguments monoid.str {_} _ _: simpl never.

# 1. constructing eNFA out of regular expressions (Thompson)

## construction

empty automaton for 0

one state automaton for 1

two states automata for variables

for pls, we take the disjoint union of the two automata

for dot, we take the disjoint union, and we add epsilon
transitions from accepting states of the first automaton to starting
states of the second one

strict iteration is obtained simply by adding epsilon transitions
from accepting states to initial ones

Kleene star is derived from the other operations

summing up all together, we get the final construction

Fixpoint enfa (e: regex'): t :=

match e with

| r_zer ⇒ zer

| r_one ⇒ one

| r_var _ ⇒ cst e

| r_pls e f ⇒ pls (enfa e) (enfa f)

| r_dot e f ⇒ dot (enfa e) (enfa f)

| r_str e ⇒ str (enfa e)

end.

match e with

| r_zer ⇒ zer

| r_one ⇒ one

| r_var _ ⇒ cst e

| r_pls e f ⇒ pls (enfa e) (enfa f)

| r_dot e f ⇒ dot (enfa e) (enfa f)

| r_str e ⇒ str (enfa e)

end.

Lemma eval_zer: eval zer ≡ 0.

Proof. reflexivity. Qed.

Lemma eval_cst e: eval (cst e) ≡ e.

Proof.

set (o:=eval (cst e)). vm_compute in o; subst o. ra_normalise.

rewrite str1. ra.

Qed.

Lemma eval_one: eval one ≡ 1.

Proof. set (o:=eval one). vm_compute in o; subst o. ra. Qed.

Lemma eval_pls e f: eval (pls e f) ≡ eval e + eval f.

Proof.

destruct e as [n u M v]. destruct f as [m s N t].

change (mx_scal (row_mx u s ⋅ (blk_mx M 0 0 N)^* ⋅ col_mx v t) ≡ mx_scal (u ⋅ M ^* ⋅ v) + mx_scal (s ⋅ N ^* ⋅ t)).

rewrite <-mx_scal_pls. apply mx_scal_weq.

rewrite mx_str_diagonal.

setoid_rewrite mx_dot_rowcol. rewrite dotplsx.

rewrite <-2dotA, 2mx_dot_rowcol. ra.

Qed.

Lemma eval_dot e f: eval (dot e f) ≡ eval e ⋅ eval f.

Proof.

destruct e as [n u M v]. destruct f as [m s N t].

change (mx_scal (row_mx u 0 ⋅ (blk_mx M (v ⋅ s) 0 N)^* ⋅ col_mx 0 t) ≡ mx_scal (u ⋅ M ^* ⋅ v) ⋅ mx_scal (s ⋅ N ^* ⋅ t)).

rewrite <-mx_scal_dot. apply mx_scal_weq.

rewrite mx_str_trigonal. setoid_rewrite mx_dot_rowcol. rewrite dotplsx.

rewrite <-dotA, mx_dot_rowcol. ra.

Qed.

Lemma eval_itr e: eval (itr e) ≡ (eval e)^+.

Proof.

rewrite itr_str_l. destruct e as [n u M v].

change (mx_scal (u ⋅ (M + v ⋅ u) ^* ⋅ v) ≡ mx_scal (u ⋅ M ^* ⋅ v) ⋅ (mx_scal (u ⋅ M ^* ⋅ v))^*).

rewrite <-mx_scal_str, <-mx_scal_dot. apply mx_scal_weq.

rewrite str_pls. rewrite <-3dotA, <-str_dot. ra.

Qed.

Lemma eval_str e: eval (str e) ≡ (eval e)^*.

Proof. unfold str. now rewrite eval_pls, eval_one, eval_itr, str_itr. Qed.

algebraic correcteness of the global construction

Theorem correct e: eval (enfa e) ≡ e.

Proof.

induction e; simpl enfa.

apply eval_zer.

apply eval_one.

rewrite eval_pls. now apply cup_weq.

rewrite eval_dot. now apply dot_weq.

rewrite eval_str. now apply str_weq.

apply eval_cst.

Qed.

the produced matricial automaton actually is an NFA with epsilon
transitions (i.e., the transition matrix is simple, and starting and
accepting vectors are 01)

Lemma is_enfa e: is_enfa (enfa e).

Proof.

Opaque Peano.plus.

unfold is_enfa. induction e; cbn; intuition auto with mx_predicates.

Transparent Peano.plus.

Qed.

End Thompson.

# 2. removing espilon-transitions

## construction

Definition nfa e :=

let e := Thompson.enfa e in

let J := (epsilon_mx e^M)^* in

mk e^u (J ⋅ pure_part_mx e^M) (J ⋅ e^v).

Theorem eval_nfa e: eval (nfa e) ≡ e.

Proof.

rewrite <- (Thompson.correct e) at 2. unfold nfa.

change (mx_scal

((Thompson.enfa e)^u ⋅

((epsilon_mx (Thompson.enfa e)^M)^* ⋅ pure_part_mx (Thompson.enfa e) ^M)

^* ⋅ ((epsilon_mx (Thompson.enfa e)^M)^* ⋅ (Thompson.enfa e) ^v)) ≡

eval (Thompson.enfa e)).

set (f := Thompson.enfa e). set (J := epsilon_mx f^M). apply mx_scal_weq.

rewrite (@expand_simple_mx _ _ f^M) at 2 by apply Thompson.is_enfa.

rewrite str_pls. rewrite <-dotA, (dotA _ (J^*)). rewrite <-str_dot. apply dotA.

Qed.

the produced matricial automaton is actually a NFA (i.e., the
transition matrix is pure, and starting and accepting vectors are 01)

Lemma is_nfa_nfa e: is_nfa (nfa e).

Proof.

generalize (Thompson.is_enfa e).

unfold nfa, is_nfa, is_enfa. cbn.

intuition auto with mx_predicates.

Qed.

# 3. determinisation

## construction

NFA to determinise

Variable nfa: nfa.t.

Hypothesis Hnfa: is_nfa nfa.

Notation n := (n nfa).

Notation u := nfa^u.

Notation M := nfa^M.

Notation v := nfa^v.

Hypothesis Hnfa: is_nfa nfa.

Notation n := (n nfa).

Notation u := nfa^u.

Notation M := nfa^M.

Notation v := nfa^v.

alphabet of the generated DFA: those appearing in M, plus the
imposed ones

(unlabelled) transition matrix of the NFA, restricted to a

transition matrix of the NFA restricted to a

decoding matrix, establishing the link between the state of the
determinised automaton (i.e., sets of states), with those of the starting one.
We exploit for this the aforementioned bijection ;
X_(x,i) holds iff i\in x.

determinised automaton:

- the initial state is the set of intial states
- the transition function is obtained by reading the transition matrix in a "parallel" way: along a, the set x maps to the set {j / i -a→ j for some i\in x}. this computation is performed matricially.
- the accepting states are those containing at least one accepting state again, this computation is done matricially, using the decoding matrix X.

Definition det := dfa.mk

(of_row u)

(fun x a ⇒ of_row (to_row x ⋅ T_(a)))

(fun j ⇒ epsilon ((X ⋅ v) j ord0))

vars.

(of_row u)

(fun x a ⇒ of_row (to_row x ⋅ T_(a)))

(fun j ⇒ epsilon ((X ⋅ v) j ord0))

vars.

## correctness

Lemma det_uX: u ≡ det^u ⋅ X.

Proof.

intros i j. cbn. rewrite mx_dot_fun, dot1x.

symmetry. apply mem_of_row, Hnfa.

Qed.

Lemma M_sum: M ≡ \sum_(a\in vars) M_(a).

Proof.

intros i j. simpl. rewrite (@expand' (M i j) (mx_vars M ++vars')).

2: apply leq_xcup; left; apply mx_vars_full.

rewrite epsilon_pure by apply Hnfa.

setoid_rewrite cupbx. setoid_rewrite mx_sup.

apply sup_weq. 2: reflexivity.

intro a. now rewrite <-epsilon_deriv_pure by apply Hnfa.

Qed.

Lemma det_MX: X ⋅ M^* ≡ det^M^* ⋅ X.

Proof.

apply str_move.

rewrite M_sum. cbn. rewrite dotxsum, dotsumx. apply sup_weq. 2: reflexivity.

intros a x j.

rewrite mx_dot_fun. unfold X.

rewrite (mem_of_row ord0) by (unfold T_; auto with mx_predicates).

unfold weq.

setoid_rewrite dotxsum. apply sup_weq. 2: reflexivity.

intro j'. unfold M_, T_, epsilon_mx, mx_map.

now rewrite 2dotA, dot_ofboolx.

Qed.

Lemma det_Xv: X⋅v ≡ det^v.

Proof.

intros x j. setoid_rewrite ord0_unique. apply expand_01.

apply is_01_mx_dot. intros ? ?. apply is_01_ofbool. apply Hnfa.

Qed.

algebraic correctness of determinisation

Theorem eval_det: eval det ≡ eval nfa.

Proof.

apply mx_scal_weq.

rewrite det_uX.

rewrite <-(dotA _ X), det_MX.

rewrite <-(dotA _ _ v), <-(dotA _ X), det_Xv.

now rewrite dotA.

Qed.

End det.

Proof.

apply mx_scal_weq.

rewrite det_uX.

rewrite <-(dotA _ X), det_MX.

rewrite <-(dotA _ _ v), <-(dotA _ X), det_Xv.

now rewrite dotA.

Qed.

End det.

# Kleene theorem as a corollary

Definition dfa vars e := det vars (nfa e).

Corollary eval_dfa vars e: nfa.eval (dfa vars e) ≡ e.

Proof. setoid_rewrite eval_det. apply eval_nfa. apply is_nfa_nfa. Qed.

since the Kleene algebra of matrices allows us to compute a
regular expression out of any finite automaton, we obtain Kleene
theorem as a consequence:
``the languages recognised by a regular expression are those
recognised by a (deterministic) finite automaton''.

Theorem Kleene: ∀ l: lang' sigma,

(∃ e: regex, l ≡ regex.lang e) ↔ (∃ A: dfa.t, l ≡ dfa.lang A (dfa.u A)).

Proof.

intro l. setoid_rewrite <-nfa.dfa.eval_lang. setoid_rewrite <-dfa.reroot_id.

split.

intros [e He]. ∃ (dfa [] e). rewrite eval_dfa. assumption.

intros [A HA]. ∃ (nfa.eval A). assumption.

Qed.

# 4. algebraic correctness of language inclusion checking for DFA

thanks to the previous generalisation of the determinisation
construction, we can assume w.l.o.g. that vars A ≦ vars B. This
is required by our reduction of language inclusion to language
emptiness (dfa.lang_incl_dec), but also to prove Lemma R_M below

language inclusion matrix:
R_(j,i) holds iff dfa.lang A i ≦ dfa.lang B j
(Note that this matrix goes from B to A.)

the algebraic proof is quite similar to that of determinisation: we use the
bisimulation rule for inclusions, with R:
from A^u ≦ B^u ⋅ R, R ⋅ A^M ≦ B^M ⋅ R, and R ⋅ A^v ≦ B^v,
we deduce A^u⋅A^M^*⋅A^v ≦ B^u⋅B^M^*⋅B^v
the second and third hypotheses always hold, while the first one
holds only if the language of A is contained in that of B.

Lemma R_v: R ⋅ A^v ≦ B^v.

Proof.

intros j i'. apply leq_supx. intros i _. unfold dot; simpl. clear i'.

match goal with [|- ?P (_ ?x ?y) ?z ] ⇒ change (leq (x ⋅ y) z) end.

setoid_rewrite <-andb_dot. apply ofbool_leq. unfold leq;simpl; unfold le_bool.

setoid_rewrite Bool.andb_true_iff. setoid_rewrite is_true_sumbool.

now intros [H H0%(H nil)].

Qed.

Lemma R_M: R ⋅ A^M^* ≦ B^M^* ⋅ R.

Proof. apply str_move_l.

setoid_rewrite dotxsum. setoid_rewrite dotsumx. apply sup_leq'. exact Hvars.

intros a Ha j i'. apply leq_supx. intros i _.

rewrite mx_dot_fun. setoid_rewrite <-dot_ofboolx.

unfold mx_fun. case eqb_spec. 2: ra.

intros →. apply dot_leq. 2: reflexivity. apply ofbool_leq. unfold leq;simpl; unfold le_bool.

setoid_rewrite is_true_sumbool. intros H w Hw. apply (H (cons a w)). now split.

Qed.

Hypothesis HAB: regex.lang (eval A) ≦ regex.lang (eval B).

Lemma R_u: A^u ≦ B^u ⋅ R.

Proof.

intros z i. cbn. rewrite mx_dot_fun, dot1x. unfold mx_fun. clear z.

case eqb_ord_spec. 2: intro; apply leq_bx.

intros →. apply epsilon_reflexive. unfold R.

rewrite sumbool_true. reflexivity.

rewrite <-2dfa.eval_lang, <-2dfa.reroot_id. exact HAB.

Qed.

algebraic correctness of language inclusion test

Theorem dfa_complete_leq: eval A ≦ eval B.

Proof.

apply mx_scal_leq.

rewrite R_u.

rewrite <-(dotA _ R), R_M.

rewrite <-2dotA, R_v.

now rewrite dotA.

Qed.

End E.

Proof.

apply mx_scal_leq.

rewrite R_u.

rewrite <-(dotA _ R), R_M.

rewrite <-2dotA, R_v.

now rewrite dotA.

Qed.

End E.

Corollary ka_complete_leq: ∀ e f, lang e ≦ lang f → e ≦ f.

Proof.

intros e f.

rewrite <-(eval_dfa nil e), <-(eval_dfa (dfa.vars (dfa nil e)) f).

apply dfa_complete_leq. lattice.

Qed.

and thus for language equivalence

Corollary ka_complete_weq: ∀ e f, lang e ≡ lang f → e ≡ f.

Proof. intros e f. rewrite 2weq_spec. now split; apply ka_complete_leq. Qed.

since languages form a model of KA, the converse directions
trivially hold, so that we actually have equivalences

Corollary ka_correct_complete_leq: ∀ e f, lang e ≦ lang f ↔ e ≦ f.

Proof. split. apply ka_complete_leq. apply lang_leq. Qed.

Corollary ka_correct_complete_weq: ∀ e f, lang e ≡ lang f ↔ e ≡ f.

Proof. split. apply ka_complete_weq. apply lang_weq. Qed.

# Decidability of KA

Corollary ka_leq_dec: ∀ e f: regex', {e ≦f} + {~(e ≦f)}.

Proof.

assert(G: ∀ e f: regex',

let A := dfa [] e in

let B := dfa (dfa.vars A) f in

dfa.lang A (dfa.u A) ≦ dfa.lang B (dfa.u B) ↔ e ≦f).

intros e f A B.

rewrite <-2nfa.dfa.eval_lang, <-2dfa.reroot_id.

rewrite ka_correct_complete_leq.

unfold A, B. now rewrite 2eval_dfa.

intros. eapply sumbool_iff. apply G. apply dfa.lang_incl_dec. lattice.

Qed.

Corollary ka_weq_dec: ∀ e f: regex', {e ≡f} + {~(e ≡f)}.

Proof.

intros e f. destruct (ka_leq_dec e f). destruct (ka_leq_dec f e).

left. now apply antisym.

right. rewrite weq_spec. tauto.

right. rewrite weq_spec. tauto.

Qed.