section ‹Deciding Regular Expression Equivalence› theory Equivalence_Checking imports NDerivative "HOL-Library.While_Combinator" begin subsection ‹Bisimulation between languages and regular expressions› coinductive bisimilar :: "'a lang ⇒ 'a lang ⇒ bool" where "([] ∈ K ⟷ [] ∈ L) ⟹ (⋀x. bisimilar (Deriv x K) (Deriv x L)) ⟹ bisimilar K L" lemma equal_if_bisimilar: assumes "bisimilar K L" shows "K = L" proof (rule set_eqI) fix w from ‹bisimilar K L› show "w ∈ K ⟷ w ∈ L" proof (induct w arbitrary: K L) case Nil thus ?case by (auto elim: bisimilar.cases) next case (Cons a w K L) from ‹bisimilar K L› have "bisimilar (Deriv a K) (Deriv a L)" by (auto elim: bisimilar.cases) then have "w ∈ Deriv a K ⟷ w ∈ Deriv a L" by (rule Cons(1)) thus ?case by (auto simp: Deriv_def) qed qed lemma language_coinduct: fixes R (infixl "∼" 50) assumes "K ∼ L" assumes "⋀K L. K ∼ L ⟹ ([] ∈ K ⟷ [] ∈ L)" assumes "⋀K L x. K ∼ L ⟹ Deriv x K ∼ Deriv x L" shows "K = L" apply (rule equal_if_bisimilar) apply (rule bisimilar.coinduct[of R, OF ‹K ∼ L›]) apply (auto simp: assms) done type_synonym 'a rexp_pair = "'a rexp * 'a rexp" type_synonym 'a rexp_pairs = "'a rexp_pair list" definition is_bisimulation :: "'a::order list ⇒ 'a rexp_pair set ⇒ bool" where "is_bisimulation as R = (∀(r,s)∈ R. (atoms r ∪ atoms s ⊆ set as) ∧ (nullable r ⟷ nullable s) ∧ (∀a∈set as. (nderiv a r, nderiv a s) ∈ R))" lemma bisim_lang_eq: assumes bisim: "is_bisimulation as ps" assumes "(r, s) ∈ ps" shows "lang r = lang s" proof - define ps' where "ps' = insert (Zero, Zero) ps" from bisim have bisim': "is_bisimulation as ps'" by (auto simp: ps'_def is_bisimulation_def) let ?R = "λK L. (∃(r,s)∈ps'. K = lang r ∧ L = lang s)" show ?thesis proof (rule language_coinduct[where R="?R"]) from ‹(r, s) ∈ ps› have "(r, s) ∈ ps'" by (auto simp: ps'_def) thus "?R (lang r) (lang s)" by auto next fix K L assume "?R K L" then obtain r s where rs: "(r, s) ∈ ps'" and KL: "K = lang r" "L = lang s" by auto with bisim' have "nullable r ⟷ nullable s" by (auto simp: is_bisimulation_def) thus "[] ∈ K ⟷ [] ∈ L" by (auto simp: nullable_iff KL) fix a show "?R (Deriv a K) (Deriv a L)" proof cases assume "a ∈ set as" with rs bisim' have "(nderiv a r, nderiv a s) ∈ ps'" by (auto simp: is_bisimulation_def) thus ?thesis by (force simp: KL lang_nderiv) next assume "a ∉ set as" with bisim' rs have "a ∉ atoms r" "a ∉ atoms s" by (auto simp: is_bisimulation_def) then have "nderiv a r = Zero" "nderiv a s = Zero" by (auto intro: deriv_no_occurrence) then have "Deriv a K = lang Zero" "Deriv a L = lang Zero" unfolding KL lang_nderiv[symmetric] by auto thus ?thesis by (auto simp: ps'_def) qed qed qed subsection ‹Closure computation› definition closure :: "'a::order list ⇒ 'a rexp_pair ⇒ ('a rexp_pairs * 'a rexp_pair set) option" where "closure as = rtrancl_while (%(r,s). nullable r = nullable s) (%(r,s). map (λa. (nderiv a r, nderiv a s)) as)" definition pre_bisim :: "'a::order list ⇒ 'a rexp ⇒ 'a rexp ⇒ 'a rexp_pairs * 'a rexp_pair set ⇒ bool" where "pre_bisim as r s = (λ(ws,R). (r,s) ∈ R ∧ set ws ⊆ R ∧ (∀(r,s)∈ R. atoms r ∪ atoms s ⊆ set as) ∧ (∀(r,s)∈ R - set ws. (nullable r ⟷ nullable s) ∧ (∀a∈set as. (nderiv a r, nderiv a s) ∈ R)))" theorem closure_sound: assumes result: "closure as (r,s) = Some([],R)" and atoms: "atoms r ∪ atoms s ⊆ set as" shows "lang r = lang s" proof- let ?test = "While_Combinator.rtrancl_while_test (%(r,s). nullable r = nullable s)" let ?step = "While_Combinator.rtrancl_while_step (%(r,s). map (λa. (nderiv a r, nderiv a s)) as)" { fix st assume inv: "pre_bisim as r s st" and test: "?test st" have "pre_bisim as r s (?step st)" proof (cases st) fix ws R assume "st = (ws, R)" with test obtain r s t where st: "st = ((r, s) # t, R)" and "nullable r = nullable s" by (cases ws) auto with inv show ?thesis using atoms_nderiv[of _ r] atoms_nderiv[of _ s] unfolding st rtrancl_while_test.simps rtrancl_while_step.simps pre_bisim_def Ball_def by simp_all blast+ qed } moreover from atoms have "pre_bisim as r s ([(r,s)],{(r,s)})" by (simp add: pre_bisim_def) ultimately have pre_bisim_ps: "pre_bisim as r s ([],R)" by (rule while_option_rule[OF _ result[unfolded closure_def rtrancl_while_def]]) then have "is_bisimulation as R" "(r, s) ∈ R" by (auto simp: pre_bisim_def is_bisimulation_def) thus "lang r = lang s" by (rule bisim_lang_eq) qed subsection ‹Bisimulation-free proof of closure computation› text‹The equivalence check can be viewed as the product construction of two automata. The state space is the reflexive transitive closure of the pair of next-state functions, i.e. derivatives.› lemma rtrancl_nderiv_nderivs: defines "nderivs == foldl (%r a. nderiv a r)" shows "{((r,s),(nderiv a r,nderiv a s))| r s a. a : A}^* = {((r,s),(nderivs r w,nderivs s w))| r s w. w : lists A}" (is "?L = ?R") proof- note [simp] = nderivs_def { fix r s r' s' have "((r,s),(r',s')) : ?L ⟹ ((r,s),(r',s')) : ?R" proof(induction rule: converse_rtrancl_induct2) case refl show ?case by (force intro!: foldl.simps(1)[symmetric]) next case step thus ?case by(force intro!: foldl.simps(2)[symmetric]) qed } moreover { fix r s r' s' { fix w have "∀x∈set w. x ∈ A ⟹ ((r, s), nderivs r w, nderivs s w) :?L" proof(induction w rule: rev_induct) case Nil show ?case by simp next case snoc thus ?case by (auto elim!: rtrancl_into_rtrancl) qed } hence "((r,s),(r',s')) : ?R ⟹ ((r,s),(r',s')) : ?L" by auto } ultimately show ?thesis by (auto simp: in_lists_conv_set) blast qed lemma nullable_nderivs: "nullable (foldl (%r a. nderiv a r) r w) = (w : lang r)" by (induct w arbitrary: r) (simp_all add: nullable_iff lang_nderiv Deriv_def) theorem closure_sound_complete: assumes result: "closure as (r,s) = Some(ws,R)" and atoms: "set as = atoms r ∪ atoms s" shows "ws = [] ⟷ lang r = lang s" proof - have leq: "(lang r = lang s) = (∀(r',s') ∈ {((r0,s0),(nderiv a r0,nderiv a s0))| r0 s0 a. a : set as}^* `` {(r,s)}. nullable r' = nullable s')" by(simp add: atoms rtrancl_nderiv_nderivs Ball_def lang_eq_ext imp_ex nullable_nderivs del:Un_iff) have "{(x,y). y ∈ set ((λ(p,q). map (λa. (nderiv a p, nderiv a q)) as) x)} = {((r,s), nderiv a r, nderiv a s) |r s a. a ∈ set as}" by auto with atoms rtrancl_while_Some[OF result[unfolded closure_def]] show ?thesis by (auto simp add: leq Ball_def split: if_splits) qed subsection ‹The overall procedure› primrec add_atoms :: "'a rexp ⇒ 'a list ⇒ 'a list" where "add_atoms Zero = id" | "add_atoms One = id" | "add_atoms (Atom a) = List.insert a" | "add_atoms (Plus r s) = add_atoms s o add_atoms r" | "add_atoms (Times r s) = add_atoms s o add_atoms r" | "add_atoms (Star r) = add_atoms r" lemma set_add_atoms: "set (add_atoms r as) = atoms r ∪ set as" by (induct r arbitrary: as) auto definition check_eqv :: "nat rexp ⇒ nat rexp ⇒ bool" where "check_eqv r s = (let nr = norm r; ns = norm s; as = add_atoms nr (add_atoms ns []) in case closure as (nr, ns) of Some([],_) ⇒ True | _ ⇒ False)" lemma soundness: assumes "check_eqv r s" shows "lang r = lang s" proof - let ?nr = "norm r" let ?ns = "norm s" let ?as = "add_atoms ?nr (add_atoms ?ns [])" obtain R where 1: "closure ?as (?nr,?ns) = Some([],R)" using assms by (auto simp: check_eqv_def Let_def split:option.splits list.splits) then have "lang (norm r) = lang (norm s)" by (rule closure_sound) (auto simp: set_add_atoms dest!: subsetD[OF atoms_norm]) thus "lang r = lang s" by simp qed text‹Test:› lemma "check_eqv (Plus One (Times (Atom 0) (Star(Atom 0)))) (Star(Atom 0))" by eval end