Theory Gabow_SCC

theory Gabow_SCC
imports Gabow_Skeleton
section ‹Enumerating the SCCs of a Graph \label{sec:scc}›
theory Gabow_SCC
imports Gabow_Skeleton
begin

text ‹
  As a first variant, we implement an algorithm that computes a list of SCCs 
  of a graph, in topological order. This is the standard variant described by
  Gabow~\cite{Gabow2000}.
›

section ‹Specification›
context fr_graph
begin
  text ‹We specify a distinct list that covers all reachable nodes and
    contains SCCs in topological order›

  definition "compute_SCC_spec ≡ SPEC (λl. 
    distinct l ∧ ⋃(set l) = E*``V0 ∧ (∀U∈set l. is_scc E U) 
    ∧ (∀i j. i<j ∧ j<length l ⟶ l!j × l!i ∩ E* = {}) )"
end

section ‹Extended Invariant›

locale cscc_invar_ext = fr_graph G
  for G :: "('v,'more) graph_rec_scheme" + 
  fixes l :: "'v set list" and D :: "'v set"
  assumes l_is_D: "⋃(set l) = D" ― ‹The output contains all done CNodes›
  assumes l_scc: "set l ⊆ Collect (is_scc E)" ― ‹The output contains only SCCs›
  assumes l_no_fwd: "⋀i j. ⟦i<j; j<length l⟧ ⟹ l!j × l!i ∩ E* = {}" 
    ― ‹The output contains no forward edges›
begin
  lemma l_no_empty: "{}∉set l" using l_scc by (auto simp: in_set_conv_decomp)
end
  
locale cscc_outer_invar_loc = outer_invar_loc G it D + cscc_invar_ext G l D
  for G :: "('v,'more) graph_rec_scheme" and it l D 
begin
  lemma locale_this: "cscc_outer_invar_loc G it l D" by unfold_locales
  lemma abs_outer_this: "outer_invar_loc G it D" by unfold_locales
end

locale cscc_invar_loc = invar_loc G v0 D0 p D pE + cscc_invar_ext G l D
  for G :: "('v,'more) graph_rec_scheme" and v0 D0 and l :: "'v set list" 
  and p D pE
begin
  lemma locale_this: "cscc_invar_loc G v0 D0 l p D pE" by unfold_locales
  lemma invar_this: "invar_loc G v0 D0 p D pE" by unfold_locales
end

context fr_graph
begin
  definition "cscc_outer_invar ≡ λit (l,D). cscc_outer_invar_loc G it l D"
  definition "cscc_invar ≡ λv0 D0 (l,p,D,pE). cscc_invar_loc G v0 D0 l p D pE"
end

section ‹Definition of the SCC-Algorithm›

context fr_graph
begin
  definition compute_SCC :: "'v set list nres" where
    "compute_SCC ≡ do {
      let so = ([],{});
      (l,D) ← FOREACHi cscc_outer_invar V0 (λv0 (l,D0). do {
        if v0∉D0 then do {
          let s = (l,initial v0 D0);

          (l,p,D,pE) ←
          WHILEIT (cscc_invar v0 D0)
            (λ(l,p,D,pE). p ≠ []) (λ(l,p,D,pE). 
          do {
            ― ‹Select edge from end of path›
            (vo,(p,D,pE)) ← select_edge (p,D,pE);

            ASSERT (p≠[]);
            case vo of 
              Some v ⇒ do {
                if v ∈ ⋃(set p) then do {
                  ― ‹Collapse›
                  RETURN (l,collapse v (p,D,pE))
                } else if v∉D then do {
                  ― ‹Edge to new node. Append to path›
                  RETURN (l,push v (p,D,pE))
                } else RETURN (l,p,D,pE)
              }
            | None ⇒ do {
                ― ‹No more outgoing edges from current node on path›
                ASSERT (pE ∩ last p × UNIV = {});
                let V = last p;
                let (p,D,pE) = pop (p,D,pE);
                let l = V#l;
                RETURN (l,p,D,pE)
              }
          }) s;
          ASSERT (p=[] ∧ pE={});
          RETURN (l,D)
        } else
          RETURN (l,D0)
      }) so;
      RETURN l
    }"
end

section ‹Preservation of Invariant Extension›
context cscc_invar_ext
begin
  lemma l_disjoint: 
    assumes A: "i<j" "j<length l"
    shows "l!i ∩ l!j = {}"
  proof (rule disjointI)
    fix u
    assume "u∈l!i" "u∈l!j"
    with l_no_fwd A show False by auto
  qed

  corollary l_distinct: "distinct l"
    using l_disjoint l_no_empty
    by (metis distinct_conv_nth inf_idem linorder_cases nth_mem)
end

context fr_graph
begin
  definition "cscc_invar_part ≡ λ(l,p,D,pE). cscc_invar_ext G l D"

  lemma cscc_invarI[intro?]:
    assumes "invar v0 D0 PDPE"
    assumes "invar v0 D0 PDPE ⟹ cscc_invar_part (l,PDPE)"
    shows "cscc_invar v0 D0 (l,PDPE)"
    using assms
    unfolding initial_def cscc_invar_def invar_def
    apply (simp split: prod.split_asm)
    apply intro_locales
    apply (simp add: invar_loc_def)
    apply (simp add: cscc_invar_part_def cscc_invar_ext_def)
    done

  thm cscc_invarI[of v_0 D_0 s l]

  lemma cscc_outer_invarI[intro?]:
    assumes "outer_invar it D"
    assumes "outer_invar it D ⟹ cscc_invar_ext G l D"
    shows "cscc_outer_invar it (l,D)"
    using assms
    unfolding initial_def cscc_outer_invar_def outer_invar_def
    apply (simp split: prod.split_asm)
    apply intro_locales
    apply (simp add: outer_invar_loc_def)
    apply (simp add: cscc_invar_ext_def)
    done

  lemma cscc_invar_initial[simp, intro!]:
    assumes A: "v0∈it" "v0∉D0"
    assumes INV: "cscc_outer_invar it (l,D0)"
    shows "cscc_invar_part (l,initial v0 D0)"
  proof -
    from INV interpret cscc_outer_invar_loc G it l D0 
      unfolding cscc_outer_invar_def by simp
    
    show ?thesis
      unfolding cscc_invar_part_def initial_def
      apply simp
      by unfold_locales
  qed

  lemma cscc_invar_pop:
    assumes INV: "cscc_invar v0 D0 (l,p,D,pE)"
    assumes "invar v0 D0 (pop (p,D,pE))"
    assumes NE[simp]: "p≠[]"
    assumes NO': "pE ∩ (last p × UNIV) = {}"
    shows "cscc_invar_part (last p # l, pop (p,D,pE))"
  proof -
    from INV interpret cscc_invar_loc G v0 D0 l p D pE 
      unfolding cscc_invar_def by simp

    have AUX_l_scc: "is_scc E (last p)"
      unfolding is_scc_pointwise
    proof safe
      {
        assume "last p = {}" thus False 
          using p_no_empty by (cases p rule: rev_cases) auto 
      }

      fix u v
      assume "u∈last p" "v∈last p"
      with p_sc[of "last p"] have "(u,v) ∈ (lvE ∩ last p × last p)*" by auto
      with lvE_ss_E show "(u,v)∈(E ∩ last p × last p)*"
        by (metis Int_mono equalityE rtrancl_mono_mp)
      
      fix u'
      assume "u'∉last p" "(u,u')∈E*" "(u',v)∈E*"

      from ‹u'∉last p› ‹u∈last p› ‹(u,u')∈E*
        and rtrancl_reachable_induct[OF order_refl lastp_un_D_closed[OF NE NO']]
      have "u'∈D" by auto
      with ‹(u',v)∈E* and rtrancl_reachable_induct[OF order_refl D_closed] 
      have "v∈D" by auto
      with ‹v∈last p› p_not_D show False by (cases p rule: rev_cases) auto
    qed

    {
      fix i j
      assume A: "i<j" "j<Suc (length l)"
      have "l ! (j - Suc 0) × (last p # l) ! i ∩ E* = {}"
      proof (rule disjointI, safe)
        fix u v
        assume "(u, v) ∈ E*" "u ∈ l ! (j - Suc 0)" "v ∈ (last p # l) ! i"
        from ‹u ∈ l ! (j - Suc 0)› A have "u∈⋃(set l)"
          by (metis Ex_list_of_length Suc_pred UnionI length_greater_0_conv 
            less_nat_zero_code not_less_eq nth_mem) 
        with l_is_D have "u∈D" by simp
        with rtrancl_reachable_induct[OF order_refl D_closed] ‹(u,v)∈E* 
        have "v∈D" by auto

        show False proof cases
          assume "i=0" hence "v∈last p" using ‹v ∈ (last p # l) ! i› by simp
          with p_not_D ‹v∈D› show False by (cases p rule: rev_cases) auto
        next
          assume "i≠0" with ‹v ∈ (last p # l) ! i› have "v∈l!(i - 1)" by auto
          with l_no_fwd[of "i - 1" "j - 1"] 
            and ‹u ∈ l ! (j - Suc 0)› ‹(u, v) ∈ E* ‹i≠0› A
          show False by fastforce 
        qed
      qed
    } note AUX_l_no_fwd = this

    show ?thesis
      unfolding cscc_invar_part_def pop_def apply simp
      apply unfold_locales
      apply clarsimp_all
      using l_is_D apply auto []

      using l_scc AUX_l_scc apply auto []

      apply (rule AUX_l_no_fwd, assumption+) []
      done
  qed

  thm cscc_invar_pop[of v_0 D_0 l p D pE]

  lemma cscc_invar_unchanged: 
    assumes INV: "cscc_invar v0 D0 (l,p,D,pE)"
    shows "cscc_invar_part (l,p',D,pE')"
    using INV unfolding cscc_invar_def cscc_invar_part_def cscc_invar_loc_def
    by simp

  corollary cscc_invar_collapse:
    assumes INV: "cscc_invar v0 D0 (l,p,D,pE)"
    shows "cscc_invar_part (l,collapse v (p',D,pE'))"
    unfolding collapse_def
    by (simp add: cscc_invar_unchanged[OF INV])

  corollary cscc_invar_push:
    assumes INV: "cscc_invar v0 D0 (l,p,D,pE)"
    shows "cscc_invar_part (l,push v (p',D,pE'))"
    unfolding push_def
    by (simp add: cscc_invar_unchanged[OF INV])


  lemma cscc_outer_invar_initial: "cscc_invar_ext G [] {}"
    by unfold_locales auto


  lemma cscc_invar_outer_newnode:
    assumes A: "v0∉D0" "v0∈it" 
    assumes OINV: "cscc_outer_invar it (l,D0)"
    assumes INV: "cscc_invar v0 D0 (l',[],D',pE)"
    shows "cscc_invar_ext G l' D'"
  proof -
    from OINV interpret cscc_outer_invar_loc G it l D0 
      unfolding cscc_outer_invar_def by simp
    from INV interpret inv: cscc_invar_loc G v0 D0 l' "[]" D' pE 
      unfolding cscc_invar_def by simp
    
    show ?thesis 
      by unfold_locales

  qed

  lemma cscc_invar_outer_Dnode:
    assumes "cscc_outer_invar it (l, D)"
    shows "cscc_invar_ext G l D"
    using assms
    by (simp add: cscc_outer_invar_def cscc_outer_invar_loc_def)
    
  lemmas cscc_invar_preserve = invar_preserve
    cscc_invar_initial
    cscc_invar_pop cscc_invar_collapse cscc_invar_push cscc_invar_unchanged 
    cscc_outer_invar_initial cscc_invar_outer_newnode cscc_invar_outer_Dnode

  text ‹On termination, the invariant implies the specification›
  lemma cscc_finI:
    assumes INV: "cscc_outer_invar {} (l,D)"
    shows fin_l_is_scc: "⟦U∈set l⟧ ⟹ is_scc E U"
    and fin_l_distinct: "distinct l"
    and fin_l_is_reachable: "⋃(set l) = E* `` V0"
    and fin_l_no_fwd: "⟦i<j; j<length l⟧ ⟹ l!j ×l!i ∩ E* = {}"
  proof -
    from INV interpret cscc_outer_invar_loc G "{}" l D
      unfolding cscc_outer_invar_def by simp

    show "⟦U∈set l⟧ ⟹ is_scc E U" using l_scc by auto

    show "distinct l" by (rule l_distinct)

    show "⋃(set l) = E* `` V0"
      using fin_outer_D_is_reachable[OF outer_invar_this] l_is_D
      by auto

    show "⟦i<j; j<length l⟧ ⟹ l!j ×l!i ∩ E* = {}"
      by (rule l_no_fwd)

  qed

end

section ‹Main Correctness Proof›

context fr_graph 
begin
  lemma invar_from_cscc_invarI: "cscc_invar v0 D0 (L,PDPE) ⟹ invar v0 D0 PDPE"
    unfolding cscc_invar_def invar_def
    apply (simp split: prod.splits)
    unfolding cscc_invar_loc_def by simp

  lemma outer_invar_from_cscc_invarI: 
    "cscc_outer_invar it (L,D) ⟹outer_invar it D"
    unfolding cscc_outer_invar_def outer_invar_def
    apply (simp split: prod.splits)
    unfolding cscc_outer_invar_loc_def by simp

  text ‹With the extended invariant and the auxiliary lemmas, the actual 
    correctness proof is straightforward:›
  theorem compute_SCC_correct: "compute_SCC ≤ compute_SCC_spec"
  proof -
    note [[goals_limit = 2]]
    note [simp del] = Union_iff

    show ?thesis
      unfolding compute_SCC_def compute_SCC_spec_def select_edge_def select_def
      apply (refine_rcg
        WHILEIT_rule[where R="inv_image (abs_wf_rel v0) snd" for v0]
        refine_vcg 
      )

      apply (vc_solve
        rec: cscc_invarI cscc_outer_invarI
        solve: cscc_invar_preserve cscc_finI
        intro: invar_from_cscc_invarI outer_invar_from_cscc_invarI
        dest!: sym[of "pop A" for A]
        simp: pE_fin'[OF invar_from_cscc_invarI] finite_V0
      )
      apply auto
      done
  qed


  text ‹Simple proof, for presentation›
  context 
    notes [refine]=refine_vcg
    notes [[goals_limit = 1]]
  begin
    theorem "compute_SCC ≤ compute_SCC_spec"
      unfolding compute_SCC_def compute_SCC_spec_def select_edge_def select_def
      by (refine_rcg 
        WHILEIT_rule[where R="inv_image (abs_wf_rel v0) snd" for v0])
      (vc_solve 
        rec: cscc_invarI cscc_outer_invarI solve: cscc_invar_preserve cscc_finI
        intro: invar_from_cscc_invarI outer_invar_from_cscc_invarI
        dest!: sym[of "pop A" for A]
        simp: pE_fin'[OF invar_from_cscc_invarI] finite_V0, auto)
  end

end


section ‹Refinement to Gabow's Data Structure›

context GS begin
  definition "seg_set_impl l u ≡ do {
    (_,res) ← WHILET
      (λ(l,_). l<u) 
      (λ(l,res). do { 
        ASSERT (l<length S); 
        let x = S!l;
        ASSERT (x∉res); 
        RETURN (Suc l,insert x res)
      }) 
      (l,{});
      
    RETURN res
  }"

  lemma seg_set_impl_aux:
    fixes l u
    shows "⟦l<u; u≤length S; distinct S⟧ ⟹ seg_set_impl l u 
    ≤ SPEC (λr. r = {S!j | j. l≤j ∧ j<u})"
    unfolding seg_set_impl_def
    apply (refine_rcg 
      WHILET_rule[where 
        I="λ(l',res). res = {S!j | j. l≤j ∧ j<l'} ∧ l≤l' ∧ l'≤u"
        and R="measure (λ(l',_). u-l')" 
      ]
      refine_vcg)

    apply (auto simp: less_Suc_eq nth_eq_iff_index_eq)
    done

  lemma (in GS_invar) seg_set_impl_correct:
    assumes "i<length B"
    shows "seg_set_impl (seg_start i) (seg_end i) ≤ SPEC (λr. r=p_α!i)"
    apply (refine_rcg order_trans[OF seg_set_impl_aux] refine_vcg)

    using assms 
    apply (simp_all add: seg_start_less_end seg_end_bound S_distinct) [3]

    apply (auto simp: p_α_def assms seg_def) []
    done

  definition "last_seg_impl 
    ≡ do {
      ASSERT (length B - 1 < length B);
      seg_set_impl (seg_start (length B - 1)) (seg_end (length B - 1))
    }"

  lemma (in GS_invar) last_seg_impl_correct:
    assumes "p_α ≠ []"
    shows "last_seg_impl ≤ SPEC (λr. r=last p_α)"
    unfolding last_seg_impl_def
    apply (refine_rcg order_trans[OF seg_set_impl_correct] refine_vcg)
    using assms apply (auto simp add: p_α_def last_conv_nth)
    done

end

context fr_graph
begin

  definition "last_seg_impl s ≡ GS.last_seg_impl s"
  lemmas last_seg_impl_def_opt = 
    last_seg_impl_def[abs_def, THEN opt_GSdef, 
      unfolded GS.last_seg_impl_def GS.seg_set_impl_def 
    GS.seg_start_def GS.seg_end_def GS_sel_simps] 
    (* TODO: Some potential for optimization here: the assertion 
      guarantees that length B - 1 + 1 = length B !*)

  lemma last_seg_impl_refine: 
    assumes A: "(s,(p,D,pE))∈GS_rel"
    assumes NE: "p≠[]"
    shows "last_seg_impl s ≤ ⇓Id (RETURN (last p))"
  proof -
    from A have 
      [simp]: "p=GS.p_α s ∧ D=GS.D_α s ∧ pE=GS.pE_α s" 
        and INV: "GS_invar s"
      by (auto simp add: GS_rel_def br_def GS_α_split)

    show ?thesis
      unfolding last_seg_impl_def[abs_def]
      apply (rule order_trans[OF GS_invar.last_seg_impl_correct])
      using INV NE
      apply (simp_all) 
      done
  qed

  definition compute_SCC_impl :: "'v set list nres" where
    "compute_SCC_impl ≡ do {
      stat_start_nres;
      let so = ([],Map.empty);
      (l,D) ← FOREACHi (λit (l,s). cscc_outer_invar it (l,oGS_α s)) 
        V0 (λv0 (l,I0). do {
          if ¬is_done_oimpl v0 I0 then do {
            let ls = (l,initial_impl v0 I0);

            (l,(S,B,I,P))←WHILEIT (λ(l,s). cscc_invar v0 (oGS_α I0) (l,GS.α s))
              (λ(l,s). ¬path_is_empty_impl s) (λ(l,s).
            do {
              ― ‹Select edge from end of path›
              (vo,s) ← select_edge_impl s;

              case vo of 
                Some v ⇒ do {
                  if is_on_stack_impl v s then do {
                    s←collapse_impl v s;
                    RETURN (l,s)
                  } else if ¬is_done_impl v s then do {
                    ― ‹Edge to new node. Append to path›
                    RETURN (l,push_impl v s)
                  } else do {
                    ― ‹Edge to done node. Skip›
                    RETURN (l,s)
                  }
                }
              | None ⇒ do {
                  ― ‹No more outgoing edges from current node on path›
                  scc ← last_seg_impl s;
                  s←pop_impl s;
                  let l = scc#l;
                  RETURN (l,s)
                }
            }) (ls);
            RETURN (l,I)
          } else RETURN (l,I0)
      }) so;
      stat_stop_nres;
      RETURN l
    }"

  lemma compute_SCC_impl_refine: "compute_SCC_impl ≤ ⇓Id compute_SCC"
  proof -
    note [refine2] = bind_Let_refine2[OF last_seg_impl_refine]

    have [refine2]: "⋀s' p D pE l' l v' v. ⟦
      (s',(p,D,pE))∈GS_rel;
      (l',l)∈Id;
      (v',v)∈Id;
      v∈⋃(set p)
    ⟧ ⟹ do { s'←collapse_impl v' s'; RETURN (l',s') } 
      ≤ ⇓(Id ×r GS_rel) (RETURN (l,collapse v (p,D,pE)))"
      apply (refine_rcg order_trans[OF collapse_refine] refine_vcg)
      apply assumption+
      apply (auto simp add: pw_le_iff refine_pw_simps)
      done

    note [[goals_limit = 1]]
    show ?thesis
      unfolding compute_SCC_impl_def compute_SCC_def
      apply (refine_rcg
        bind_refine'
        select_edge_refine push_refine 
        pop_refine
        (*collapse_refine*) 
        initial_refine
        oinitial_refine
        (*last_seg_impl_refine*)
        prod_relI IdI
        inj_on_id
      )

      apply refine_dref_type
      apply (vc_solve (nopre) solve: asm_rl I_to_outer
        simp: GS_rel_def br_def GS.α_def oGS_rel_def oGS_α_def 
        is_on_stack_refine path_is_empty_refine is_done_refine is_done_orefine
      )

      done
  qed

end

end