getting started on mutual_information_le. Several dependent type issues

teorth · teorth · commit eeb9d638f398 · 2025-07-27T12:03:07.000-07:00
diff --git a/PFR/BoundingMutual.lean b/PFR/BoundingMutual.lean
@@ -1,3 +1,4 @@
+import Mathlib.Algebra.BigOperators.Group.Multiset.Defs
 import PFR.MultiTauFunctional
 
 
@@ -15,6 +16,34 @@ import PFR.MultiTauFunctional
 universe u
 open MeasureTheory ProbabilityTheory
 
+
+theorem Fin.cast_surjective {k l:ℕ} (h: k = l) : Function.Surjective (Fin.cast h) := -- `Fin.cast h` is onto, for any proof `h : k = l`
+  (rightInverse_cast h).surjective -- surjectivity is read off from the right-inverse lemma `rightInverse_cast h`; alternative proof: `(finCongr h).surjective`
+
+theorem Fin.cast_bijective {k l:ℕ} (h: k = l) : Function.Bijective (Fin.cast h) := -- `Fin.cast h` is a bijection, for any proof `h : k = l`
+  ⟨ cast_injective h, cast_surjective h ⟩ -- anonymous-constructor pair: injectivity first, then surjectivity; alternative proof: `(finCongr h).bijective`
+
+lemma multiDist_of_cast {m m' : ℕ} (h : m' = m) {Ω : Fin m → Type*} -- reindexing lemma: `D[·;·]` is unchanged when a `Fin m`-indexed family is pulled back to `Fin m'` along `Fin.cast h`
+    (hΩ : ∀ i, MeasureSpace (Ω i)) (hΩprob : ∀ i, IsProbabilityMeasure (hΩ i).volume) -- measure-space structures and the probability hypothesis are explicit arguments, not instances
+    {G: Type*} [MeasureableFinGroup G] (X : ∀ i, (Ω i) → G)  : -- `X i` is a `G`-valued function on its own space `Ω i`
+    D[fun i ↦ X (i.cast h); fun i ↦ hΩ (i.cast h)] = D[X ; hΩ] := by -- left side: variables and measure spaces both reindexed by `Fin.cast h`
+    unfold multiDist -- expose the definition behind the `D[·;·]` notation so its parts can be compared
+    congr 1 -- split the goal into equalities of the top-level components
+    . apply IdentDistrib.entropy_congr -- entropy component: reduce to an `IdentDistrib` statement
+      exact { -- build the `IdentDistrib` structure field by field
+        aemeasurable_fst := by fun_prop
+        aemeasurable_snd := by fun_prop
+        map_eq := by -- equality of the two pushforward measures
+          have : (fun (x: Fin m' → G) ↦ ∑ i, x i) = (fun (x: Fin m → G) ↦ ∑ i, x i) ∘ (fun (x: Fin m' → G) ↦ x ∘ (Fin.cast h.symm)) := by
+            ext x; simp; symm; apply Function.Bijective.sum_comp (Fin.cast_bijective h.symm) -- the two sums agree because `Fin.cast h.symm` is a bijection of the index type
+          rw [this, ←MeasureTheory.Measure.map_map] <;> try fun_prop -- factor the pushforward through the reindexing map; `fun_prop` closes the side goals (presumably measurability — confirm)
+          congr
+          convert MeasureTheory.Measure.pi_map_piCongrLeft (finCongr h) (fun i ↦ Measure.map (X i) ℙ) -- match the goal against the library lemma `pi_map_piCongrLeft`, applied to the equivalence `finCongr h`
+      }
+    congr 1 -- remaining component: split once more
+    . rw [h] -- rewrite along `h : m' = m` (presumably the normalising factor that depends on the number of variables — TODO confirm against `multiDist`)
+    convert Finset.sum_bijective _ (Fin.cast_bijective h) ?_ ?_ using 1 <;> simp -- reindex the remaining finite sum along the bijection `Fin.cast h`; `simp` discharges the generated goals
+
 -- Spelling here is *very* janky. Feel free to respell
 /-- Suppose that $X_{i,j}$, $1 \leq i,j \leq m$, are jointly independent $G$-valued random variables, such that for each $j = 1,\dots,m$, the random variables $(X_{i,j})_{i = 1}^m$
 coincide in distribution with some permutation of $X_{[m]}$.
@@ -26,16 +55,18 @@ $$ {\mathcal I} := \bbI[ \bigl(\sum_{i=1}^m X_{i,j}\bigr)_{j =1}^{m}
  Then ${\mathcal I} \leq 4 m^2 \eta k.$
 -/
 lemma mutual_information_le {G Ωₒ : Type u} [MeasureableFinGroup G] [MeasureSpace Ωₒ]
-  (p : multiRefPackage G Ωₒ) (Ω : Type u) [hΩ : MeasureSpace Ω] (X : ∀ i, Ω → G)
-  (h_indep : iIndepFun X)
-  (h_min : multiTauMinimizes p (fun _ ↦ Ω) (fun _ ↦ hΩ) X) (Ω' : Type*) [hΩ': MeasureSpace Ω']
+  {p : multiRefPackage G Ωₒ} {Ω : Type u} [hΩ : MeasureSpace Ω] [IsProbabilityMeasure hΩ.volume]
+  {X : ∀ i, Ω → G} (hX : ∀ i, Measurable (X i)) (h_indep : iIndepFun X)
+  (h_min : multiTauMinimizes p (fun _ ↦ Ω) (fun _ ↦ hΩ) X) {Ω' : Type*} [hΩ': MeasureSpace Ω']
   [IsProbabilityMeasure hΩ'.volume]
-  (X' : Fin p.m × Fin p.m → Ω' → G) (h_indep': iIndepFun X')
+  {X' : Fin p.m × Fin p.m → Ω' → G} (hX' : ∀ i j, Measurable (X' (i, j)))
+  (h_indep': iIndepFun X')
   (hperm : ∀ j, ∃ e : Fin p.m ≃ Fin p.m, IdentDistrib (fun ω ↦ (fun i ↦ X' (i, j) ω))
     (fun ω ↦ (fun i ↦ X (e i) ω))) :
   I[ fun ω ↦ ( fun j ↦ ∑ i, X' (i, j) ω) : fun ω ↦ ( fun i ↦ ∑ j, X' (i, j) ω) |
     fun ω ↦ ∑ i, ∑ j, X' (i, j) ω ] ≤ 2 * p.m * (2*p.m + 1) * p.η * D[ X; (fun _ ↦ hΩ)] := by
     have hm := p.hm
+    have hη := p.hη
     set I₀ := I[ fun ω ↦ ( fun j ↦ ∑ i, X' (i, j) ω) : fun ω ↦ ( fun i ↦ ∑ j, X' (i, j) ω) |
     fun ω ↦ ∑ i, ∑ j, X' (i, j) ω ]
     set k := D[X ; fun x ↦ hΩ]
@@ -46,13 +77,53 @@ lemma mutual_information_le {G Ωₒ : Type u} [MeasureableFinGroup G] [MeasureS
     set S : Fin p.m → Fin p.m → Ω' → G := fun i j ↦ ∑ k ∈ .Ici j, X' (i, k)
     set A : Fin p.m → ℝ := fun j ↦ D[ column j; fun _ ↦ hΩ']
       - D[ column j | fun i ↦ S i j; fun _ ↦ hΩ']
-    set B : ℝ := D[ column last; fun _ ↦ hΩ'] - D[ fun j ω ↦ ∑ i, X' (i, j) ω; fun _ ↦ hΩ']
+    set B : ℝ := D[ column last; fun _ ↦ hΩ'] - D[ fun i ω ↦ ∑ j, X' (i, j) ω; fun _ ↦ hΩ']
 
     have h1 : I₀ ≤ ∑ j ∈ .Iio last, A j + B := by
-      sorry
+      set m := p.m - 1
+      have hm' : m+1 = p.m := by omega
+      let X'' : Fin (m+1) × Fin (m+1) → Ω' → G := fun (i,j) ↦ X' (i.cast hm', j.cast hm')
+      convert cor_multiDist_chainRule _ X'' (by fun_prop) _ using 1 <;> try infer_instance
+      . simp [I₀]
+        let ι : (Fin (m+1) → G) → (Fin p.m → G) := fun f ↦ f ∘ (Fin.cast hm'.symm)
+        have hι : Function.Injective ι := by
+          intro f g h; ext i; replace h := congrFun h (i.cast hm'); simpa [ι] using h
+        observe hid : Function.Injective (id: G → G)
+        convert condMutualInfo_of_inj' _ _ _ _ hι hι hid using 2 <;> try infer_instance
+        all_goals try fun_prop
+        . ext ω j; simp [ι, X'']; symm
+          apply Function.Bijective.sum_comp (Fin.cast_bijective hm') (fun i ↦ X' (i, j) ω)
+        . ext ω i; simp [ι, X'']; symm
+          apply Function.Bijective.sum_comp (Fin.cast_bijective hm') (fun j ↦ X' (i, j) ω)
+        . ext ω
+          rw [←Multiset.sum_eq_foldr, ←Finset.sum_eq_multiset_sum, ←Finset.sum_product']
+          simp; apply Function.Bijective.sum_comp ⟨ _, _ ⟩ (fun x ↦ X' x ω)
+          . intro ⟨ i, j ⟩ ⟨ i', j' ⟩ h; simpa using h
+          intro ⟨ i, j ⟩; use ⟨ i.cast hm'.symm, j.cast hm'.symm ⟩; simp
+        simp_rw [←Multiset.sum_eq_foldr, ←Finset.sum_eq_multiset_sum]
+        fun_prop
+      . rw [add_sub_assoc]; congr 1
+        . convert Finset.sum_image (g := fun j:Fin m ↦ j.castSucc.cast hm')
+            (f := A) (s := Finset.univ) _ using 2 with _ _ n _
+          . ext ⟨ n, hn ⟩; simp [last]; constructor
+            . intro h; use ⟨ n, by omega ⟩; simp
+            rintro ⟨ ⟨ n', hn' ⟩, h ⟩; simp at h; omega
+          . simp [A, X'', column, S]; congr 1
+            . convert multiDist_of_cast hm' (fun _ ↦ hΩ') inferInstance _ with i
+              rfl
+            sorry
+          simp
+        simp [B, column, X'']; congr 1
+        . symm; convert multiDist_of_cast hm' (fun _ ↦ hΩ') inferInstance _ with i
+          rfl
+        symm; convert multiDist_of_cast hm' (fun _ ↦ hΩ') inferInstance _ with i
+        ext ω; simp
+        apply Function.Bijective.sum_comp (Fin.cast_bijective hm') (fun j ↦ X' (Fin.cast hm' i, j) ω)
+      apply ProbabilityTheory.iIndepFun.precomp _ h_indep'
+      intro ⟨ i, j ⟩ ⟨ i', j' ⟩ h; simpa using h
 
     have h2 {j : Fin p.m} (hj: j ∈ Finset.Iio last)
-      : A j ≤ p.η * ∑ i, d[ X' (i,j) # X' (i,j) | S i j ] := by
+      : A j ≤ p.η * (k + ∑ i, d[ X' (i,j) # X' (i,j) | S i j ]) := by
         sorry
 
     have h3 : B ≤ p.η * ∑ i, d[ X' (i, last) # V i ] := by
@@ -73,7 +144,7 @@ lemma mutual_information_le {G Ωₒ : Type u} [MeasureableFinGroup G] [MeasureS
         + (H[V i] - H[X' (i, last)]) / 2 := by
         sorry
 
-    have h7 : I₀/p.η ≤ p.m * ∑ i, d[X i # X i] + ∑ i, H[V i] - ∑ i, H[X i] := by
+    have h7 : I₀/p.η ≤ p.m * k + p.m * ∑ i, d[X i # X i] + ∑ i, H[V i] - ∑ i, H[X i] := by
       sorry
 
     have h8 (i: Fin p.m) : H[V i] ≤ H[ ∑ j, X j] + ∑ j, d[X' (i,j) # X' (i,j)] := by
@@ -82,9 +153,12 @@ lemma mutual_information_le {G Ωₒ : Type u} [MeasureableFinGroup G] [MeasureS
     have h9 : ∑ i, H[V i] - ∑ i, H[X i] ≤ p.m * ∑ i, d[X i # X i] + p.m * k := by
       sorry
 
-    have h10 : I₀/p.η ≤ 2 * p.m * ∑ i, d[X i # X i] + p.m * k := by linarith
+    have h10 : I₀/p.η ≤ 2 * p.m * ∑ i, d[X i # X i] + 2 * p.m * k := by linarith
 
     have h11 : ∑ i, d[X i # X i] ≤ 2 * p.m * k := by
-      sorry
+      convert multidist_ruzsa_II hm _ _ _ hX _ <;> try infer_instance
 
-    sorry
+    calc
+       _ ≤ p.η * (2 * p.m * ∑ i, d[X i # X i] + 2 * p.m * k) := by rwa [←div_le_iff₀' (by positivity)]
+      _ ≤ p.η * (2 * p.m * (2 * p.m * k) + 2 * p.m * k) := by gcongr
+      _ = _ := by ring
diff --git a/PFR/TorsionEndgame.lean b/PFR/TorsionEndgame.lean
@@ -64,7 +64,7 @@ lemma mutual_information_le_t_12 : I[Z1 : Z2 | W] ≤ 2 * p.m * (2 * p.m + 1) *
   have hm := p.hm
   let zero : Fin p.m := ⟨ 0, by linarith [hm]⟩
   have hindep_j (j: Fin p.m) : iIndepFun (fun i ↦ Y (i, j)) := indep_yj h_mes h_indep j
-  have := mutual_information_le p Ω' (fun i ω ↦ Y (i,zero) ω) (hindep_j zero) ?_ Ω' Y h_indep ?_
+  have := mutual_information_le (by fun_prop) (hindep_j zero) ?_ h_mes h_indep ?_
   . have k_eq : k = D[fun i ω ↦ Y (i, zero) ω ; fun x ↦ hΩ'] := by
       apply multiDist_copy; intro i; exact (hident i zero).symm
     rw [←k_eq, condMutualInfo_comm] at this
@@ -106,7 +106,7 @@ lemma mutual_information_le_t_23 : I[Z2 : Z3 | W] ≤ 2 * p.m * (2 * p.m + 1) *
     convert iIndepFun.finsets_comp S _ hX'_indep (by fun_prop) φ (by fun_prop) with i ω
     rw [Finset.pairwiseDisjoint_iff]; rintro _ _ _ _ ⟨ ⟨ _, _ ⟩, hij ⟩
     simp [S] at hij; cc
-  have := mutual_information_le p Ω' (fun i ω ↦ Y (i,zero) ω) (indep_yj h_mes h_indep zero) ?_ Ω' X' hX'_indep ?_
+  have := mutual_information_le (by fun_prop) (indep_yj h_mes h_indep zero) ?_ (by fun_prop) hX'_indep ?_
   . have k_eq : k = D[fun i ω ↦ Y (i, zero) ω ; fun x ↦ hΩ'] := by
       apply multiDist_copy; intro i; exact (hident i zero).symm
     rw [←k_eq] at this
@@ -158,7 +158,7 @@ lemma mutual_information_le_t_21 : I[Z1 : Z3 | W] ≤ 2 * p.m * (2 * p.m + 1) *
     rw [Finset.pairwiseDisjoint_iff]; rintro _ _ _ _ ⟨ ⟨ _, _ ⟩, hij ⟩
     simp [S] at hij; cc
   have hindep_yj (j: Fin p.m) : iIndepFun (fun i ↦ Y (i, j)) := indep_yj h_mes h_indep j
-  have := mutual_information_le p Ω' (fun i ω ↦ Y (i,zero) ω) (hindep_yj zero) ?_ Ω' X' hX'_indep ?_
+  have := mutual_information_le (by fun_prop) (hindep_yj zero) ?_ (by fun_prop) hX'_indep ?_
   . have k_eq : k = D[fun i ω ↦ Y (i, zero) ω ; fun x ↦ hΩ'] := by
       apply multiDist_copy; intro i; exact (hident i zero).symm
     rw [←k_eq,condMutualInfo_comm] at this
diff --git a/blueprint/src/chapter/torsion.tex b/blueprint/src/chapter/torsion.tex
@@ -517,7 +517,7 @@ \section{Bounding the mutual information}
 Let $\sigma = \sigma_j \colon I \to I$ be a permutation such that $X_{i,j} = X_{\sigma(i)}$, and write $X'_i := X_{i,j}$ and $Y_i := X_{i,j} + \cdots + X_{i,m}$.
 By \Cref{cond-multidist-lower-II}, we have
 \begin{align}
-  A_j & \leq \eta \sum_{i = 1}^{m} d[X_{i,j}; X_{i, j}|X_{i, j} + \cdots + X_{i,m}].\label{54a}
+  A_j & \leq \eta (k+\sum_{i = 1}^{m} d[X_{i,j}; X_{i, j}|X_{i, j} + \cdots + X_{i,m}]).\label{54a}
 \end{align}
 We similarly consider $B$.  By \Cref{multidist-perm} applied to the $m$-th column,
 \[
@@ -554,7 +554,7 @@ \section{Bounding the mutual information}
 Combining~\eqref{441},~\eqref{54a} and~\eqref{55a} with~\eqref{eq:distbnd1} and~\eqref{eq:distbnd2} (the latter two summed over $i$), we get
 \begin{align}
   \nonumber
-  \frac1{\eta} {\mathcal I} &\leq \sum_{i,j=1}^m d[X_{i,j};X_{i,j}] + \sum_{i=1}^m (\bbH[V_i] - \bbH[X_{i,m}]) \\
+  \frac1{\eta} {\mathcal I} &\leq mk + \sum_{i,j=1}^m d[X_{i,j};X_{i,j}] + \sum_{i=1}^m (\bbH[V_i] - \bbH[X_{i,m}]) \\
-      &= m \sum_{i=1}^m d[X_i; X_i] + \sum_{i=1}^m \bbH[V_i] - \sum_{i=1}^m \bbH[X_i].
+      &= mk + m \sum_{i=1}^m d[X_i; X_i] + \sum_{i=1}^m \bbH[V_i] - \sum_{i=1}^m \bbH[X_i].
       \label{eq:distbnd3}
 \end{align}