spectroxide/
kompaneets.rs

1//! Kompaneets equation: Compton scattering of photons off thermal electrons.
2//!
3//! The Kompaneets equation describes the Fokker-Planck (diffusion) approximation
4//! to Compton scattering in the non-relativistic limit:
5//!
6//!   dn/dτ|_C = (θ_e / x²) ∂/∂x [x⁴ (∂n/∂x + φ n(n+1))]
7//!
8//! where φ = T_z/T_e, θ_e = kT_e/(m_e c²), τ = t/t_C.
9//!
10//! For small distortions Δn = n - n_pl, the linearized form is:
11//!
12//!   dΔn/dτ|_C = (θ_e / x²) ∂/∂x [x⁴ (∂Δn/∂x + φ(2n_pl+1)Δn)] + source
13//!
14//! Discretized with second-order conservative finite differences and solved
15//! with Crank-Nicolson time stepping → tridiagonal system.
16//!
17//! References:
18//! - Kompaneets (1957), JETP
19//! - Chluba & Sunyaev (2012), MNRAS 419, 1294 [Eq. 4]
20
21use crate::grid::FrequencyGrid;
22use crate::spectrum::planck;
23
24/// Compute the Kompaneets operator applied to Δn on the grid (test-only).
25///
26/// Returns dΔn/dτ|_C at each grid point. Production code uses the
27/// coupled inplace solver instead.
28#[cfg(test)]
29pub fn kompaneets_rhs(
30    grid: &FrequencyGrid,
31    delta_n: &[f64],
32    theta_e: f64,
33    theta_z: f64,
34) -> Vec<f64> {
35    let n = grid.n;
36    let phi = theta_z / theta_e; // T_z / T_e = 1/ρ_e
37    let mut rhs = vec![0.0; n];
38
39    // Compute fluxes at cell interfaces using the split form.
40    //
41    // Full flux: F = x⁴ [dn/dx + φ n(1+n)]
42    // With n = n_pl + Δn and the identity dn_pl/dx = -n_pl(1+n_pl):
43    //   dn/dx + φ n(1+n) = -n_pl(1+n_pl) + dΔn/dx + φ[n_pl(1+n_pl) + (2n_pl+1)Δn + Δn²]
44    //                     = (φ-1)n_pl(1+n_pl) + dΔn/dx + φ(2n_pl+1)Δn + φΔn²
45    //
46    // This form is numerically stable because each term is computed directly.
47    let mut flux = vec![0.0; n - 1];
48
49    for i in 0..n - 1 {
50        let x_half = grid.x_half[i];
51        let dx = grid.dx[i];
52
53        let n_pl_half = planck(x_half);
54        let dn_half = 0.5 * (delta_n[i] + delta_n[i + 1]);
55
56        // Derivative of the distortion at the interface
57        let ddn_dx = (delta_n[i + 1] - delta_n[i]) / dx;
58
59        // Source from T_e ≠ T_z (analytical, no cancellation)
60        let source = (phi - 1.0) * n_pl_half * (n_pl_half + 1.0);
61
62        // Linearized drift on Δn
63        let drift_linear = phi * (2.0 * n_pl_half + 1.0) * dn_half;
64
65        // Nonlinear stimulated term
66        let drift_nonlinear = phi * dn_half * dn_half;
67
68        flux[i] = x_half.powi(4) * (ddn_dx + source + drift_linear + drift_nonlinear);
69    }
70
71    // Convert flux divergence to dΔn/dτ:
72    // dΔn/dτ = (θ_e / x²) × (F_{i+1/2} - F_{i-1/2}) / Δx_cell
73    for i in 1..n - 1 {
74        let x = grid.x[i];
75        let dx_cell = 0.5 * (grid.dx[i - 1] + grid.dx[i]);
76        rhs[i] = theta_e / (x * x) * (flux[i] - flux[i - 1]) / dx_cell;
77    }
78
79    // Boundary conditions: Δn → 0 at both ends
80    rhs[0] = 0.0;
81    rhs[n - 1] = 0.0;
82
83    rhs
84}
85
86/// Build tridiagonal matrix coefficients for the linearized Kompaneets equation (test-only).
87///
88/// Production code uses the coupled inplace solver instead.
89#[cfg(test)]
90pub fn kompaneets_tridiagonal(
91    grid: &FrequencyGrid,
92    theta_e: f64,
93    theta_z: f64,
94) -> (Vec<f64>, Vec<f64>, Vec<f64>, Vec<f64>) {
95    let n = grid.n;
96    let phi = theta_z / theta_e;
97
98    let mut lower = vec![0.0; n]; // A_i (coefficient of Δn_{i-1})
99    let mut diag = vec![0.0; n]; // B_i (coefficient of Δn_i)
100    let mut upper = vec![0.0; n]; // C_i (coefficient of Δn_{i+1})
101    let mut source = vec![0.0; n]; // Source term from T_e ≠ T_z
102
103    for i in 1..n - 1 {
104        let x = grid.x[i];
105        let x2 = x * x;
106
107        // Cell widths
108        let dx_left = grid.dx[i - 1];
109        let dx_right = grid.dx[i];
110        let dx_cell = 0.5 * (dx_left + dx_right);
111
112        // Half-point values
113        let x_left = grid.x_half[i - 1];
114        let x_right = grid.x_half[i];
115
116        // Diffusion coefficients at half-points: D = θ_e x⁴
117        let d_left = theta_e * x_left.powi(4);
118        let d_right = theta_e * x_right.powi(4);
119
120        // Drift coefficients at half-points: V = θ_e x⁴ φ (2n_pl + 1)
121        // For linearization: n(n+1) ≈ n_pl(n_pl+1) + (2n_pl+1)Δn
122        let n_pl_left = planck(x_left);
123        let n_pl_right = planck(x_right);
124        let v_left = theta_e * x_left.powi(4) * phi * (2.0 * n_pl_left + 1.0);
125        let v_right = theta_e * x_right.powi(4) * phi * (2.0 * n_pl_right + 1.0);
126
127        // Finite difference: (1/x²) * [1/Δx_cell] * [F_{i+1/2} - F_{i-1/2}]
128        // F_{i+1/2} = D_{i+1/2} (Δn_{i+1} - Δn_i)/dx_right + V_{i+1/2} Δn_{i+1/2}
129        // F_{i-1/2} = D_{i-1/2} (Δn_i - Δn_{i-1})/dx_left + V_{i-1/2} Δn_{i-1/2}
130
131        // Using upwind-like averaging for drift: Δn_{i+1/2} ≈ (Δn_i + Δn_{i+1})/2
132        let coeff = 1.0 / (x2 * dx_cell);
133
134        // Contribution from right flux F_{i+1/2}
135        let a_right = d_right / dx_right; // diffusion
136        let b_right = v_right * 0.5; // drift (averaged)
137
138        // Contribution from left flux F_{i-1/2}
139        let a_left = d_left / dx_left;
140        let b_left = v_left * 0.5;
141
142        // F_{i+1/2} = D_R/dx_R * (Δn_{i+1} - Δn_i) + V_R/2 * (Δn_i + Δn_{i+1})
143        // F_{i-1/2} = D_L/dx_L * (Δn_i - Δn_{i-1}) + V_L/2 * (Δn_{i-1} + Δn_i)
144        //
145        // dΔn_i/dτ = coeff * (F_R - F_L) where coeff = 1/(x² Δx_cell)
146
147        // Coefficient of Δn_{i-1}: coeff * (D_L/dx_L - V_L/2)
148        lower[i] = coeff * (a_left - b_left);
149
150        // Coefficient of Δn_{i+1}: coeff * (D_R/dx_R + V_R/2)
151        upper[i] = coeff * (a_right + b_right);
152
153        // Coefficient of Δn_i: coeff * (-D_R/dx_R + V_R/2 - D_L/dx_L - V_L/2)
154        diag[i] = coeff * (-a_right + b_right - a_left - b_left);
155
156        // Source term from T_e ≠ T_z:
157        // F^{pl} = x⁴ [dn_pl/dx + φ n_pl(1+n_pl)] = x⁴ (φ-1) n_pl(1+n_pl)
158        // HOWEVER, the energy from this source is:
159        //   ∫ x³ source dx = -θ_e(φ-1)I₄ = θ_e(1-φ)I₄ > 0 for T_e > T_z
160        // So the source should be the DIVERGENCE of F^{pl} = x⁴(φ-1)n(1+n):
161        let source_right =
162            theta_e * x_right.powi(4) * (phi - 1.0) * n_pl_right * (n_pl_right + 1.0);
163        let source_left = theta_e * x_left.powi(4) * (phi - 1.0) * n_pl_left * (n_pl_left + 1.0);
164        source[i] = coeff * (source_right - source_left);
165    }
166
167    // Boundary conditions: Δn fixed at boundaries
168    diag[0] = 1.0;
169    diag[n - 1] = 1.0;
170
171    (lower, diag, upper, source)
172}
173
174/// Solve a tridiagonal system Ax = d using the Thomas algorithm.
175///
176/// A is given by (lower, diag, upper) vectors.
177/// Modifies `rhs` in place to contain the solution.
178/// Uses pre-allocated `work` buffer for the forward sweep (must be same length as `diag`).
179pub fn thomas_solve_inplace(
180    lower: &[f64],
181    diag: &[f64],
182    upper: &[f64],
183    rhs: &mut [f64],
184    work: &mut [f64],
185) {
186    let n = diag.len();
187    assert!(n >= 2);
188    assert!(lower.len() >= n);
189    assert!(upper.len() >= n);
190    assert!(rhs.len() >= n);
191    assert!(work.len() >= n);
192    assert!(
193        diag[0].abs() > 0.0,
194        "Thomas algorithm: zero pivot at row 0 (singular tridiagonal)"
195    );
196
197    // SAFETY: All slice accesses below are within [0, n). The asserts above
198    // guarantee sufficient length. Using get_unchecked eliminates per-element
199    // bounds checks in the forward/back sweeps — this function is called
200    // ~10^6 times per solve (Newton iters × timesteps), so the overhead matters.
201    unsafe {
202        // Forward sweep
203        let d0 = *diag.get_unchecked(0);
204        *work.get_unchecked_mut(0) = *upper.get_unchecked(0) / d0;
205        *rhs.get_unchecked_mut(0) /= d0;
206
207        for i in 1..n - 1 {
208            let denom =
209                *diag.get_unchecked(i) - *lower.get_unchecked(i) * *work.get_unchecked(i - 1);
210            *work.get_unchecked_mut(i) = *upper.get_unchecked(i) / denom;
211            *rhs.get_unchecked_mut(i) = (*rhs.get_unchecked(i)
212                - *lower.get_unchecked(i) * *rhs.get_unchecked(i - 1))
213                / denom;
214        }
215        // Last row (no upper entry needed)
216        let i = n - 1;
217        let denom = *diag.get_unchecked(i) - *lower.get_unchecked(i) * *work.get_unchecked(i - 1);
218        *work.get_unchecked_mut(i) = 0.0;
219        *rhs.get_unchecked_mut(i) =
220            (*rhs.get_unchecked(i) - *lower.get_unchecked(i) * *rhs.get_unchecked(i - 1)) / denom;
221
222        // Back substitution
223        for i in (0..n - 1).rev() {
224            *rhs.get_unchecked_mut(i) -= *work.get_unchecked(i) * *rhs.get_unchecked(i + 1);
225        }
226    }
227}
228
229/// Solve a tridiagonal system (allocating version for tests/convenience).
230pub fn thomas_solve(lower: &[f64], diag: &[f64], upper: &[f64], rhs: &mut [f64]) -> Vec<f64> {
231    let n = diag.len();
232    let mut work = vec![0.0; n];
233    let mut result = rhs.to_vec();
234    thomas_solve_inplace(lower, diag, upper, &mut result, &mut work);
235    result
236}
237
238/// Perform one Crank-Nicolson step of the linearized Kompaneets equation (test-only).
239///
240/// Production code uses the nonlinear coupled inplace solver instead.
241#[cfg(test)]
242pub fn kompaneets_step(
243    grid: &FrequencyGrid,
244    delta_n: &[f64],
245    theta_e: f64,
246    theta_z: f64,
247    dtau: f64,
248) -> Vec<f64> {
249    let n = grid.n;
250    let (lower, diag, upper, source) = kompaneets_tridiagonal(grid, theta_e, theta_z);
251
252    // Build RHS: (I + 0.5 Δτ L) Δn + Δτ S
253    let mut rhs = vec![0.0; n];
254    for i in 1..n - 1 {
255        rhs[i] = delta_n[i]
256            + 0.5
257                * dtau
258                * (lower[i] * delta_n[i.saturating_sub(1)]
259                    + diag[i] * delta_n[i]
260                    + upper[i] * delta_n[(i + 1).min(n - 1)])
261            + dtau * source[i];
262    }
263    // BCs
264    rhs[0] = 0.0;
265    rhs[n - 1] = 0.0;
266
267    // Build LHS: (I - 0.5 Δτ L)
268    let lhs_lower: Vec<f64> = lower.iter().map(|&a| -0.5 * dtau * a).collect();
269    let lhs_diag: Vec<f64> = diag
270        .iter()
271        .enumerate()
272        .map(|(i, &b)| {
273            if i == 0 || i == n - 1 {
274                1.0
275            } else {
276                1.0 - 0.5 * dtau * b
277            }
278        })
279        .collect();
280    let lhs_upper: Vec<f64> = upper.iter().map(|&c| -0.5 * dtau * c).collect();
281
282    thomas_solve(&lhs_lower, &lhs_diag, &lhs_upper, &mut rhs)
283}
284
285/// Perform one implicit step of the NONLINEAR Kompaneets equation on Δn.
286///
287/// Allocating convenience wrapper around the inplace solver. Used by tests;
288/// production code calls `kompaneets_step_coupled_inplace` directly.
289pub fn kompaneets_step_nonlinear(
290    grid: &FrequencyGrid,
291    delta_n_old: &[f64],
292    theta_e: f64,
293    theta_z: f64,
294    dtau: f64,
295) -> Vec<f64> {
296    let mut result = delta_n_old.to_vec();
297    let mut ws = KompaneetsWorkspace::new(grid);
298    let (_converged, _rho_e, _residual) = kompaneets_step_coupled_inplace(
299        grid,
300        &mut result,
301        theta_e,
302        theta_z,
303        dtau,
304        None::<&DcbrCoupling>,
305        None,
306        &mut ws,
307        0.0,
308        10,
309    );
310    result
311}
312
313/// Pre-allocated workspace for Kompaneets solver.
314///
315/// Grid-constant arrays (set once in `new()`) depend only on the frequency grid.
316/// Per-step arrays are reused across timesteps to avoid heap allocation
317/// (~15 vectors × 1000 elements, called 100K+ times per run).
318pub struct KompaneetsWorkspace {
319    // Grid-constant (length n_half = ng - 1):
320    n_pl_half: Vec<f64>,
321    np1_half: Vec<f64>,
322    twonp1_half: Vec<f64>,
323    x4_half: Vec<f64>,
324    inv_dx: Vec<f64>,
325    x4_over_dx: Vec<f64>,
326    // Grid-constant geometry (length ng, only interior points used):
327    inv_x2_dx_cell_geom: Vec<f64>,
328    // Grid-constant quadrature weights for ∫x³ f(x) dx (length ng):
329    // w_j = share of trapezoidal weight from cells on either side of grid point j.
330    quad_weights_x3: Vec<f64>,
331    // Per-step reusable buffers (length ng):
332    inv_x2_dx_cell: Vec<f64>,
333    half_dtau_coeff: Vec<f64>,
334    k_old: Vec<f64>,
335    dn_old: Vec<f64>,
336    j_lower: Vec<f64>,
337    j_diag: Vec<f64>,
338    j_upper: Vec<f64>,
339    rhs_buf: Vec<f64>,
340    thomas_work: Vec<f64>,
341    // Bordered system workspace (for coupled ρ_e solve):
342    c_vec: Vec<f64>,
343    v_buf: Vec<f64>,
344    thomas_work2: Vec<f64>,
345    // Precomputed w_j × em_j for the bordered Newton. Fixed across the
346    // Newton iteration (em_rates and weights don't change within a step),
347    // so we compute it once per step and reuse it in both the h_dcbr pass
348    // and the bp_dot_u/bp_dot_v pass.
349    wem: Vec<f64>,
350    // DC/BR old-step buffers for Crank-Nicolson DC/BR option:
351    pub(crate) dcbr_em_old: Vec<f64>,
352    pub(crate) dcbr_neq_old: Vec<f64>,
353}
354
355/// Parameters for coupling ρ_e into the bordered Newton system.
356///
357/// When passed to `kompaneets_step_coupled_inplace`, ρ_e becomes
358/// the (N+1)-th unknown solved simultaneously with Δn. The system
359/// becomes bordered tridiagonal, solved in O(N) via two Thomas solves.
360pub struct RhoECoupling {
361    /// ρ_e at the start of this timestep (before update_temperatures).
362    pub rho_e_old: f64,
363    /// Compton coupling coefficient R = (8/3)(ρ̃_γ/α_h).
364    /// This is the rate at which Compton scattering drives ρ_e → ρ_eq,
365    /// per unit Thomson optical depth.
366    pub r_compton: f64,
367    /// Source term in the ρ_e ODE: R·ρ_eq + δρ_inj.
368    pub rho_source: f64,
369    /// Adiabatic cooling rate: H·t_C.
370    pub lambda_exp: f64,
371    /// dH_dcbr/dρ_e (finite-difference derivative from update_temperatures).
372    pub dh_drho: f64,
373    /// Normalization for H_dcbr integral: 1/(4·G₃·θ_z).
374    pub h_norm: f64,
375}
376
377impl KompaneetsWorkspace {
378    /// Create workspace for a given frequency grid. Call once in solver construction.
379    pub fn new(grid: &FrequencyGrid) -> Self {
380        let ng = grid.n;
381        let n_half = ng - 1;
382
383        let mut n_pl_half = vec![0.0; n_half];
384        let mut np1_half = vec![0.0; n_half];
385        let mut twonp1_half = vec![0.0; n_half];
386        let mut x4_half = vec![0.0; n_half];
387        let mut inv_dx = vec![0.0; n_half];
388        let mut x4_over_dx = vec![0.0; n_half];
389
390        for i in 0..n_half {
391            let xh = grid.x_half[i];
392            let np = planck(xh);
393            n_pl_half[i] = np;
394            np1_half[i] = np * (np + 1.0);
395            twonp1_half[i] = 2.0 * np + 1.0;
396            let x4 = xh * xh * xh * xh;
397            x4_half[i] = x4;
398            let dx = grid.dx[i];
399            inv_dx[i] = 1.0 / dx;
400            x4_over_dx[i] = x4 / dx;
401        }
402
403        let mut inv_x2_dx_cell_geom = vec![0.0; ng];
404        for i in 1..ng - 1 {
405            let xi = grid.x[i];
406            let dx_cell = 0.5 * (grid.dx[i - 1] + grid.dx[i]);
407            inv_x2_dx_cell_geom[i] = 1.0 / (xi * xi * dx_cell);
408        }
409
410        // Quadrature weights for ∫x³ f(x) dx: trapezoidal on cell midpoints.
411        // w_j = sum of half-weights from cells adjacent to grid point j.
412        let mut quad_weights_x3 = vec![0.0; ng];
413        for j in 1..ng {
414            let w = 0.5 * grid.x_half_cubed[j - 1] * grid.dx[j - 1];
415            quad_weights_x3[j - 1] += w;
416            quad_weights_x3[j] += w;
417        }
418
419        KompaneetsWorkspace {
420            n_pl_half,
421            np1_half,
422            twonp1_half,
423            x4_half,
424            inv_dx,
425            x4_over_dx,
426            inv_x2_dx_cell_geom,
427            quad_weights_x3,
428            inv_x2_dx_cell: vec![0.0; ng],
429            half_dtau_coeff: vec![0.0; ng],
430            k_old: vec![0.0; ng],
431            dn_old: vec![0.0; ng],
432            j_lower: vec![0.0; ng],
433            j_diag: vec![0.0; ng],
434            j_upper: vec![0.0; ng],
435            rhs_buf: vec![0.0; ng],
436            thomas_work: vec![0.0; ng],
437            c_vec: vec![0.0; ng],
438            v_buf: vec![0.0; ng],
439            thomas_work2: vec![0.0; ng],
440            wem: vec![0.0; ng],
441            dcbr_em_old: vec![0.0; ng],
442            dcbr_neq_old: vec![0.0; ng],
443        }
444    }
445}
446
447/// DC/BR coupling data for implicit backward Euler within the Kompaneets step.
448///
449/// Instead of a precomputed frozen source `dcbr_source[i] = (neq-Δn_old)(1-e^{-dτ·em})`,
450/// we pass the emission rates and equilibrium targets so DC/BR is solved implicitly
451/// (backward Euler) inside the Newton iteration. This ensures DC/BR and Kompaneets
452/// see the same evolving Δn, matching CosmoTherm's approach.
453pub struct DcbrCoupling<'a> {
454    /// DC/BR absorption rates: `R[i] = (K/x³)(e^{x_e}-1)`, capped at 1e8.
455    pub emission_rates: &'a [f64],
456    /// Equilibrium target: `neq[i] = n_pl(x/ρ_eq) - n_pl(x)`.
457    pub n_eq_minus_n_pl: &'a [f64],
458    /// Analytical derivative d(emission_rates)/d(ρ_eq) at the precomputed
459    /// ρ_eq. Used in the bordered Newton c-vector so the Δn-row Jacobian
460    /// reflects how the DC/BR absorption rate shifts when the Newton
461    /// updates the ρ_e iterate. Dominant term:
462    ///   d(em)/d(ρ_eq) = -(K/x³) · exp(x/ρ_eq) · x/ρ_eq².
463    /// Pass `None` (empty slice) to retain the legacy Picard-in-ρ_e
464    /// behaviour, which is correct to O(Δρ_e per step) but only linearly
465    /// convergent at z ≳ 10⁶.
466    pub dem_drho_eq: &'a [f64],
467    /// Analytical derivative d(n_eq_minus_n_pl)/d(ρ_eq). Formula:
468    ///   d(neq)/d(ρ_eq) = n_pl(x/ρ_eq)(1 + n_pl(x/ρ_eq)) · x/ρ_eq².
469    pub dneq_drho_eq: &'a [f64],
470    /// Integrated photon source over the step: S_i = source_rate(x_i, z_mid)·dt.
471    ///
472    /// When `Some`, the Newton residual picks up the `−S_i` term directly
473    /// rather than relying on the caller to pre-add `S_i` to Δn_old. This
474    /// avoids poisoning the Kompaneets CN "old" flux with the source (the
475    /// pre-add approach effectively treats the source as injected at
476    /// `t_old`, which over-Comptonises by roughly `O(dt · ∂K/∂t)` per step
477    /// during a narrow injection window). `None` preserves the legacy
478    /// pre-add caller code path.
479    pub photon_source: Option<&'a [f64]>,
480    /// Use Crank-Nicolson (instead of backward Euler) for DC/BR.
481    /// Requires old-step DC/BR buffers in the workspace.
482    pub cn_dcbr: bool,
483}
484
485/// In-place Kompaneets + DC/BR step using pre-allocated workspace.
486///
487/// Modifies `delta_n` from old values to new values.
488/// Identical physics to `kompaneets_step_nonlinear_coupled` but avoids
489/// per-step heap allocations.
490///
491/// DC/BR is handled via backward Euler within the Newton iteration:
492/// the DC/BR residual `dτ × em × (neq - Δn_new)` and Jacobian `dτ × em`
493/// are added to the Kompaneets system. Backward Euler is unconditionally
494/// stable (amplification → 0 for stiff rates), avoiding the oscillation
495/// that Crank-Nicolson would produce for the stiff DC/BR absorption at low x.
496///
497/// `max_dn_abs` is the current max|Δn| used for adaptive Newton tolerance.
498/// Pass 0.0 for the tightest tolerance (equivalent to old fixed 1e-14).
499///
500/// Returns `(converged, rho_e, last_correction)`. **Note:** the third field
501/// is the size of the last Newton *correction* `|δx|`, not the residual
502/// `|F(x)|`. At convergence `|δx| < tol` by construction; if the Newton
503/// loop exits via `max_newton_iter`, `last_correction` is the final step
504/// the solver attempted, which only upper-bounds the residual for
505/// contractive iterations. Treat it as a diagnostic, not a proof of
506/// small residual.
507pub fn kompaneets_step_coupled_inplace(
508    grid: &FrequencyGrid,
509    delta_n: &mut [f64],
510    theta_e: f64,
511    theta_z: f64,
512    dtau: f64,
513    dcbr: Option<&DcbrCoupling>,
514    rho_coupling: Option<&RhoECoupling>,
515    ws: &mut KompaneetsWorkspace,
516    max_dn_abs: f64,
517    max_newton_iter: usize,
518) -> (bool, f64, f64) {
519    let ng = grid.n;
520    // θ_e for the CN "old" flux uses the step-start ρ_e when provided by
521    // the caller (coupled mode). Without a `RhoECoupling` we assume T_e is
522    // constant over the step and use the passed θ_e for both CN half-steps;
523    // callers that evolve T_e via `update_temperatures` but do not enable the
524    // bordered Newton should pass `rho_coupling = Some(...)` to preserve
525    // time-centering (the `dh_drho`/`lambda_exp`/etc. fields can be zeroed).
526    let theta_e_old = if let Some(rc) = rho_coupling {
527        theta_z * rc.rho_e_old
528    } else {
529        theta_e
530    };
531
532    // Assert workspace sizes so the compiler can elide per-element bounds checks
533    // in the hot inner loops. All workspace arrays are allocated to ng or ng-1
534    // in KompaneetsWorkspace::new(), but LLVM can't prove this across function
535    // boundaries without these hints.
536    let n_half = ng - 1;
537    assert!(delta_n.len() >= ng);
538    assert!(ws.dn_old.len() >= ng);
539    assert!(ws.k_old.len() >= ng);
540    assert!(ws.rhs_buf.len() >= ng);
541    assert!(ws.j_lower.len() >= ng);
542    assert!(ws.j_diag.len() >= ng);
543    assert!(ws.j_upper.len() >= ng);
544    assert!(ws.c_vec.len() >= ng);
545    assert!(ws.wem.len() >= ng);
546    assert!(ws.inv_x2_dx_cell.len() >= ng);
547    assert!(ws.inv_x2_dx_cell_geom.len() >= ng);
548    assert!(ws.half_dtau_coeff.len() >= ng);
549    assert!(ws.quad_weights_x3.len() >= ng);
550    assert!(ws.v_buf.len() >= ng);
551    assert!(ws.n_pl_half.len() >= n_half);
552    assert!(ws.np1_half.len() >= n_half);
553    assert!(ws.twonp1_half.len() >= n_half);
554    assert!(ws.x4_half.len() >= n_half);
555    assert!(ws.inv_dx.len() >= n_half);
556    assert!(ws.x4_over_dx.len() >= n_half);
557
558    // Debug-mode input validation: catch NaN/Inf and unphysical parameters
559    // before they propagate through unsafe blocks. Zero cost in release.
560    debug_assert!(theta_e.is_finite() && theta_e > 0.0, "theta_e={theta_e}");
561    debug_assert!(theta_z.is_finite() && theta_z > 0.0, "theta_z={theta_z}");
562    debug_assert!(dtau.is_finite() && dtau >= 0.0, "dtau={dtau}");
563    debug_assert!(max_dn_abs.is_finite(), "max_dn_abs={max_dn_abs}");
564    debug_assert!(ng >= 3, "grid too small: ng={ng}");
565
566    // For the "old" part of CN, use the step-start ρ_e.
567    // When coupled, rho_e_old is the pre-update value; otherwise use theta_e/theta_z.
568    let phi_old = if let Some(rc) = rho_coupling {
569        1.0 / rc.rho_e_old
570    } else {
571        theta_z / theta_e
572    };
573
574    let mut rho_e = theta_e / theta_z; // initial guess (from update_temperatures)
575    let mut phi = 1.0 / rho_e;
576
577    // Save old delta_n for CN formula
578    ws.dn_old[..ng].copy_from_slice(&delta_n[..ng]);
579
580    // Fill per-step coefficients with θ_e_old for the K_old precompute
581    // below. Inside the Newton loop these are overwritten with θ_e
582    // evaluated at the current ρ_e iterate, so K_new and its Jacobian stay
583    // time-centred with the evolving electron temperature (genuine CN in θ_e,
584    // not frozen-θ_e). In non-coupled mode rho_e is constant within a step,
585    // so the refresh reproduces theta_e_old and is effectively a no-op.
586    for i in 1..ng - 1 {
587        ws.inv_x2_dx_cell[i] = theta_e_old * ws.inv_x2_dx_cell_geom[i];
588        ws.half_dtau_coeff[i] = 0.5 * dtau * ws.inv_x2_dx_cell[i];
589    }
590
591    // Precompute K(delta_n_old, φ_old) for Crank-Nicolson
592    // SAFETY: i ranges over 1..ng-1; all workspace arrays have length >= ng,
593    // half-point arrays have length >= ng-1. Asserted above.
594    for i in 1..ng - 1 {
595        unsafe {
596            let dn_old_im1 = *ws.dn_old.get_unchecked(i - 1);
597            let dn_old_i = *ws.dn_old.get_unchecked(i);
598            let dn_old_ip1 = *ws.dn_old.get_unchecked(i + 1);
599            let dn_half_l = 0.5 * (dn_old_im1 + dn_old_i);
600            let dn_half_r = 0.5 * (dn_old_i + dn_old_ip1);
601            let ddn_dx_l = (dn_old_i - dn_old_im1) * *ws.inv_dx.get_unchecked(i - 1);
602            let ddn_dx_r = (dn_old_ip1 - dn_old_i) * *ws.inv_dx.get_unchecked(i);
603
604            let f_l = *ws.x4_half.get_unchecked(i - 1)
605                * ((phi_old - 1.0) * *ws.np1_half.get_unchecked(i - 1)
606                    + ddn_dx_l
607                    + phi_old * *ws.twonp1_half.get_unchecked(i - 1) * dn_half_l
608                    + phi_old * dn_half_l * dn_half_l);
609            let f_r = *ws.x4_half.get_unchecked(i)
610                * ((phi_old - 1.0) * *ws.np1_half.get_unchecked(i)
611                    + ddn_dx_r
612                    + phi_old * *ws.twonp1_half.get_unchecked(i) * dn_half_r
613                    + phi_old * dn_half_r * dn_half_r);
614            *ws.k_old.get_unchecked_mut(i) = *ws.inv_x2_dx_cell.get_unchecked(i) * (f_r - f_l);
615        }
616    }
617
618    // Assert DC/BR slice lengths so bounds checks are elided in the inner loop.
619    if let Some(dc) = dcbr {
620        assert!(dc.emission_rates.len() >= ng);
621        assert!(dc.n_eq_minus_n_pl.len() >= ng);
622        assert!(dc.dem_drho_eq.len() >= ng);
623        assert!(dc.dneq_drho_eq.len() >= ng);
624        if let Some(ps) = dc.photon_source {
625            assert!(ps.len() >= ng);
626        }
627        if dc.cn_dcbr {
628            assert!(ws.dcbr_em_old.len() >= ng);
629            assert!(ws.dcbr_neq_old.len() >= ng);
630        }
631    }
632
633    // Hoist Option dispatch out of the inner loop. Extract DC/BR slice references
634    // and mode flags once so the hot loop body has no per-point Option checks.
635    // The empty slices are never indexed (guarded by has_dcbr / has_phot_src).
636    static EMPTY: [f64; 0] = [];
637    let (has_dcbr, use_cn, em_rates, neq_vals, dem_drho, dneq_drho, phot_src_vals): (
638        bool,
639        bool,
640        &[f64],
641        &[f64],
642        &[f64],
643        &[f64],
644        &[f64],
645    ) = if let Some(dc) = dcbr {
646        (
647            true,
648            dc.cn_dcbr,
649            dc.emission_rates,
650            dc.n_eq_minus_n_pl,
651            dc.dem_drho_eq,
652            dc.dneq_drho_eq,
653            dc.photon_source.unwrap_or(&EMPTY),
654        )
655    } else {
656        (false, false, &EMPTY, &EMPTY, &EMPTY, &EMPTY, &EMPTY)
657    };
658    let has_phot_src = !phot_src_vals.is_empty();
659    let has_rho_coupling = rho_coupling.is_some();
660
661    // Precompute w_j × em_j once per step (only for the bordered solve).
662    // em_rates and quad_weights_x3 are constant across Newton iterations;
663    // caching this product eliminates ~2× max_newton_iter redundant
664    // multiplies per grid point (h_dcbr pass + b'·u / b'·v pass).
665    if has_dcbr && has_rho_coupling {
666        for j in 0..ng {
667            ws.wem[j] = ws.quad_weights_x3[j] * em_rates[j];
668        }
669    }
670
671    // Newton iteration for the coupled system:
672    //   Δn_new - Δn_old = dτ/2 (K_new(φ) + K_old(φ_old))  [CN for Kompaneets]
673    //                    + dτ · em · (neq - Δn_new)          [BE for DC/BR]
674    let mut converged = false;
675    let mut last_max_delta: f64 = f64::NAN;
676    for _newton in 0..max_newton_iter {
677        // Note on θ_e time-centering (audit H5): in principle a fully time-
678        // centred CN-in-θ_e would refresh `inv_x2_dx_cell[i]` and
679        // `half_dtau_coeff[i]` with θ_e evaluated at the current ρ_e iterate
680        // each Newton pass. We do not do this. Refreshing changes the
681        // residual without updating `c_vec` to include the matching
682        // ∂(inv_x2_dx_cell)/∂ρ_e contribution, which breaks the bordered
683        // Newton Jacobian and can produce catastrophic non-convergence
684        // (observed: |Δρ/ρ| ~ 1 for post-recombination scenarios). Instead,
685        // the prefactor is held at θ_e_old for the whole step; φ = 1/ρ_e
686        // inside the flux is still iterated and the Jacobian is consistent.
687        // The time-centring error on θ_e is O(Δρ_e × θ_e) ~ 10⁻⁷ at z ~ 2×10⁶
688        // with |Δρ_e| < 10⁻³ — well below the CN truncation floor. Restoring
689        // a genuine CN in θ_e would require extending `c_vec` with the
690        // ∂(inv_x2_dx_cell)/∂ρ_e contribution.
691
692        // Boundary conditions: zero Kompaneets flux, but allow DC/BR relaxation.
693        for &bi in &[0_usize, ng - 1] {
694            let (dcbr_res, dcbr_jac) = if has_dcbr {
695                let em = em_rates[bi];
696                let neq = neq_vals[bi];
697                if use_cn {
698                    let em_old = ws.dcbr_em_old[bi];
699                    let neq_old = ws.dcbr_neq_old[bi];
700                    let old_part = 0.5 * dtau * em_old * (neq_old - ws.dn_old[bi]);
701                    (
702                        old_part + 0.5 * dtau * em * (neq - delta_n[bi]),
703                        0.5 * dtau * em,
704                    )
705                } else {
706                    (dtau * em * (neq - delta_n[bi]), dtau * em)
707                }
708            } else {
709                (0.0, 0.0)
710            };
711            let phot_src = if has_phot_src { phot_src_vals[bi] } else { 0.0 };
712            ws.rhs_buf[bi] = -(delta_n[bi] - ws.dn_old[bi] - dcbr_res - phot_src);
713            ws.j_diag[bi] = 1.0 + dcbr_jac;
714            ws.j_lower[bi] = 0.0;
715            ws.j_upper[bi] = 0.0;
716        }
717
718        // SAFETY: i ranges over 1..ng-1; all workspace/delta_n arrays have length >= ng,
719        // half-point arrays have length >= ng-1. DC/BR arrays (when has_dcbr) >= ng.
720        // All asserted at function entry.
721        for i in 1..ng - 1 {
722            unsafe {
723                let dn_im1 = *delta_n.get_unchecked(i - 1);
724                let dn_i = *delta_n.get_unchecked(i);
725                let dn_ip1 = *delta_n.get_unchecked(i + 1);
726                let dn_half_l = 0.5 * (dn_im1 + dn_i);
727                let dn_half_r = 0.5 * (dn_i + dn_ip1);
728
729                // Compute K(delta_n, φ) at interior point i (CN "new" part)
730                let ddn_dx_l = (dn_i - dn_im1) * *ws.inv_dx.get_unchecked(i - 1);
731                let ddn_dx_r = (dn_ip1 - dn_i) * *ws.inv_dx.get_unchecked(i);
732                let x4h_l = *ws.x4_half.get_unchecked(i - 1);
733                let x4h_r = *ws.x4_half.get_unchecked(i);
734                let f_l = x4h_l
735                    * ((phi - 1.0) * *ws.np1_half.get_unchecked(i - 1)
736                        + ddn_dx_l
737                        + phi * *ws.twonp1_half.get_unchecked(i - 1) * dn_half_l
738                        + phi * dn_half_l * dn_half_l);
739                let f_r = x4h_r
740                    * ((phi - 1.0) * *ws.np1_half.get_unchecked(i)
741                        + ddn_dx_r
742                        + phi * *ws.twonp1_half.get_unchecked(i) * dn_half_r
743                        + phi * dn_half_r * dn_half_r);
744                let inv_x2dc = *ws.inv_x2_dx_cell.get_unchecked(i);
745                let k_i = inv_x2dc * (f_r - f_l);
746
747                // DC/BR: backward Euler or Crank-Nicolson within the Newton iteration.
748                //
749                // `em_rates` and `neq_vals` are precomputed by the solver
750                // at the step-start ρ_eq and held fixed across Newton
751                // iterations. `dem_drho`/`dneq_drho` carry their analytical
752                // derivatives w.r.t. ρ_eq; we use them below to close the
753                // Δn-row Jacobian on ρ_e (c_vec). For CN mode the "old"
754                // half is frozen — its ρ_eq derivative does not contribute.
755                let (dcbr_residual, dcbr_jac, dcbr_crho) = if has_dcbr {
756                    let em = *em_rates.get_unchecked(i);
757                    let neq = *neq_vals.get_unchecked(i);
758                    let dem = *dem_drho.get_unchecked(i);
759                    let dneq = *dneq_drho.get_unchecked(i);
760                    // d(dcbr_residual)/d(ρ_eq) from the "new" half, with BE
761                    // weight 1 and CN weight 0.5.
762                    let new_crho = -dtau * (dem * (neq - dn_i) + em * dneq);
763                    if use_cn {
764                        let em_old = *ws.dcbr_em_old.get_unchecked(i);
765                        let neq_old = *ws.dcbr_neq_old.get_unchecked(i);
766                        let old_part =
767                            0.5 * dtau * em_old * (neq_old - *ws.dn_old.get_unchecked(i));
768                        let new_res = 0.5 * dtau * em * (neq - dn_i);
769                        (old_part + new_res, 0.5 * dtau * em, 0.5 * new_crho)
770                    } else {
771                        (dtau * em * (neq - dn_i), dtau * em, new_crho)
772                    }
773                } else {
774                    (0.0, 0.0, 0.0)
775                };
776
777                // Residual: CN for Kompaneets + BE for DC/BR + integrated
778                // source over the step. When `photon_source` is provided
779                // through `DcbrCoupling`, S_i enters as a known constant
780                // (no Jacobian contribution), and the Kompaneets CN "old"
781                // flux is computed from the un-injected Δn_old — so the
782                // source interacts with DC/BR absorption and Comptonisation
783                // over the full step rather than being treated as an
784                // impulse at t_old.
785                let phot_src = if has_phot_src {
786                    *phot_src_vals.get_unchecked(i)
787                } else {
788                    0.0
789                };
790                let residual_i = dn_i
791                    - *ws.dn_old.get_unchecked(i)
792                    - 0.5 * dtau * (k_i + *ws.k_old.get_unchecked(i))
793                    - dcbr_residual
794                    - phot_src;
795                *ws.rhs_buf.get_unchecked_mut(i) = -residual_i;
796
797                // Jacobian: CN (halved) for Kompaneets + BE diagonal for DC/BR
798                let hdc = *ws.half_dtau_coeff.get_unchecked(i);
799                let n_l = *ws.n_pl_half.get_unchecked(i - 1) + dn_half_l;
800                let n_r = *ws.n_pl_half.get_unchecked(i) + dn_half_r;
801                let a_l = *ws.x4_over_dx.get_unchecked(i - 1);
802                let b_l = x4h_l * phi * (2.0 * n_l + 1.0) * 0.5;
803                let a_r = *ws.x4_over_dx.get_unchecked(i);
804                let b_r = x4h_r * phi * (2.0 * n_r + 1.0) * 0.5;
805
806                *ws.j_lower.get_unchecked_mut(i) = -hdc * (a_l - b_l);
807                *ws.j_diag.get_unchecked_mut(i) = 1.0 - hdc * (-a_r + b_r - a_l - b_l) + dcbr_jac;
808                *ws.j_upper.get_unchecked_mut(i) = -hdc * (a_r + b_r);
809
810                // Column vector c_i = dR_i/dρ_e for bordered system.
811                // Two contributions:
812                //   (a) Kompaneets flux: ∂F/∂ρ_e through the n_pl(1+n_pl)/ρ²
813                //       piece (analytical; already in the Planck-subtracted
814                //       split form used here).
815                //   (b) DC/BR residual: ∂(dcbr_residual)/∂ρ_eq, carried in
816                //       `dcbr_crho` above and plumbed via `dem_drho` /
817                //       `dneq_drho`. Closing this piece restores formal
818                //       quadratic Newton convergence in the ρ_e direction
819                //       when DC/BR is strong (high z, photon-injection burst).
820                if has_rho_coupling {
821                    let inv_rho2 = 1.0 / (rho_e * rho_e);
822                    let dfdr_l = -x4h_l * inv_rho2 * n_l * (1.0 + n_l);
823                    let dfdr_r = -x4h_r * inv_rho2 * n_r * (1.0 + n_r);
824                    let c_kompaneets = -0.5 * dtau * inv_x2dc * (dfdr_r - dfdr_l);
825                    *ws.c_vec.get_unchecked_mut(i) = c_kompaneets + dcbr_crho;
826                }
827            }
828        }
829
830        if let Some(rc) = rho_coupling {
831            // Bordered solve: (T, c; b', d) × (δΔn; δρ_e) = (r; r_ρ)
832            ws.c_vec[0] = 0.0;
833            ws.c_vec[ng - 1] = 0.0;
834
835            // Compute H_dcbr = h_norm × Σ wem_j × (neq_j − Δn_j),
836            // where wem_j = w_j × em_j was precomputed before the Newton loop.
837            let mut h_dcbr = 0.0;
838            if has_dcbr {
839                for j in 0..ng {
840                    h_dcbr += ws.wem[j] * (neq_vals[j] - delta_n[j]);
841                }
842                h_dcbr *= rc.h_norm;
843            }
844
845            // ρ_e residual: dρ_e/dτ = R[(source − H_dcbr) − ρ_e] − H·t_C·ρ_e
846            // r_ρ = -(ρ_e − ρ_e_old) + dτ·{R·(source − h_dcbr − ρ_e) − H·t_C·ρ_e}
847            let rhs_rho = -(rho_e - rc.rho_e_old)
848                + dtau * (rc.r_compton * (rc.rho_source - h_dcbr - rho_e) - rc.lambda_exp * rho_e);
849
850            // d = dR_ρ/dρ_e = 1 + dτ·(R·(1 + dH/dρ_e) + H·t_C)
851            let d_rho = 1.0 + dtau * (rc.r_compton * (1.0 + rc.dh_drho) + rc.lambda_exp);
852
853            // Step 1: T·u = r (standard Thomas, result in rhs_buf)
854            thomas_solve_inplace(
855                &ws.j_lower,
856                &ws.j_diag,
857                &ws.j_upper,
858                &mut ws.rhs_buf,
859                &mut ws.thomas_work,
860            );
861
862            // Step 2: T·v = c (Thomas on c_vec, result in v_buf)
863            ws.v_buf[..ng].copy_from_slice(&ws.c_vec[..ng]);
864            thomas_solve_inplace(
865                &ws.j_lower,
866                &ws.j_diag,
867                &ws.j_upper,
868                &mut ws.v_buf,
869                &mut ws.thomas_work2,
870            );
871
872            // Step 3: b'·u and b'·v dot products.
873            // b'_j = ∂R_ρ/∂Δn_j = −dτ · R · h_norm · wem_j.
874            // Sign: h_dcbr = h_norm·Σ wem·(neq − Δn) ⇒ ∂h_dcbr/∂Δn_j = −h_norm·wem_j.
875            // R_ρ = (ρ − ρ_old) − dτ·[R·(source − h_dcbr − ρ) − Λ·ρ], so
876            // ∂R_ρ/∂Δn_j = −dτ·R·(−∂h_dcbr/∂Δn_j) = −dτ·R·h_norm·wem_j.
877            // Pull the scalar (−dτ·R·h_norm) out of the loop and apply once
878            // after the reductions — saves ng muls and keeps the inner loop
879            // a clean fused-multiply-add pair that LLVM can vectorise.
880            let mut bp_dot_u = 0.0;
881            let mut bp_dot_v = 0.0;
882            if has_dcbr {
883                for j in 0..ng {
884                    let wem_j = ws.wem[j];
885                    bp_dot_u += wem_j * ws.rhs_buf[j];
886                    bp_dot_v += wem_j * ws.v_buf[j];
887                }
888                let scale = -dtau * rc.r_compton * rc.h_norm;
889                bp_dot_u *= scale;
890                bp_dot_v *= scale;
891            }
892
893            // Step 4: δρ_e = (r_ρ − b'·u) / (d − b'·v)
894            //
895            // If the bordered-system determinant collapses the problem is
896            // degenerate (no unique ρ_e correction). We signal this by
897            // producing a NaN so the outer Newton loop breaks immediately
898            // and the (false, _, NaN) return propagates non-convergence —
899            // masking the failure as `delta_rho = 0` would silently report
900            // convergence at max_delta below tol.
901            let denom = d_rho - bp_dot_v;
902            let delta_rho = if denom.abs() > 1e-30 {
903                (rhs_rho - bp_dot_u) / denom
904            } else {
905                f64::NAN
906            };
907
908            // Step 5: δΔn = u − v·δρ_e
909            let mut max_delta: f64 = 0.0;
910            for j in 0..ng {
911                let correction = ws.rhs_buf[j] - ws.v_buf[j] * delta_rho;
912                delta_n[j] += correction;
913                let d = correction.abs();
914                if d > max_delta {
915                    max_delta = d;
916                }
917            }
918            rho_e += delta_rho;
919            phi = 1.0 / rho_e;
920
921            let rho_delta = delta_rho.abs();
922            if rho_delta > max_delta {
923                max_delta = rho_delta;
924            }
925            last_max_delta = max_delta;
926
927            if !max_delta.is_finite() {
928                break;
929            }
930            let tol = 1e-8 * max_dn_abs + 1e-14;
931            if max_delta < tol {
932                converged = true;
933                break;
934            }
935        } else {
936            // Standard tridiagonal solve (no ρ_e coupling)
937            thomas_solve_inplace(
938                &ws.j_lower,
939                &ws.j_diag,
940                &ws.j_upper,
941                &mut ws.rhs_buf,
942                &mut ws.thomas_work,
943            );
944
945            let mut max_delta: f64 = 0.0;
946            for i in 0..ng {
947                delta_n[i] += ws.rhs_buf[i];
948                let d = ws.rhs_buf[i].abs();
949                if d > max_delta {
950                    max_delta = d;
951                }
952            }
953            last_max_delta = max_delta;
954
955            if !max_delta.is_finite() {
956                break;
957            }
958            let tol = 1e-8 * max_dn_abs + 1e-14;
959            if max_delta < tol {
960                converged = true;
961                break;
962            }
963        }
964    }
965    (converged, rho_e, last_max_delta)
966}
967
968#[cfg(test)]
969mod tests {
970    use super::*;
971    use crate::grid::FrequencyGrid;
972
973    #[test]
974    fn test_kompaneets_preserves_planck() {
975        // If Δn = 0 and T_e = T_z, Kompaneets should not generate distortion
976        let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 500);
977        let delta_n = vec![0.0; grid.n];
978        let theta_e = 1e-6; // some temperature
979        let theta_z = theta_e; // equilibrium
980
981        let result = kompaneets_step(&grid, &delta_n, theta_e, theta_z, 0.01);
982
983        let max_dn: f64 = result
984            .iter()
985            .map(|x| x.abs())
986            .fold(0.0, |a, b| if b.is_nan() { f64::NAN } else { a.max(b) });
987        assert!(
988            max_dn < 1e-12,
989            "Kompaneets should preserve Planck: max|Δn| = {max_dn}"
990        );
991    }
992
993    #[test]
994    fn test_kompaneets_photon_number_conservation() {
995        // Kompaneets scattering conserves photon number: ∫x² dn/dτ dx = 0
996        // Start with a small perturbation and evolve
997        let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 1000);
998        let theta_e = 1e-6;
999        let theta_z = theta_e;
1000
1001        // Small Gaussian perturbation centered at x=3
1002        let mut delta_n: Vec<f64> = grid
1003            .x
1004            .iter()
1005            .map(|&x| 1e-4 * (-(x - 3.0).powi(2) / 0.5).exp())
1006            .collect();
1007
1008        let n_before: f64 = (1..grid.n)
1009            .map(|i| {
1010                let dx = grid.dx[i - 1];
1011                let x_mid = grid.x_half[i - 1];
1012                let dn_mid = 0.5 * (delta_n[i] + delta_n[i - 1]);
1013                x_mid * x_mid * dn_mid * dx
1014            })
1015            .sum();
1016
1017        // Evolve for several steps
1018        for _ in 0..10 {
1019            delta_n = kompaneets_step(&grid, &delta_n, theta_e, theta_z, 0.001);
1020        }
1021
1022        let n_after: f64 = (1..grid.n)
1023            .map(|i| {
1024                let dx = grid.dx[i - 1];
1025                let x_mid = grid.x_half[i - 1];
1026                let dn_mid = 0.5 * (delta_n[i] + delta_n[i - 1]);
1027                x_mid * x_mid * dn_mid * dx
1028            })
1029            .sum();
1030
1031        let rel_change = (n_after - n_before).abs() / n_before.abs().max(1e-20);
1032        // Kompaneets is a number-conserving operator — boundary leakage
1033        // should be tiny on a grid extending to x_max=30
1034        assert!(
1035            rel_change < 1e-4,
1036            "Photon number not conserved: ΔN/N = {rel_change:.2e} (threshold 1e-4), \
1037             before={n_before:.4e}, after={n_after:.4e}"
1038        );
1039    }
1040
1041    #[test]
1042    fn test_kompaneets_energy_conservation() {
1043        // Kompaneets scattering with T_e = T_z should conserve energy: ∫x³ Δn dx = 0.
1044        // Energy is only redistributed (not created) when there's no T_e−T_z offset.
1045        // Start with a Gaussian perturbation and evolve with T_e = T_z.
1046        let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 1000);
1047        let theta = 1e-6;
1048
1049        // Gaussian perturbation at x=5
1050        let mut delta_n: Vec<f64> = grid
1051            .x
1052            .iter()
1053            .map(|&x| 1e-4 * (-(x - 5.0).powi(2) / 2.0).exp())
1054            .collect();
1055
1056        let energy_before: f64 = (1..grid.n)
1057            .map(|i| {
1058                let dx = grid.dx[i - 1];
1059                let x_mid = grid.x_half[i - 1];
1060                let dn_mid = 0.5 * (delta_n[i] + delta_n[i - 1]);
1061                x_mid.powi(3) * dn_mid * dx
1062            })
1063            .sum();
1064
1065        for _ in 0..10 {
1066            delta_n = kompaneets_step(&grid, &delta_n, theta, theta, 0.001);
1067        }
1068
1069        let energy_after: f64 = (1..grid.n)
1070            .map(|i| {
1071                let dx = grid.dx[i - 1];
1072                let x_mid = grid.x_half[i - 1];
1073                let dn_mid = 0.5 * (delta_n[i] + delta_n[i - 1]);
1074                x_mid.powi(3) * dn_mid * dx
1075            })
1076            .sum();
1077
1078        // With T_e = T_z, pure Kompaneets conserves energy to first order.
1079        // The nonlinear Δn² term introduces O(Δn²) energy change, but for
1080        // Δn ~ 1e-4 the correction is O(1e-8), far below 1e-4.
1081        let rel_change = (energy_after - energy_before).abs() / energy_before.abs().max(1e-20);
1082        assert!(
1083            rel_change < 1e-4,
1084            "Kompaneets energy not conserved with T_e=T_z: ΔE/E = {rel_change:.2e}, \
1085             before={energy_before:.4e}, after={energy_after:.4e}"
1086        );
1087    }
1088
1089    /// Quantitative Kompaneets check in the y-regime: injecting energy via
1090    /// T_e > T_z for one Thomson time should produce Δρ/ρ ≈ 4·y·G₃ where
1091    /// y = (θ_e − θ_z)·dτ is the standard y-parameter. This catches
1092    /// order-of-magnitude errors in the flux split, grid geometry, or time
1093    /// centring that the sign-only `test_kompaneets_te_gt_tz_positive_drho_all_solvers`
1094    /// would miss (audit M2).
1095    #[test]
1096    fn test_kompaneets_y_distortion_magnitude() {
1097        // Deep y-era, linear regime: small δρ_e, small dτ, no DC/BR, no T_e
1098        // evolution. δρ_e = 1e-3 keeps us well inside the Taylor branch where
1099        // the analytical Δρ/ρ = 4y expression is accurate.
1100        let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 2000);
1101        let delta_n = vec![0.0; grid.n];
1102        let theta_z = 1e-6;
1103        let delta_rho_e = 1e-3_f64;
1104        let theta_e = theta_z * (1.0 + delta_rho_e);
1105        let dtau = 1.0_f64;
1106
1107        // Expected: pure y-distortion with y = (θ_e − θ_z)·dτ and Δρ/ρ = 4y.
1108        let y_expected = (theta_e - theta_z) * dtau;
1109        let drho_expected = 4.0 * y_expected;
1110
1111        // Solve.
1112        let result = kompaneets_step_nonlinear(&grid, &delta_n, theta_e, theta_z, dtau);
1113        let drho = crate::spectrum::delta_rho_over_rho(&grid.x, &result);
1114
1115        let rel_err = (drho - drho_expected).abs() / drho_expected.abs();
1116        eprintln!(
1117            "Kompaneets y-magnitude: drho={drho:.4e}, expected={drho_expected:.4e}, \
1118             rel_err={rel_err:.2e}"
1119        );
1120        // 5% tolerance allows for finite grid resolution, the small Δn² term,
1121        // and truncation in the integration stencil. A factor-of-10⁶ bug in
1122        // the flux split (the class this test is meant to catch) would show
1123        // up as rel_err ≫ 1.
1124        assert!(
1125            rel_err < 0.05,
1126            "Kompaneets y-magnitude off: drho={drho:.4e}, expected={drho_expected:.4e}, \
1127             rel_err={rel_err:.2e}"
1128        );
1129    }
1130
1131    /// When T_e > T_z, all Kompaneets solver variants must produce Δρ/ρ > 0
1132    /// (energy flows from electrons to photons via upscattering).
1133    /// Tests CN (kompaneets_step), backward Euler (kompaneets_tridiagonal + Thomas),
1134    /// and nonlinear (kompaneets_step_nonlinear) in a single test.
1135    #[test]
1136    fn test_kompaneets_te_gt_tz_positive_drho_all_solvers() {
1137        let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 2000);
1138        let delta_n = vec![0.0; grid.n];
1139        let theta_z = 1e-4;
1140        let theta_e = theta_z * 1.001;
1141
1142        // 1. Crank-Nicolson (kompaneets_step)
1143        let result_cn = kompaneets_step(&grid, &delta_n, theta_e, theta_z, 0.1);
1144        let drho_cn = crate::spectrum::delta_rho_over_rho(&grid.x, &result_cn);
1145        assert!(drho_cn > 0.0, "CN: Δρ/ρ > 0 expected, got {drho_cn:.4e}");
1146
1147        // 2. Backward Euler (kompaneets_tridiagonal + Thomas solve)
1148        let (lower, diag, upper, source) = kompaneets_tridiagonal(&grid, theta_e, theta_z);
1149        let dtau = 1.0;
1150        let n = grid.n;
1151        let mut rhs: Vec<f64> = (0..n).map(|i| dtau * source[i]).collect();
1152        rhs[0] = 0.0;
1153        rhs[n - 1] = 0.0;
1154        let lhs_lower: Vec<f64> = lower.iter().map(|&a| -dtau * a).collect();
1155        let lhs_diag: Vec<f64> = diag
1156            .iter()
1157            .enumerate()
1158            .map(|(i, &b)| {
1159                if i == 0 || i == n - 1 {
1160                    1.0
1161                } else {
1162                    1.0 - dtau * b
1163                }
1164            })
1165            .collect();
1166        let lhs_upper: Vec<f64> = upper.iter().map(|&c| -dtau * c).collect();
1167        let result_be = thomas_solve(&lhs_lower, &lhs_diag, &lhs_upper, &mut rhs);
1168        let drho_be = crate::spectrum::delta_rho_over_rho(&grid.x, &result_be);
1169        assert!(drho_be > 0.0, "BE: Δρ/ρ > 0 expected, got {drho_be:.4e}");
1170
1171        // 3. Nonlinear solver (kompaneets_step_nonlinear)
1172        let result_nl = kompaneets_step_nonlinear(&grid, &delta_n, theta_e, theta_z, 1.0);
1173        let drho_nl = crate::spectrum::delta_rho_over_rho(&grid.x, &result_nl);
1174        assert!(drho_nl > 0.0, "NL: Δρ/ρ > 0 expected, got {drho_nl:.4e}");
1175
1176        eprintln!(
1177            "Source sign (all solvers): CN={drho_cn:.4e}, BE={drho_be:.4e}, NL={drho_nl:.4e}"
1178        );
1179    }
1180
1181    /// Guards the analytic Planck cancellation in `kompaneets_rhs` (CLAUDE.md
1182    /// pitfall #1). The flux is written as
1183    ///   F = x⁴[(φ−1) n_pl(1+n_pl) + dΔn/dx + φ(2n_pl+1)Δn + φΔn²]
1184    /// so that with Δn = 0 and T_e = T_z (⇒ φ = 1), every term is identically
1185    /// zero BEFORE any finite differences touch n_pl. A naive flux that kept
1186    /// dn_pl/dx and +φ n_pl(1+n_pl) as separate pieces would leak O(dx²) ~ 10⁻⁴
1187    /// per point after the divergence, amplified by θ_e/x² at small x —
1188    /// ~1000× the physical y-signal ~10⁻⁵.
1189    ///
1190    /// Two probes:
1191    ///   (a) Δn = 0, T_e = T_z  →  rhs = 0 to machine precision.
1192    ///   (b) Δn = 0, T_e = T_z·(1+ε), ε = 1e-8  →  rhs scales linearly with ε
1193    ///       (the (φ−1) source is the ONLY nonzero piece).
1194    #[test]
1195    fn test_kompaneets_rhs_planck_cancellation() {
1196        let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 500);
1197        let delta_n = vec![0.0; grid.n];
1198        let theta = 1e-6;
1199
1200        // (a) Exact cancellation at T_e = T_z.
1201        let rhs_eq = kompaneets_rhs(&grid, &delta_n, theta, theta);
1202        let max_abs_eq: f64 = rhs_eq
1203            .iter()
1204            .copied()
1205            .filter(|v| v.is_finite())
1206            .fold(0.0, |a, b| a.max(b.abs()));
1207        assert!(
1208            max_abs_eq < 1e-20,
1209            "Planck + T_e=T_z must give rhs=0 to machine precision: max|rhs| = {max_abs_eq:.2e}. \
1210             Any value > ~1e-15 means the flux is using a finite-difference form of dn_pl/dx \
1211             instead of the analytic identity (CLAUDE.md pitfall #1)."
1212        );
1213
1214        // (b) Linearity in (φ-1): halving ε halves max|rhs| to relative 1e-6.
1215        // If an O(dx²) Planck leak were present, rhs wouldn't scale with ε and
1216        // this ratio would stay near 1.
1217        let eps_big: f64 = 1e-6;
1218        let eps_small: f64 = 1e-8;
1219        let rhs_big = kompaneets_rhs(&grid, &delta_n, theta, theta * (1.0 + eps_big));
1220        let rhs_small = kompaneets_rhs(&grid, &delta_n, theta, theta * (1.0 + eps_small));
1221        let max_big: f64 = rhs_big
1222            .iter()
1223            .copied()
1224            .filter(|v| v.is_finite())
1225            .fold(0.0, |a, b| a.max(b.abs()));
1226        let max_small: f64 = rhs_small
1227            .iter()
1228            .copied()
1229            .filter(|v| v.is_finite())
1230            .fold(0.0, |a, b| a.max(b.abs()));
1231        let expected_ratio = eps_big / eps_small; // 100
1232        let actual_ratio = max_big / max_small;
1233        assert!(
1234            (actual_ratio / expected_ratio - 1.0).abs() < 1e-3,
1235            "rhs must scale ∝ (φ-1): max_big/max_small = {actual_ratio:.4} vs expected {expected_ratio:.4}. \
1236             Deviation > 0.1% means the RHS has a (φ-1)-independent residual (Planck leakage)."
1237        );
1238    }
1239
1240    #[test]
1241    fn test_nonlinear_preserves_planck() {
1242        // Nonlinear solver on Δn should preserve zero when T_e = T_z
1243        let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 500);
1244        let delta_n = vec![0.0; grid.n];
1245        let theta = 1e-6;
1246
1247        let result = kompaneets_step_nonlinear(&grid, &delta_n, theta, theta, 0.01);
1248
1249        let max_err: f64 = result
1250            .iter()
1251            .map(|x| x.abs())
1252            .fold(0.0, |a, b| if b.is_nan() { f64::NAN } else { a.max(b) });
1253        eprintln!("Nonlinear preserves Planck: max|Δn| = {max_err:.4e}");
1254        assert!(max_err < 1e-12, "max|Δn| = {max_err}");
1255    }
1256
1257    #[test]
1258    fn test_thomas_solve_inplace_2x2() {
1259        // [[2, -1], [-1, 2]] x = [1, 1] → x = [1, 1]
1260        let lower = vec![0.0, -1.0];
1261        let diag = vec![2.0, 2.0];
1262        let upper = vec![-1.0, 0.0];
1263        let mut rhs = vec![1.0, 1.0];
1264        let mut work = vec![0.0; 2];
1265        thomas_solve_inplace(&lower, &diag, &upper, &mut rhs, &mut work);
1266        assert!(
1267            (rhs[0] - 1.0).abs() < 1e-14,
1268            "x[0] = {}, expected 1.0",
1269            rhs[0]
1270        );
1271        assert!(
1272            (rhs[1] - 1.0).abs() < 1e-14,
1273            "x[1] = {}, expected 1.0",
1274            rhs[1]
1275        );
1276    }
1277
1278    #[test]
1279    fn test_thomas_solve_inplace_3x3() {
1280        // Classic tridiagonal: [2,-1,0; -1,2,-1; 0,-1,2] x = [1, 0, 1]
1281        // Known solution: x = [1, 1, 1]
1282        let lower = vec![0.0, -1.0, -1.0];
1283        let diag = vec![2.0, 2.0, 2.0];
1284        let upper = vec![-1.0, -1.0, 0.0];
1285        let mut rhs = vec![1.0, 0.0, 1.0];
1286        let mut work = vec![0.0; 3];
1287        thomas_solve_inplace(&lower, &diag, &upper, &mut rhs, &mut work);
1288        assert!(
1289            (rhs[0] - 1.0).abs() < 1e-14,
1290            "x[0] = {}, expected 1.0",
1291            rhs[0]
1292        );
1293        assert!(
1294            (rhs[1] - 1.0).abs() < 1e-14,
1295            "x[1] = {}, expected 1.0",
1296            rhs[1]
1297        );
1298        assert!(
1299            (rhs[2] - 1.0).abs() < 1e-14,
1300            "x[2] = {}, expected 1.0",
1301            rhs[2]
1302        );
1303    }
1304
1305    #[test]
1306    fn test_thomas_solve_inplace_identity() {
1307        // Diagonal matrix (no off-diags): solution = rhs / diag
1308        let lower = vec![0.0, 0.0, 0.0, 0.0];
1309        let diag = vec![3.0, 5.0, 2.0, 7.0];
1310        let upper = vec![0.0, 0.0, 0.0, 0.0];
1311        let mut rhs = vec![9.0, 10.0, 6.0, 21.0];
1312        let mut work = vec![0.0; 4];
1313        thomas_solve_inplace(&lower, &diag, &upper, &mut rhs, &mut work);
1314        let expected = [3.0, 2.0, 3.0, 3.0];
1315        for i in 0..4 {
1316            assert!(
1317                (rhs[i] - expected[i]).abs() < 1e-14,
1318                "x[{i}] = {}, expected {}",
1319                rhs[i],
1320                expected[i]
1321            );
1322        }
1323    }
1324
1325    #[test]
1326    fn test_coupled_inplace_preserves_planck() {
1327        // The production solver kompaneets_step_coupled_inplace should
1328        // preserve a Planck spectrum when T_e = T_z and no DC/BR coupling.
1329        let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 500);
1330        let mut delta_n = vec![0.0; grid.n];
1331        let theta = 1e-6;
1332        let dtau = 0.01;
1333        let mut ws = KompaneetsWorkspace::new(&grid);
1334
1335        // Returns (converged, rho_e, last_max_delta)
1336        let (converged, rho_e, last_delta) = kompaneets_step_coupled_inplace(
1337            &grid,
1338            &mut delta_n,
1339            theta,
1340            theta,
1341            dtau,
1342            None,
1343            None,
1344            &mut ws,
1345            0.0,
1346            20,
1347        );
1348
1349        assert!(converged, "Newton should converge for zero distortion");
1350        // rho_e should be 1.0 (T_e = T_z)
1351        assert!(
1352            (rho_e - 1.0).abs() < 1e-10,
1353            "rho_e should be 1.0 at equilibrium: {rho_e}"
1354        );
1355        // last Newton correction should be tiny
1356        assert!(
1357            last_delta < 1e-12,
1358            "Newton correction should be tiny: {last_delta:.4e}"
1359        );
1360
1361        let max_dn: f64 = delta_n
1362            .iter()
1363            .map(|x| x.abs())
1364            .fold(0.0, |a, b| if b.is_nan() { f64::NAN } else { a.max(b) });
1365        assert!(
1366            max_dn < 1e-14,
1367            "Should preserve Planck: max|Δn| = {max_dn:.4e}"
1368        );
1369    }
1370
1371    #[test]
1372    fn test_coupled_inplace_with_dcbr() {
1373        // Test the coupled solver with DC/BR coupling active.
1374        // With T_e = T_z and Δn = 0, DC/BR are at detailed balance.
1375        let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 500);
1376        let mut delta_n = vec![0.0; grid.n];
1377        let theta = 1e-4;
1378        let dtau = 0.01;
1379        let mut ws = KompaneetsWorkspace::new(&grid);
1380
1381        // At equilibrium (ρ_e=1), n_eq = 0 and emission rates are finite but irrelevant
1382        let emission_rates: Vec<f64> = grid
1383            .x
1384            .iter()
1385            .map(|&x| {
1386                let k_dc = crate::double_compton::dc_emission_coefficient(x, theta);
1387                let x_e: f64 = 1.0;
1388                (k_dc / x.powi(3)) * (x_e.exp() - 1.0)
1389            })
1390            .collect();
1391        let n_eq: Vec<f64> = vec![0.0; grid.n];
1392
1393        let dem = vec![0.0; grid.n];
1394        let dneq = vec![0.0; grid.n];
1395        let dcbr = DcbrCoupling {
1396            emission_rates: &emission_rates,
1397            n_eq_minus_n_pl: &n_eq,
1398            dem_drho_eq: &dem,
1399            dneq_drho_eq: &dneq,
1400            photon_source: None,
1401            cn_dcbr: false,
1402        };
1403
1404        let (converged, rho_e, last_delta) = kompaneets_step_coupled_inplace(
1405            &grid,
1406            &mut delta_n,
1407            theta,
1408            theta,
1409            dtau,
1410            Some(&dcbr),
1411            None,
1412            &mut ws,
1413            0.0,
1414            20,
1415        );
1416
1417        assert!(
1418            converged,
1419            "Newton should converge with DC/BR at equilibrium"
1420        );
1421        assert!((rho_e - 1.0).abs() < 1e-10, "rho_e should be 1.0: {rho_e}");
1422        assert!(
1423            last_delta < 1e-10,
1424            "Newton correction should be small: {last_delta:.4e}"
1425        );
1426    }
1427
1428    /// Verify that a small temperature perturbation produces a Y_SZ spectral shape.
1429    ///
1430    /// For T_e slightly > T_z, the Kompaneets equation produces Δn ∝ Y_SZ(x).
1431    /// This is the defining property of the y-distortion. The Pearson correlation
1432    /// between the resulting Δn and Y_SZ(x) should be > 0.95.
1433    #[test]
1434    fn test_kompaneets_yields_ysz_shape() {
1435        let grid = FrequencyGrid::log_uniform(0.1, 25.0, 500);
1436        let delta_n = vec![0.0; grid.n];
1437        let theta_z = 1e-6;
1438        let theta_e = 1.001 * theta_z; // small perturbation: T_e = 1.001 T_z
1439
1440        // Run one small step
1441        let result = kompaneets_step(&grid, &delta_n, theta_e, theta_z, 0.1);
1442
1443        // Compute Y_SZ(x) at grid points
1444        let y_shape: Vec<f64> = grid
1445            .x
1446            .iter()
1447            .map(|&x| crate::spectrum::y_shape(x))
1448            .collect();
1449
1450        // Pearson correlation between result and Y_SZ
1451        let n = result.len();
1452        let mean_r: f64 = result.iter().sum::<f64>() / n as f64;
1453        let mean_y: f64 = y_shape.iter().sum::<f64>() / n as f64;
1454
1455        let mut cov = 0.0;
1456        let mut var_r = 0.0;
1457        let mut var_y = 0.0;
1458        for i in 0..n {
1459            let dr = result[i] - mean_r;
1460            let dy = y_shape[i] - mean_y;
1461            cov += dr * dy;
1462            var_r += dr * dr;
1463            var_y += dy * dy;
1464        }
1465        let corr = cov / (var_r.sqrt() * var_y.sqrt());
1466        eprintln!("Kompaneets Y_SZ correlation: {corr:.6}");
1467
1468        assert!(
1469            corr > 0.95,
1470            "Kompaneets step with T_e > T_z should produce Y_SZ shape: correlation = {corr:.4}"
1471        );
1472    }
1473}
spectroxide/kompaneets.rs

spectroxide/
kompaneets.rs