spectroxide/kompaneets.rs
1//! Kompaneets equation: Compton scattering of photons off thermal electrons.
2//!
3//! The Kompaneets equation describes the Fokker-Planck (diffusion) approximation
4//! to Compton scattering in the non-relativistic limit:
5//!
6//! dn/dτ|_C = (θ_e / x²) ∂/∂x [x⁴ (∂n/∂x + φ n(n+1))]
7//!
8//! where φ = T_z/T_e, θ_e = kT_e/(m_e c²), τ = t/t_C.
9//!
10//! For small distortions Δn = n - n_pl, the linearized form is:
11//!
12//! dΔn/dτ|_C = (θ_e / x²) ∂/∂x [x⁴ (∂Δn/∂x + φ(2n_pl+1)Δn)] + source
13//!
14//! Discretized with second-order conservative finite differences and solved
15//! with Crank-Nicolson time stepping → tridiagonal system.
16//!
17//! References:
18//! - Kompaneets (1957), JETP
19//! - Chluba & Sunyaev (2012), MNRAS 419, 1294 [Eq. 4]
20
21use crate::grid::FrequencyGrid;
22use crate::spectrum::planck;
23
24/// Compute the Kompaneets operator applied to Δn on the grid (test-only).
25///
26/// Returns dΔn/dτ|_C at each grid point. Production code uses the
27/// coupled inplace solver instead.
28#[cfg(test)]
29pub fn kompaneets_rhs(
30 grid: &FrequencyGrid,
31 delta_n: &[f64],
32 theta_e: f64,
33 theta_z: f64,
34) -> Vec<f64> {
35 let n = grid.n;
36 let phi = theta_z / theta_e; // T_z / T_e = 1/ρ_e
37 let mut rhs = vec![0.0; n];
38
39 // Compute fluxes at cell interfaces using the split form.
40 //
41 // Full flux: F = x⁴ [dn/dx + φ n(1+n)]
42 // With n = n_pl + Δn and the identity dn_pl/dx = -n_pl(1+n_pl):
43 // dn/dx + φ n(1+n) = -n_pl(1+n_pl) + dΔn/dx + φ[n_pl(1+n_pl) + (2n_pl+1)Δn + Δn²]
44 // = (φ-1)n_pl(1+n_pl) + dΔn/dx + φ(2n_pl+1)Δn + φΔn²
45 //
46 // This form is numerically stable because each term is computed directly.
47 let mut flux = vec![0.0; n - 1];
48
49 for i in 0..n - 1 {
50 let x_half = grid.x_half[i];
51 let dx = grid.dx[i];
52
53 let n_pl_half = planck(x_half);
54 let dn_half = 0.5 * (delta_n[i] + delta_n[i + 1]);
55
56 // Derivative of the distortion at the interface
57 let ddn_dx = (delta_n[i + 1] - delta_n[i]) / dx;
58
59 // Source from T_e ≠ T_z (analytical, no cancellation)
60 let source = (phi - 1.0) * n_pl_half * (n_pl_half + 1.0);
61
62 // Linearized drift on Δn
63 let drift_linear = phi * (2.0 * n_pl_half + 1.0) * dn_half;
64
65 // Nonlinear stimulated term
66 let drift_nonlinear = phi * dn_half * dn_half;
67
68 flux[i] = x_half.powi(4) * (ddn_dx + source + drift_linear + drift_nonlinear);
69 }
70
71 // Convert flux divergence to dΔn/dτ:
72 // dΔn/dτ = (θ_e / x²) × (F_{i+1/2} - F_{i-1/2}) / Δx_cell
73 for i in 1..n - 1 {
74 let x = grid.x[i];
75 let dx_cell = 0.5 * (grid.dx[i - 1] + grid.dx[i]);
76 rhs[i] = theta_e / (x * x) * (flux[i] - flux[i - 1]) / dx_cell;
77 }
78
79 // Boundary conditions: Δn → 0 at both ends
80 rhs[0] = 0.0;
81 rhs[n - 1] = 0.0;
82
83 rhs
84}
85
86/// Build tridiagonal matrix coefficients for the linearized Kompaneets equation (test-only).
87///
88/// Production code uses the coupled inplace solver instead.
89#[cfg(test)]
90pub fn kompaneets_tridiagonal(
91 grid: &FrequencyGrid,
92 theta_e: f64,
93 theta_z: f64,
94) -> (Vec<f64>, Vec<f64>, Vec<f64>, Vec<f64>) {
95 let n = grid.n;
96 let phi = theta_z / theta_e;
97
98 let mut lower = vec![0.0; n]; // A_i (coefficient of Δn_{i-1})
99 let mut diag = vec![0.0; n]; // B_i (coefficient of Δn_i)
100 let mut upper = vec![0.0; n]; // C_i (coefficient of Δn_{i+1})
101 let mut source = vec![0.0; n]; // Source term from T_e ≠ T_z
102
103 for i in 1..n - 1 {
104 let x = grid.x[i];
105 let x2 = x * x;
106
107 // Cell widths
108 let dx_left = grid.dx[i - 1];
109 let dx_right = grid.dx[i];
110 let dx_cell = 0.5 * (dx_left + dx_right);
111
112 // Half-point values
113 let x_left = grid.x_half[i - 1];
114 let x_right = grid.x_half[i];
115
116 // Diffusion coefficients at half-points: D = θ_e x⁴
117 let d_left = theta_e * x_left.powi(4);
118 let d_right = theta_e * x_right.powi(4);
119
120 // Drift coefficients at half-points: V = θ_e x⁴ φ (2n_pl + 1)
121 // For linearization: n(n+1) ≈ n_pl(n_pl+1) + (2n_pl+1)Δn
122 let n_pl_left = planck(x_left);
123 let n_pl_right = planck(x_right);
124 let v_left = theta_e * x_left.powi(4) * phi * (2.0 * n_pl_left + 1.0);
125 let v_right = theta_e * x_right.powi(4) * phi * (2.0 * n_pl_right + 1.0);
126
127 // Finite difference: (1/x²) * [1/Δx_cell] * [F_{i+1/2} - F_{i-1/2}]
128 // F_{i+1/2} = D_{i+1/2} (Δn_{i+1} - Δn_i)/dx_right + V_{i+1/2} Δn_{i+1/2}
129 // F_{i-1/2} = D_{i-1/2} (Δn_i - Δn_{i-1})/dx_left + V_{i-1/2} Δn_{i-1/2}
130
131 // Using upwind-like averaging for drift: Δn_{i+1/2} ≈ (Δn_i + Δn_{i+1})/2
132 let coeff = 1.0 / (x2 * dx_cell);
133
134 // Contribution from right flux F_{i+1/2}
135 let a_right = d_right / dx_right; // diffusion
136 let b_right = v_right * 0.5; // drift (averaged)
137
138 // Contribution from left flux F_{i-1/2}
139 let a_left = d_left / dx_left;
140 let b_left = v_left * 0.5;
141
142 // F_{i+1/2} = D_R/dx_R * (Δn_{i+1} - Δn_i) + V_R/2 * (Δn_i + Δn_{i+1})
143 // F_{i-1/2} = D_L/dx_L * (Δn_i - Δn_{i-1}) + V_L/2 * (Δn_{i-1} + Δn_i)
144 //
145 // dΔn_i/dτ = coeff * (F_R - F_L) where coeff = 1/(x² Δx_cell)
146
147 // Coefficient of Δn_{i-1}: coeff * (D_L/dx_L - V_L/2)
148 lower[i] = coeff * (a_left - b_left);
149
150 // Coefficient of Δn_{i+1}: coeff * (D_R/dx_R + V_R/2)
151 upper[i] = coeff * (a_right + b_right);
152
153 // Coefficient of Δn_i: coeff * (-D_R/dx_R + V_R/2 - D_L/dx_L - V_L/2)
154 diag[i] = coeff * (-a_right + b_right - a_left - b_left);
155
156 // Source term from T_e ≠ T_z:
157 // F^{pl} = x⁴ [dn_pl/dx + φ n_pl(1+n_pl)] = x⁴ (φ-1) n_pl(1+n_pl)
158 // HOWEVER, the energy from this source is:
159 // ∫ x³ source dx = -θ_e(φ-1)I₄ = θ_e(1-φ)I₄ > 0 for T_e > T_z
160 // So the source should be the DIVERGENCE of F^{pl} = x⁴(φ-1)n(1+n):
161 let source_right =
162 theta_e * x_right.powi(4) * (phi - 1.0) * n_pl_right * (n_pl_right + 1.0);
163 let source_left = theta_e * x_left.powi(4) * (phi - 1.0) * n_pl_left * (n_pl_left + 1.0);
164 source[i] = coeff * (source_right - source_left);
165 }
166
167 // Boundary conditions: Δn fixed at boundaries
168 diag[0] = 1.0;
169 diag[n - 1] = 1.0;
170
171 (lower, diag, upper, source)
172}
173
174/// Solve a tridiagonal system Ax = d using the Thomas algorithm.
175///
176/// A is given by (lower, diag, upper) vectors.
177/// Modifies `rhs` in place to contain the solution.
178/// Uses pre-allocated `work` buffer for the forward sweep (must be same length as `diag`).
179pub fn thomas_solve_inplace(
180 lower: &[f64],
181 diag: &[f64],
182 upper: &[f64],
183 rhs: &mut [f64],
184 work: &mut [f64],
185) {
186 let n = diag.len();
187 assert!(n >= 2);
188 assert!(lower.len() >= n);
189 assert!(upper.len() >= n);
190 assert!(rhs.len() >= n);
191 assert!(work.len() >= n);
192 assert!(
193 diag[0].abs() > 0.0,
194 "Thomas algorithm: zero pivot at row 0 (singular tridiagonal)"
195 );
196
197 // SAFETY: All slice accesses below are within [0, n). The asserts above
198 // guarantee sufficient length. Using get_unchecked eliminates per-element
199 // bounds checks in the forward/back sweeps — this function is called
200 // ~10^6 times per solve (Newton iters × timesteps), so the overhead matters.
201 unsafe {
202 // Forward sweep
203 let d0 = *diag.get_unchecked(0);
204 *work.get_unchecked_mut(0) = *upper.get_unchecked(0) / d0;
205 *rhs.get_unchecked_mut(0) /= d0;
206
207 for i in 1..n - 1 {
208 let denom =
209 *diag.get_unchecked(i) - *lower.get_unchecked(i) * *work.get_unchecked(i - 1);
210 *work.get_unchecked_mut(i) = *upper.get_unchecked(i) / denom;
211 *rhs.get_unchecked_mut(i) = (*rhs.get_unchecked(i)
212 - *lower.get_unchecked(i) * *rhs.get_unchecked(i - 1))
213 / denom;
214 }
215 // Last row (no upper entry needed)
216 let i = n - 1;
217 let denom = *diag.get_unchecked(i) - *lower.get_unchecked(i) * *work.get_unchecked(i - 1);
218 *work.get_unchecked_mut(i) = 0.0;
219 *rhs.get_unchecked_mut(i) =
220 (*rhs.get_unchecked(i) - *lower.get_unchecked(i) * *rhs.get_unchecked(i - 1)) / denom;
221
222 // Back substitution
223 for i in (0..n - 1).rev() {
224 *rhs.get_unchecked_mut(i) -= *work.get_unchecked(i) * *rhs.get_unchecked(i + 1);
225 }
226 }
227}
228
229/// Solve a tridiagonal system (allocating version for tests/convenience).
230pub fn thomas_solve(lower: &[f64], diag: &[f64], upper: &[f64], rhs: &mut [f64]) -> Vec<f64> {
231 let n = diag.len();
232 let mut work = vec![0.0; n];
233 let mut result = rhs.to_vec();
234 thomas_solve_inplace(lower, diag, upper, &mut result, &mut work);
235 result
236}
237
238/// Perform one Crank-Nicolson step of the linearized Kompaneets equation (test-only).
239///
240/// Production code uses the nonlinear coupled inplace solver instead.
241#[cfg(test)]
242pub fn kompaneets_step(
243 grid: &FrequencyGrid,
244 delta_n: &[f64],
245 theta_e: f64,
246 theta_z: f64,
247 dtau: f64,
248) -> Vec<f64> {
249 let n = grid.n;
250 let (lower, diag, upper, source) = kompaneets_tridiagonal(grid, theta_e, theta_z);
251
252 // Build RHS: (I + 0.5 Δτ L) Δn + Δτ S
253 let mut rhs = vec![0.0; n];
254 for i in 1..n - 1 {
255 rhs[i] = delta_n[i]
256 + 0.5
257 * dtau
258 * (lower[i] * delta_n[i.saturating_sub(1)]
259 + diag[i] * delta_n[i]
260 + upper[i] * delta_n[(i + 1).min(n - 1)])
261 + dtau * source[i];
262 }
263 // BCs
264 rhs[0] = 0.0;
265 rhs[n - 1] = 0.0;
266
267 // Build LHS: (I - 0.5 Δτ L)
268 let lhs_lower: Vec<f64> = lower.iter().map(|&a| -0.5 * dtau * a).collect();
269 let lhs_diag: Vec<f64> = diag
270 .iter()
271 .enumerate()
272 .map(|(i, &b)| {
273 if i == 0 || i == n - 1 {
274 1.0
275 } else {
276 1.0 - 0.5 * dtau * b
277 }
278 })
279 .collect();
280 let lhs_upper: Vec<f64> = upper.iter().map(|&c| -0.5 * dtau * c).collect();
281
282 thomas_solve(&lhs_lower, &lhs_diag, &lhs_upper, &mut rhs)
283}
284
285/// Perform one implicit step of the NONLINEAR Kompaneets equation on Δn.
286///
287/// Allocating convenience wrapper around the inplace solver. Used by tests;
288/// production code calls `kompaneets_step_coupled_inplace` directly.
289pub fn kompaneets_step_nonlinear(
290 grid: &FrequencyGrid,
291 delta_n_old: &[f64],
292 theta_e: f64,
293 theta_z: f64,
294 dtau: f64,
295) -> Vec<f64> {
296 let mut result = delta_n_old.to_vec();
297 let mut ws = KompaneetsWorkspace::new(grid);
298 let (_converged, _rho_e, _residual) = kompaneets_step_coupled_inplace(
299 grid,
300 &mut result,
301 theta_e,
302 theta_z,
303 dtau,
304 None::<&DcbrCoupling>,
305 None,
306 &mut ws,
307 0.0,
308 10,
309 );
310 result
311}
312
313/// Pre-allocated workspace for Kompaneets solver.
314///
315/// Grid-constant arrays (set once in `new()`) depend only on the frequency grid.
316/// Per-step arrays are reused across timesteps to avoid heap allocation
317/// (~15 vectors × 1000 elements, called 100K+ times per run).
318pub struct KompaneetsWorkspace {
319 // Grid-constant (length n_half = ng - 1):
320 n_pl_half: Vec<f64>,
321 np1_half: Vec<f64>,
322 twonp1_half: Vec<f64>,
323 x4_half: Vec<f64>,
324 inv_dx: Vec<f64>,
325 x4_over_dx: Vec<f64>,
326 // Grid-constant geometry (length ng, only interior points used):
327 inv_x2_dx_cell_geom: Vec<f64>,
328 // Grid-constant quadrature weights for ∫x³ f(x) dx (length ng):
329 // w_j = share of trapezoidal weight from cells on either side of grid point j.
330 quad_weights_x3: Vec<f64>,
331 // Per-step reusable buffers (length ng):
332 inv_x2_dx_cell: Vec<f64>,
333 half_dtau_coeff: Vec<f64>,
334 k_old: Vec<f64>,
335 dn_old: Vec<f64>,
336 j_lower: Vec<f64>,
337 j_diag: Vec<f64>,
338 j_upper: Vec<f64>,
339 rhs_buf: Vec<f64>,
340 thomas_work: Vec<f64>,
341 // Bordered system workspace (for coupled ρ_e solve):
342 c_vec: Vec<f64>,
343 v_buf: Vec<f64>,
344 thomas_work2: Vec<f64>,
345 // Precomputed w_j × em_j for the bordered Newton. Fixed across the
346 // Newton iteration (em_rates and weights don't change within a step),
347 // so we compute it once per step and reuse it in both the h_dcbr pass
348 // and the bp_dot_u/bp_dot_v pass.
349 wem: Vec<f64>,
350 // DC/BR old-step buffers for Crank-Nicolson DC/BR option:
351 pub(crate) dcbr_em_old: Vec<f64>,
352 pub(crate) dcbr_neq_old: Vec<f64>,
353}
354
355/// Parameters for coupling ρ_e into the bordered Newton system.
356///
357/// When passed to `kompaneets_step_coupled_inplace`, ρ_e becomes
358/// the (N+1)-th unknown solved simultaneously with Δn. The system
359/// becomes bordered tridiagonal, solved in O(N) via two Thomas solves.
360pub struct RhoECoupling {
361 /// ρ_e at the start of this timestep (before update_temperatures).
362 pub rho_e_old: f64,
363 /// Compton coupling coefficient R = (8/3)(ρ̃_γ/α_h).
364 /// This is the rate at which Compton scattering drives ρ_e → ρ_eq,
365 /// per unit Thomson optical depth.
366 pub r_compton: f64,
367 /// Source term in the ρ_e ODE: R·ρ_eq + δρ_inj.
368 pub rho_source: f64,
369 /// Adiabatic cooling rate: H·t_C.
370 pub lambda_exp: f64,
371 /// dH_dcbr/dρ_e (finite-difference derivative from update_temperatures).
372 pub dh_drho: f64,
373 /// Normalization for H_dcbr integral: 1/(4·G₃·θ_z).
374 pub h_norm: f64,
375}
376
377impl KompaneetsWorkspace {
378 /// Create workspace for a given frequency grid. Call once in solver construction.
379 pub fn new(grid: &FrequencyGrid) -> Self {
380 let ng = grid.n;
381 let n_half = ng - 1;
382
383 let mut n_pl_half = vec![0.0; n_half];
384 let mut np1_half = vec![0.0; n_half];
385 let mut twonp1_half = vec![0.0; n_half];
386 let mut x4_half = vec![0.0; n_half];
387 let mut inv_dx = vec![0.0; n_half];
388 let mut x4_over_dx = vec![0.0; n_half];
389
390 for i in 0..n_half {
391 let xh = grid.x_half[i];
392 let np = planck(xh);
393 n_pl_half[i] = np;
394 np1_half[i] = np * (np + 1.0);
395 twonp1_half[i] = 2.0 * np + 1.0;
396 let x4 = xh * xh * xh * xh;
397 x4_half[i] = x4;
398 let dx = grid.dx[i];
399 inv_dx[i] = 1.0 / dx;
400 x4_over_dx[i] = x4 / dx;
401 }
402
403 let mut inv_x2_dx_cell_geom = vec![0.0; ng];
404 for i in 1..ng - 1 {
405 let xi = grid.x[i];
406 let dx_cell = 0.5 * (grid.dx[i - 1] + grid.dx[i]);
407 inv_x2_dx_cell_geom[i] = 1.0 / (xi * xi * dx_cell);
408 }
409
410 // Quadrature weights for ∫x³ f(x) dx: trapezoidal on cell midpoints.
411 // w_j = sum of half-weights from cells adjacent to grid point j.
412 let mut quad_weights_x3 = vec![0.0; ng];
413 for j in 1..ng {
414 let w = 0.5 * grid.x_half_cubed[j - 1] * grid.dx[j - 1];
415 quad_weights_x3[j - 1] += w;
416 quad_weights_x3[j] += w;
417 }
418
419 KompaneetsWorkspace {
420 n_pl_half,
421 np1_half,
422 twonp1_half,
423 x4_half,
424 inv_dx,
425 x4_over_dx,
426 inv_x2_dx_cell_geom,
427 quad_weights_x3,
428 inv_x2_dx_cell: vec![0.0; ng],
429 half_dtau_coeff: vec![0.0; ng],
430 k_old: vec![0.0; ng],
431 dn_old: vec![0.0; ng],
432 j_lower: vec![0.0; ng],
433 j_diag: vec![0.0; ng],
434 j_upper: vec![0.0; ng],
435 rhs_buf: vec![0.0; ng],
436 thomas_work: vec![0.0; ng],
437 c_vec: vec![0.0; ng],
438 v_buf: vec![0.0; ng],
439 thomas_work2: vec![0.0; ng],
440 wem: vec![0.0; ng],
441 dcbr_em_old: vec![0.0; ng],
442 dcbr_neq_old: vec![0.0; ng],
443 }
444 }
445}
446
447/// DC/BR coupling data for implicit backward Euler within the Kompaneets step.
448///
449/// Instead of a precomputed frozen source `dcbr_source[i] = (neq-Δn_old)(1-e^{-dτ·em})`,
450/// we pass the emission rates and equilibrium targets so DC/BR is solved implicitly
451/// (backward Euler) inside the Newton iteration. This ensures DC/BR and Kompaneets
452/// see the same evolving Δn, matching CosmoTherm's approach.
453pub struct DcbrCoupling<'a> {
454 /// DC/BR absorption rates: `R[i] = (K/x³)(e^{x_e}-1)`, capped at 1e8.
455 pub emission_rates: &'a [f64],
456 /// Equilibrium target: `neq[i] = n_pl(x/ρ_eq) - n_pl(x)`.
457 pub n_eq_minus_n_pl: &'a [f64],
458 /// Analytical derivative d(emission_rates)/d(ρ_eq) at the precomputed
459 /// ρ_eq. Used in the bordered Newton c-vector so the Δn-row Jacobian
460 /// reflects how the DC/BR absorption rate shifts when the Newton
461 /// updates the ρ_e iterate. Dominant term:
462 /// d(em)/d(ρ_eq) = -(K/x³) · exp(x/ρ_eq) · x/ρ_eq².
463 /// Pass `None` (empty slice) to retain the legacy Picard-in-ρ_e
464 /// behaviour, which is correct to O(Δρ_e per step) but only linearly
465 /// convergent at z ≳ 10⁶.
466 pub dem_drho_eq: &'a [f64],
467 /// Analytical derivative d(n_eq_minus_n_pl)/d(ρ_eq). Formula:
468 /// d(neq)/d(ρ_eq) = n_pl(x/ρ_eq)(1 + n_pl(x/ρ_eq)) · x/ρ_eq².
469 pub dneq_drho_eq: &'a [f64],
470 /// Integrated photon source over the step: S_i = source_rate(x_i, z_mid)·dt.
471 ///
472 /// When `Some`, the Newton residual picks up the `−S_i` term directly
473 /// rather than relying on the caller to pre-add `S_i` to Δn_old. This
474 /// avoids poisoning the Kompaneets CN "old" flux with the source (the
475 /// pre-add approach effectively treats the source as injected at
476 /// `t_old`, which over-Comptonises by roughly `O(dt · ∂K/∂t)` per step
477 /// during a narrow injection window). `None` preserves the legacy
478 /// pre-add caller code path.
479 pub photon_source: Option<&'a [f64]>,
480 /// Use Crank-Nicolson (instead of backward Euler) for DC/BR.
481 /// Requires old-step DC/BR buffers in the workspace.
482 pub cn_dcbr: bool,
483}
484
485/// In-place Kompaneets + DC/BR step using pre-allocated workspace.
486///
487/// Modifies `delta_n` from old values to new values.
488/// Identical physics to `kompaneets_step_nonlinear_coupled` but avoids
489/// per-step heap allocations.
490///
491/// DC/BR is handled via backward Euler within the Newton iteration:
492/// the DC/BR residual `dτ × em × (neq - Δn_new)` and Jacobian `dτ × em`
493/// are added to the Kompaneets system. Backward Euler is unconditionally
494/// stable (amplification → 0 for stiff rates), avoiding the oscillation
495/// that Crank-Nicolson would produce for the stiff DC/BR absorption at low x.
496///
497/// `max_dn_abs` is the current max|Δn| used for adaptive Newton tolerance.
498/// Pass 0.0 for the tightest tolerance (equivalent to old fixed 1e-14).
499///
500/// Returns `(converged, rho_e, last_correction)`. **Note:** the third field
501/// is the size of the last Newton *correction* `|δx|`, not the residual
502/// `|F(x)|`. At convergence `|δx| < tol` by construction; if the Newton
503/// loop exits via `max_newton_iter`, `last_correction` is the final step
504/// the solver attempted, which only upper-bounds the residual for
505/// contractive iterations. Treat it as a diagnostic, not a proof of
506/// small residual.
507pub fn kompaneets_step_coupled_inplace(
508 grid: &FrequencyGrid,
509 delta_n: &mut [f64],
510 theta_e: f64,
511 theta_z: f64,
512 dtau: f64,
513 dcbr: Option<&DcbrCoupling>,
514 rho_coupling: Option<&RhoECoupling>,
515 ws: &mut KompaneetsWorkspace,
516 max_dn_abs: f64,
517 max_newton_iter: usize,
518) -> (bool, f64, f64) {
519 let ng = grid.n;
520 // θ_e for the CN "old" flux uses the step-start ρ_e when provided by
521 // the caller (coupled mode). Without a `RhoECoupling` we assume T_e is
522 // constant over the step and use the passed θ_e for both CN half-steps;
523 // callers that evolve T_e via `update_temperatures` but do not enable the
524 // bordered Newton should pass `rho_coupling = Some(...)` to preserve
525 // time-centering (the `dh_drho`/`lambda_exp`/etc. fields can be zeroed).
526 let theta_e_old = if let Some(rc) = rho_coupling {
527 theta_z * rc.rho_e_old
528 } else {
529 theta_e
530 };
531
532 // Assert workspace sizes so the compiler can elide per-element bounds checks
533 // in the hot inner loops. All workspace arrays are allocated to ng or ng-1
534 // in KompaneetsWorkspace::new(), but LLVM can't prove this across function
535 // boundaries without these hints.
536 let n_half = ng - 1;
537 assert!(delta_n.len() >= ng);
538 assert!(ws.dn_old.len() >= ng);
539 assert!(ws.k_old.len() >= ng);
540 assert!(ws.rhs_buf.len() >= ng);
541 assert!(ws.j_lower.len() >= ng);
542 assert!(ws.j_diag.len() >= ng);
543 assert!(ws.j_upper.len() >= ng);
544 assert!(ws.c_vec.len() >= ng);
545 assert!(ws.wem.len() >= ng);
546 assert!(ws.inv_x2_dx_cell.len() >= ng);
547 assert!(ws.inv_x2_dx_cell_geom.len() >= ng);
548 assert!(ws.half_dtau_coeff.len() >= ng);
549 assert!(ws.quad_weights_x3.len() >= ng);
550 assert!(ws.v_buf.len() >= ng);
551 assert!(ws.n_pl_half.len() >= n_half);
552 assert!(ws.np1_half.len() >= n_half);
553 assert!(ws.twonp1_half.len() >= n_half);
554 assert!(ws.x4_half.len() >= n_half);
555 assert!(ws.inv_dx.len() >= n_half);
556 assert!(ws.x4_over_dx.len() >= n_half);
557
558 // Debug-mode input validation: catch NaN/Inf and unphysical parameters
559 // before they propagate through unsafe blocks. Zero cost in release.
560 debug_assert!(theta_e.is_finite() && theta_e > 0.0, "theta_e={theta_e}");
561 debug_assert!(theta_z.is_finite() && theta_z > 0.0, "theta_z={theta_z}");
562 debug_assert!(dtau.is_finite() && dtau >= 0.0, "dtau={dtau}");
563 debug_assert!(max_dn_abs.is_finite(), "max_dn_abs={max_dn_abs}");
564 debug_assert!(ng >= 3, "grid too small: ng={ng}");
565
566 // For the "old" part of CN, use the step-start ρ_e.
567 // When coupled, rho_e_old is the pre-update value; otherwise use theta_e/theta_z.
568 let phi_old = if let Some(rc) = rho_coupling {
569 1.0 / rc.rho_e_old
570 } else {
571 theta_z / theta_e
572 };
573
574 let mut rho_e = theta_e / theta_z; // initial guess (from update_temperatures)
575 let mut phi = 1.0 / rho_e;
576
577 // Save old delta_n for CN formula
578 ws.dn_old[..ng].copy_from_slice(&delta_n[..ng]);
579
580 // Fill per-step coefficients with θ_e_old for the K_old precompute
581 // below. Inside the Newton loop these are overwritten with θ_e
582 // evaluated at the current ρ_e iterate, so K_new and its Jacobian stay
583 // time-centred with the evolving electron temperature (genuine CN in θ_e,
584 // not frozen-θ_e). In non-coupled mode rho_e is constant within a step,
585 // so the refresh reproduces theta_e_old and is effectively a no-op.
586 for i in 1..ng - 1 {
587 ws.inv_x2_dx_cell[i] = theta_e_old * ws.inv_x2_dx_cell_geom[i];
588 ws.half_dtau_coeff[i] = 0.5 * dtau * ws.inv_x2_dx_cell[i];
589 }
590
591 // Precompute K(delta_n_old, φ_old) for Crank-Nicolson
592 // SAFETY: i ranges over 1..ng-1; all workspace arrays have length >= ng,
593 // half-point arrays have length >= ng-1. Asserted above.
594 for i in 1..ng - 1 {
595 unsafe {
596 let dn_old_im1 = *ws.dn_old.get_unchecked(i - 1);
597 let dn_old_i = *ws.dn_old.get_unchecked(i);
598 let dn_old_ip1 = *ws.dn_old.get_unchecked(i + 1);
599 let dn_half_l = 0.5 * (dn_old_im1 + dn_old_i);
600 let dn_half_r = 0.5 * (dn_old_i + dn_old_ip1);
601 let ddn_dx_l = (dn_old_i - dn_old_im1) * *ws.inv_dx.get_unchecked(i - 1);
602 let ddn_dx_r = (dn_old_ip1 - dn_old_i) * *ws.inv_dx.get_unchecked(i);
603
604 let f_l = *ws.x4_half.get_unchecked(i - 1)
605 * ((phi_old - 1.0) * *ws.np1_half.get_unchecked(i - 1)
606 + ddn_dx_l
607 + phi_old * *ws.twonp1_half.get_unchecked(i - 1) * dn_half_l
608 + phi_old * dn_half_l * dn_half_l);
609 let f_r = *ws.x4_half.get_unchecked(i)
610 * ((phi_old - 1.0) * *ws.np1_half.get_unchecked(i)
611 + ddn_dx_r
612 + phi_old * *ws.twonp1_half.get_unchecked(i) * dn_half_r
613 + phi_old * dn_half_r * dn_half_r);
614 *ws.k_old.get_unchecked_mut(i) = *ws.inv_x2_dx_cell.get_unchecked(i) * (f_r - f_l);
615 }
616 }
617
618 // Assert DC/BR slice lengths so bounds checks are elided in the inner loop.
619 if let Some(dc) = dcbr {
620 assert!(dc.emission_rates.len() >= ng);
621 assert!(dc.n_eq_minus_n_pl.len() >= ng);
622 assert!(dc.dem_drho_eq.len() >= ng);
623 assert!(dc.dneq_drho_eq.len() >= ng);
624 if let Some(ps) = dc.photon_source {
625 assert!(ps.len() >= ng);
626 }
627 if dc.cn_dcbr {
628 assert!(ws.dcbr_em_old.len() >= ng);
629 assert!(ws.dcbr_neq_old.len() >= ng);
630 }
631 }
632
633 // Hoist Option dispatch out of the inner loop. Extract DC/BR slice references
634 // and mode flags once so the hot loop body has no per-point Option checks.
635 // The empty slices are never indexed (guarded by has_dcbr / has_phot_src).
636 static EMPTY: [f64; 0] = [];
637 let (has_dcbr, use_cn, em_rates, neq_vals, dem_drho, dneq_drho, phot_src_vals): (
638 bool,
639 bool,
640 &[f64],
641 &[f64],
642 &[f64],
643 &[f64],
644 &[f64],
645 ) = if let Some(dc) = dcbr {
646 (
647 true,
648 dc.cn_dcbr,
649 dc.emission_rates,
650 dc.n_eq_minus_n_pl,
651 dc.dem_drho_eq,
652 dc.dneq_drho_eq,
653 dc.photon_source.unwrap_or(&EMPTY),
654 )
655 } else {
656 (false, false, &EMPTY, &EMPTY, &EMPTY, &EMPTY, &EMPTY)
657 };
658 let has_phot_src = !phot_src_vals.is_empty();
659 let has_rho_coupling = rho_coupling.is_some();
660
661 // Precompute w_j × em_j once per step (only for the bordered solve).
662 // em_rates and quad_weights_x3 are constant across Newton iterations;
663 // caching this product eliminates ~2× max_newton_iter redundant
664 // multiplies per grid point (h_dcbr pass + b'·u / b'·v pass).
665 if has_dcbr && has_rho_coupling {
666 for j in 0..ng {
667 ws.wem[j] = ws.quad_weights_x3[j] * em_rates[j];
668 }
669 }
670
671 // Newton iteration for the coupled system:
672 // Δn_new - Δn_old = dτ/2 (K_new(φ) + K_old(φ_old)) [CN for Kompaneets]
673 // + dτ · em · (neq - Δn_new) [BE for DC/BR]
674 let mut converged = false;
675 let mut last_max_delta: f64 = f64::NAN;
676 for _newton in 0..max_newton_iter {
677 // Note on θ_e time-centering (audit H5): in principle a fully time-
678 // centred CN-in-θ_e would refresh `inv_x2_dx_cell[i]` and
679 // `half_dtau_coeff[i]` with θ_e evaluated at the current ρ_e iterate
680 // each Newton pass. We do not do this. Refreshing changes the
681 // residual without updating `c_vec` to include the matching
682 // ∂(inv_x2_dx_cell)/∂ρ_e contribution, which breaks the bordered
683 // Newton Jacobian and can produce catastrophic non-convergence
684 // (observed: |Δρ/ρ| ~ 1 for post-recombination scenarios). Instead,
685 // the prefactor is held at θ_e_old for the whole step; φ = 1/ρ_e
686 // inside the flux is still iterated and the Jacobian is consistent.
687 // The time-centring error on θ_e is O(Δρ_e × θ_e) ~ 10⁻⁷ at z ~ 2×10⁶
688 // with |Δρ_e| < 10⁻³ — well below the CN truncation floor. Restoring
689 // a genuine CN in θ_e would require extending `c_vec` with the
690 // ∂(inv_x2_dx_cell)/∂ρ_e contribution.
691
692 // Boundary conditions: zero Kompaneets flux, but allow DC/BR relaxation.
693 for &bi in &[0_usize, ng - 1] {
694 let (dcbr_res, dcbr_jac) = if has_dcbr {
695 let em = em_rates[bi];
696 let neq = neq_vals[bi];
697 if use_cn {
698 let em_old = ws.dcbr_em_old[bi];
699 let neq_old = ws.dcbr_neq_old[bi];
700 let old_part = 0.5 * dtau * em_old * (neq_old - ws.dn_old[bi]);
701 (
702 old_part + 0.5 * dtau * em * (neq - delta_n[bi]),
703 0.5 * dtau * em,
704 )
705 } else {
706 (dtau * em * (neq - delta_n[bi]), dtau * em)
707 }
708 } else {
709 (0.0, 0.0)
710 };
711 let phot_src = if has_phot_src { phot_src_vals[bi] } else { 0.0 };
712 ws.rhs_buf[bi] = -(delta_n[bi] - ws.dn_old[bi] - dcbr_res - phot_src);
713 ws.j_diag[bi] = 1.0 + dcbr_jac;
714 ws.j_lower[bi] = 0.0;
715 ws.j_upper[bi] = 0.0;
716 }
717
718 // SAFETY: i ranges over 1..ng-1; all workspace/delta_n arrays have length >= ng,
719 // half-point arrays have length >= ng-1. DC/BR arrays (when has_dcbr) >= ng.
720 // All asserted at function entry.
721 for i in 1..ng - 1 {
722 unsafe {
723 let dn_im1 = *delta_n.get_unchecked(i - 1);
724 let dn_i = *delta_n.get_unchecked(i);
725 let dn_ip1 = *delta_n.get_unchecked(i + 1);
726 let dn_half_l = 0.5 * (dn_im1 + dn_i);
727 let dn_half_r = 0.5 * (dn_i + dn_ip1);
728
729 // Compute K(delta_n, φ) at interior point i (CN "new" part)
730 let ddn_dx_l = (dn_i - dn_im1) * *ws.inv_dx.get_unchecked(i - 1);
731 let ddn_dx_r = (dn_ip1 - dn_i) * *ws.inv_dx.get_unchecked(i);
732 let x4h_l = *ws.x4_half.get_unchecked(i - 1);
733 let x4h_r = *ws.x4_half.get_unchecked(i);
734 let f_l = x4h_l
735 * ((phi - 1.0) * *ws.np1_half.get_unchecked(i - 1)
736 + ddn_dx_l
737 + phi * *ws.twonp1_half.get_unchecked(i - 1) * dn_half_l
738 + phi * dn_half_l * dn_half_l);
739 let f_r = x4h_r
740 * ((phi - 1.0) * *ws.np1_half.get_unchecked(i)
741 + ddn_dx_r
742 + phi * *ws.twonp1_half.get_unchecked(i) * dn_half_r
743 + phi * dn_half_r * dn_half_r);
744 let inv_x2dc = *ws.inv_x2_dx_cell.get_unchecked(i);
745 let k_i = inv_x2dc * (f_r - f_l);
746
747 // DC/BR: backward Euler or Crank-Nicolson within the Newton iteration.
748 //
749 // `em_rates` and `neq_vals` are precomputed by the solver
750 // at the step-start ρ_eq and held fixed across Newton
751 // iterations. `dem_drho`/`dneq_drho` carry their analytical
752 // derivatives w.r.t. ρ_eq; we use them below to close the
753 // Δn-row Jacobian on ρ_e (c_vec). For CN mode the "old"
754 // half is frozen — its ρ_eq derivative does not contribute.
755 let (dcbr_residual, dcbr_jac, dcbr_crho) = if has_dcbr {
756 let em = *em_rates.get_unchecked(i);
757 let neq = *neq_vals.get_unchecked(i);
758 let dem = *dem_drho.get_unchecked(i);
759 let dneq = *dneq_drho.get_unchecked(i);
760 // d(dcbr_residual)/d(ρ_eq) from the "new" half, with BE
761 // weight 1 and CN weight 0.5.
762 let new_crho = -dtau * (dem * (neq - dn_i) + em * dneq);
763 if use_cn {
764 let em_old = *ws.dcbr_em_old.get_unchecked(i);
765 let neq_old = *ws.dcbr_neq_old.get_unchecked(i);
766 let old_part =
767 0.5 * dtau * em_old * (neq_old - *ws.dn_old.get_unchecked(i));
768 let new_res = 0.5 * dtau * em * (neq - dn_i);
769 (old_part + new_res, 0.5 * dtau * em, 0.5 * new_crho)
770 } else {
771 (dtau * em * (neq - dn_i), dtau * em, new_crho)
772 }
773 } else {
774 (0.0, 0.0, 0.0)
775 };
776
777 // Residual: CN for Kompaneets + BE for DC/BR + integrated
778 // source over the step. When `photon_source` is provided
779 // through `DcbrCoupling`, S_i enters as a known constant
780 // (no Jacobian contribution), and the Kompaneets CN "old"
781 // flux is computed from the un-injected Δn_old — so the
782 // source interacts with DC/BR absorption and Comptonisation
783 // over the full step rather than being treated as an
784 // impulse at t_old.
785 let phot_src = if has_phot_src {
786 *phot_src_vals.get_unchecked(i)
787 } else {
788 0.0
789 };
790 let residual_i = dn_i
791 - *ws.dn_old.get_unchecked(i)
792 - 0.5 * dtau * (k_i + *ws.k_old.get_unchecked(i))
793 - dcbr_residual
794 - phot_src;
795 *ws.rhs_buf.get_unchecked_mut(i) = -residual_i;
796
797 // Jacobian: CN (halved) for Kompaneets + BE diagonal for DC/BR
798 let hdc = *ws.half_dtau_coeff.get_unchecked(i);
799 let n_l = *ws.n_pl_half.get_unchecked(i - 1) + dn_half_l;
800 let n_r = *ws.n_pl_half.get_unchecked(i) + dn_half_r;
801 let a_l = *ws.x4_over_dx.get_unchecked(i - 1);
802 let b_l = x4h_l * phi * (2.0 * n_l + 1.0) * 0.5;
803 let a_r = *ws.x4_over_dx.get_unchecked(i);
804 let b_r = x4h_r * phi * (2.0 * n_r + 1.0) * 0.5;
805
806 *ws.j_lower.get_unchecked_mut(i) = -hdc * (a_l - b_l);
807 *ws.j_diag.get_unchecked_mut(i) = 1.0 - hdc * (-a_r + b_r - a_l - b_l) + dcbr_jac;
808 *ws.j_upper.get_unchecked_mut(i) = -hdc * (a_r + b_r);
809
810 // Column vector c_i = dR_i/dρ_e for bordered system.
811 // Two contributions:
812 // (a) Kompaneets flux: ∂F/∂ρ_e through the n_pl(1+n_pl)/ρ²
813 // piece (analytical; already in the Planck-subtracted
814 // split form used here).
815 // (b) DC/BR residual: ∂(dcbr_residual)/∂ρ_eq, carried in
816 // `dcbr_crho` above and plumbed via `dem_drho` /
817 // `dneq_drho`. Closing this piece restores formal
818 // quadratic Newton convergence in the ρ_e direction
819 // when DC/BR is strong (high z, photon-injection burst).
820 if has_rho_coupling {
821 let inv_rho2 = 1.0 / (rho_e * rho_e);
822 let dfdr_l = -x4h_l * inv_rho2 * n_l * (1.0 + n_l);
823 let dfdr_r = -x4h_r * inv_rho2 * n_r * (1.0 + n_r);
824 let c_kompaneets = -0.5 * dtau * inv_x2dc * (dfdr_r - dfdr_l);
825 *ws.c_vec.get_unchecked_mut(i) = c_kompaneets + dcbr_crho;
826 }
827 }
828 }
829
830 if let Some(rc) = rho_coupling {
831 // Bordered solve: (T, c; b', d) × (δΔn; δρ_e) = (r; r_ρ)
832 ws.c_vec[0] = 0.0;
833 ws.c_vec[ng - 1] = 0.0;
834
835 // Compute H_dcbr = h_norm × Σ wem_j × (neq_j − Δn_j),
836 // where wem_j = w_j × em_j was precomputed before the Newton loop.
837 let mut h_dcbr = 0.0;
838 if has_dcbr {
839 for j in 0..ng {
840 h_dcbr += ws.wem[j] * (neq_vals[j] - delta_n[j]);
841 }
842 h_dcbr *= rc.h_norm;
843 }
844
845 // ρ_e residual: dρ_e/dτ = R[(source − H_dcbr) − ρ_e] − H·t_C·ρ_e
846 // r_ρ = -(ρ_e − ρ_e_old) + dτ·{R·(source − h_dcbr − ρ_e) − H·t_C·ρ_e}
847 let rhs_rho = -(rho_e - rc.rho_e_old)
848 + dtau * (rc.r_compton * (rc.rho_source - h_dcbr - rho_e) - rc.lambda_exp * rho_e);
849
850 // d = dR_ρ/dρ_e = 1 + dτ·(R·(1 + dH/dρ_e) + H·t_C)
851 let d_rho = 1.0 + dtau * (rc.r_compton * (1.0 + rc.dh_drho) + rc.lambda_exp);
852
853 // Step 1: T·u = r (standard Thomas, result in rhs_buf)
854 thomas_solve_inplace(
855 &ws.j_lower,
856 &ws.j_diag,
857 &ws.j_upper,
858 &mut ws.rhs_buf,
859 &mut ws.thomas_work,
860 );
861
862 // Step 2: T·v = c (Thomas on c_vec, result in v_buf)
863 ws.v_buf[..ng].copy_from_slice(&ws.c_vec[..ng]);
864 thomas_solve_inplace(
865 &ws.j_lower,
866 &ws.j_diag,
867 &ws.j_upper,
868 &mut ws.v_buf,
869 &mut ws.thomas_work2,
870 );
871
872 // Step 3: b'·u and b'·v dot products.
873 // b'_j = ∂R_ρ/∂Δn_j = −dτ · R · h_norm · wem_j.
874 // Sign: h_dcbr = h_norm·Σ wem·(neq − Δn) ⇒ ∂h_dcbr/∂Δn_j = −h_norm·wem_j.
875 // R_ρ = (ρ − ρ_old) − dτ·[R·(source − h_dcbr − ρ) − Λ·ρ], so
876 // ∂R_ρ/∂Δn_j = −dτ·R·(−∂h_dcbr/∂Δn_j) = −dτ·R·h_norm·wem_j.
877 // Pull the scalar (−dτ·R·h_norm) out of the loop and apply once
878 // after the reductions — saves ng muls and keeps the inner loop
879 // a clean fused-multiply-add pair that LLVM can vectorise.
880 let mut bp_dot_u = 0.0;
881 let mut bp_dot_v = 0.0;
882 if has_dcbr {
883 for j in 0..ng {
884 let wem_j = ws.wem[j];
885 bp_dot_u += wem_j * ws.rhs_buf[j];
886 bp_dot_v += wem_j * ws.v_buf[j];
887 }
888 let scale = -dtau * rc.r_compton * rc.h_norm;
889 bp_dot_u *= scale;
890 bp_dot_v *= scale;
891 }
892
893 // Step 4: δρ_e = (r_ρ − b'·u) / (d − b'·v)
894 //
895 // If the bordered-system determinant collapses the problem is
896 // degenerate (no unique ρ_e correction). We signal this by
897 // producing a NaN so the outer Newton loop breaks immediately
898 // and the (false, _, NaN) return propagates non-convergence —
899 // masking the failure as `delta_rho = 0` would silently report
900 // convergence at max_delta below tol.
901 let denom = d_rho - bp_dot_v;
902 let delta_rho = if denom.abs() > 1e-30 {
903 (rhs_rho - bp_dot_u) / denom
904 } else {
905 f64::NAN
906 };
907
908 // Step 5: δΔn = u − v·δρ_e
909 let mut max_delta: f64 = 0.0;
910 for j in 0..ng {
911 let correction = ws.rhs_buf[j] - ws.v_buf[j] * delta_rho;
912 delta_n[j] += correction;
913 let d = correction.abs();
914 if d > max_delta {
915 max_delta = d;
916 }
917 }
918 rho_e += delta_rho;
919 phi = 1.0 / rho_e;
920
921 let rho_delta = delta_rho.abs();
922 if rho_delta > max_delta {
923 max_delta = rho_delta;
924 }
925 last_max_delta = max_delta;
926
927 if !max_delta.is_finite() {
928 break;
929 }
930 let tol = 1e-8 * max_dn_abs + 1e-14;
931 if max_delta < tol {
932 converged = true;
933 break;
934 }
935 } else {
936 // Standard tridiagonal solve (no ρ_e coupling)
937 thomas_solve_inplace(
938 &ws.j_lower,
939 &ws.j_diag,
940 &ws.j_upper,
941 &mut ws.rhs_buf,
942 &mut ws.thomas_work,
943 );
944
945 let mut max_delta: f64 = 0.0;
946 for i in 0..ng {
947 delta_n[i] += ws.rhs_buf[i];
948 let d = ws.rhs_buf[i].abs();
949 if d > max_delta {
950 max_delta = d;
951 }
952 }
953 last_max_delta = max_delta;
954
955 if !max_delta.is_finite() {
956 break;
957 }
958 let tol = 1e-8 * max_dn_abs + 1e-14;
959 if max_delta < tol {
960 converged = true;
961 break;
962 }
963 }
964 }
965 (converged, rho_e, last_max_delta)
966}
967
968#[cfg(test)]
969mod tests {
970 use super::*;
971 use crate::grid::FrequencyGrid;
972
973 #[test]
974 fn test_kompaneets_preserves_planck() {
975 // If Δn = 0 and T_e = T_z, Kompaneets should not generate distortion
976 let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 500);
977 let delta_n = vec![0.0; grid.n];
978 let theta_e = 1e-6; // some temperature
979 let theta_z = theta_e; // equilibrium
980
981 let result = kompaneets_step(&grid, &delta_n, theta_e, theta_z, 0.01);
982
983 let max_dn: f64 = result
984 .iter()
985 .map(|x| x.abs())
986 .fold(0.0, |a, b| if b.is_nan() { f64::NAN } else { a.max(b) });
987 assert!(
988 max_dn < 1e-12,
989 "Kompaneets should preserve Planck: max|Δn| = {max_dn}"
990 );
991 }
992
993 #[test]
994 fn test_kompaneets_photon_number_conservation() {
995 // Kompaneets scattering conserves photon number: ∫x² dn/dτ dx = 0
996 // Start with a small perturbation and evolve
997 let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 1000);
998 let theta_e = 1e-6;
999 let theta_z = theta_e;
1000
1001 // Small Gaussian perturbation centered at x=3
1002 let mut delta_n: Vec<f64> = grid
1003 .x
1004 .iter()
1005 .map(|&x| 1e-4 * (-(x - 3.0).powi(2) / 0.5).exp())
1006 .collect();
1007
1008 let n_before: f64 = (1..grid.n)
1009 .map(|i| {
1010 let dx = grid.dx[i - 1];
1011 let x_mid = grid.x_half[i - 1];
1012 let dn_mid = 0.5 * (delta_n[i] + delta_n[i - 1]);
1013 x_mid * x_mid * dn_mid * dx
1014 })
1015 .sum();
1016
1017 // Evolve for several steps
1018 for _ in 0..10 {
1019 delta_n = kompaneets_step(&grid, &delta_n, theta_e, theta_z, 0.001);
1020 }
1021
1022 let n_after: f64 = (1..grid.n)
1023 .map(|i| {
1024 let dx = grid.dx[i - 1];
1025 let x_mid = grid.x_half[i - 1];
1026 let dn_mid = 0.5 * (delta_n[i] + delta_n[i - 1]);
1027 x_mid * x_mid * dn_mid * dx
1028 })
1029 .sum();
1030
1031 let rel_change = (n_after - n_before).abs() / n_before.abs().max(1e-20);
1032 // Kompaneets is a number-conserving operator — boundary leakage
1033 // should be tiny on a grid extending to x_max=30
1034 assert!(
1035 rel_change < 1e-4,
1036 "Photon number not conserved: ΔN/N = {rel_change:.2e} (threshold 1e-4), \
1037 before={n_before:.4e}, after={n_after:.4e}"
1038 );
1039 }
1040
1041 #[test]
1042 fn test_kompaneets_energy_conservation() {
1043 // Kompaneets scattering with T_e = T_z should conserve energy: ∫x³ Δn dx = 0.
1044 // Energy is only redistributed (not created) when there's no T_e−T_z offset.
1045 // Start with a Gaussian perturbation and evolve with T_e = T_z.
1046 let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 1000);
1047 let theta = 1e-6;
1048
1049 // Gaussian perturbation at x=5
1050 let mut delta_n: Vec<f64> = grid
1051 .x
1052 .iter()
1053 .map(|&x| 1e-4 * (-(x - 5.0).powi(2) / 2.0).exp())
1054 .collect();
1055
1056 let energy_before: f64 = (1..grid.n)
1057 .map(|i| {
1058 let dx = grid.dx[i - 1];
1059 let x_mid = grid.x_half[i - 1];
1060 let dn_mid = 0.5 * (delta_n[i] + delta_n[i - 1]);
1061 x_mid.powi(3) * dn_mid * dx
1062 })
1063 .sum();
1064
1065 for _ in 0..10 {
1066 delta_n = kompaneets_step(&grid, &delta_n, theta, theta, 0.001);
1067 }
1068
1069 let energy_after: f64 = (1..grid.n)
1070 .map(|i| {
1071 let dx = grid.dx[i - 1];
1072 let x_mid = grid.x_half[i - 1];
1073 let dn_mid = 0.5 * (delta_n[i] + delta_n[i - 1]);
1074 x_mid.powi(3) * dn_mid * dx
1075 })
1076 .sum();
1077
1078 // With T_e = T_z, pure Kompaneets conserves energy to first order.
1079 // The nonlinear Δn² term introduces O(Δn²) energy change, but for
1080 // Δn ~ 1e-4 the correction is O(1e-8), far below 1e-4.
1081 let rel_change = (energy_after - energy_before).abs() / energy_before.abs().max(1e-20);
1082 assert!(
1083 rel_change < 1e-4,
1084 "Kompaneets energy not conserved with T_e=T_z: ΔE/E = {rel_change:.2e}, \
1085 before={energy_before:.4e}, after={energy_after:.4e}"
1086 );
1087 }
1088
1089 /// Quantitative Kompaneets check in the y-regime: injecting energy via
1090 /// T_e > T_z for one Thomson time should produce Δρ/ρ ≈ 4·y·G₃ where
1091 /// y = (θ_e − θ_z)·dτ is the standard y-parameter. This catches
1092 /// order-of-magnitude errors in the flux split, grid geometry, or time
1093 /// centring that the sign-only `test_kompaneets_te_gt_tz_positive_drho_all_solvers`
1094 /// would miss (audit M2).
1095 #[test]
1096 fn test_kompaneets_y_distortion_magnitude() {
1097 // Deep y-era, linear regime: small δρ_e, small dτ, no DC/BR, no T_e
1098 // evolution. δρ_e = 1e-3 keeps us well inside the Taylor branch where
1099 // the analytical Δρ/ρ = 4y expression is accurate.
1100 let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 2000);
1101 let delta_n = vec![0.0; grid.n];
1102 let theta_z = 1e-6;
1103 let delta_rho_e = 1e-3_f64;
1104 let theta_e = theta_z * (1.0 + delta_rho_e);
1105 let dtau = 1.0_f64;
1106
1107 // Expected: pure y-distortion with y = (θ_e − θ_z)·dτ and Δρ/ρ = 4y.
1108 let y_expected = (theta_e - theta_z) * dtau;
1109 let drho_expected = 4.0 * y_expected;
1110
1111 // Solve.
1112 let result = kompaneets_step_nonlinear(&grid, &delta_n, theta_e, theta_z, dtau);
1113 let drho = crate::spectrum::delta_rho_over_rho(&grid.x, &result);
1114
1115 let rel_err = (drho - drho_expected).abs() / drho_expected.abs();
1116 eprintln!(
1117 "Kompaneets y-magnitude: drho={drho:.4e}, expected={drho_expected:.4e}, \
1118 rel_err={rel_err:.2e}"
1119 );
1120 // 5% tolerance allows for finite grid resolution, the small Δn² term,
1121 // and truncation in the integration stencil. A factor-of-10⁶ bug in
1122 // the flux split (the class this test is meant to catch) would show
1123 // up as rel_err ≫ 1.
1124 assert!(
1125 rel_err < 0.05,
1126 "Kompaneets y-magnitude off: drho={drho:.4e}, expected={drho_expected:.4e}, \
1127 rel_err={rel_err:.2e}"
1128 );
1129 }
1130
1131 /// When T_e > T_z, all Kompaneets solver variants must produce Δρ/ρ > 0
1132 /// (energy flows from electrons to photons via upscattering).
1133 /// Tests CN (kompaneets_step), backward Euler (kompaneets_tridiagonal + Thomas),
1134 /// and nonlinear (kompaneets_step_nonlinear) in a single test.
1135 #[test]
1136 fn test_kompaneets_te_gt_tz_positive_drho_all_solvers() {
1137 let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 2000);
1138 let delta_n = vec![0.0; grid.n];
1139 let theta_z = 1e-4;
1140 let theta_e = theta_z * 1.001;
1141
1142 // 1. Crank-Nicolson (kompaneets_step)
1143 let result_cn = kompaneets_step(&grid, &delta_n, theta_e, theta_z, 0.1);
1144 let drho_cn = crate::spectrum::delta_rho_over_rho(&grid.x, &result_cn);
1145 assert!(drho_cn > 0.0, "CN: Δρ/ρ > 0 expected, got {drho_cn:.4e}");
1146
1147 // 2. Backward Euler (kompaneets_tridiagonal + Thomas solve)
1148 let (lower, diag, upper, source) = kompaneets_tridiagonal(&grid, theta_e, theta_z);
1149 let dtau = 1.0;
1150 let n = grid.n;
1151 let mut rhs: Vec<f64> = (0..n).map(|i| dtau * source[i]).collect();
1152 rhs[0] = 0.0;
1153 rhs[n - 1] = 0.0;
1154 let lhs_lower: Vec<f64> = lower.iter().map(|&a| -dtau * a).collect();
1155 let lhs_diag: Vec<f64> = diag
1156 .iter()
1157 .enumerate()
1158 .map(|(i, &b)| {
1159 if i == 0 || i == n - 1 {
1160 1.0
1161 } else {
1162 1.0 - dtau * b
1163 }
1164 })
1165 .collect();
1166 let lhs_upper: Vec<f64> = upper.iter().map(|&c| -dtau * c).collect();
1167 let result_be = thomas_solve(&lhs_lower, &lhs_diag, &lhs_upper, &mut rhs);
1168 let drho_be = crate::spectrum::delta_rho_over_rho(&grid.x, &result_be);
1169 assert!(drho_be > 0.0, "BE: Δρ/ρ > 0 expected, got {drho_be:.4e}");
1170
1171 // 3. Nonlinear solver (kompaneets_step_nonlinear)
1172 let result_nl = kompaneets_step_nonlinear(&grid, &delta_n, theta_e, theta_z, 1.0);
1173 let drho_nl = crate::spectrum::delta_rho_over_rho(&grid.x, &result_nl);
1174 assert!(drho_nl > 0.0, "NL: Δρ/ρ > 0 expected, got {drho_nl:.4e}");
1175
1176 eprintln!(
1177 "Source sign (all solvers): CN={drho_cn:.4e}, BE={drho_be:.4e}, NL={drho_nl:.4e}"
1178 );
1179 }
1180
1181 /// Guards the analytic Planck cancellation in `kompaneets_rhs` (CLAUDE.md
1182 /// pitfall #1). The flux is written as
1183 /// F = x⁴[(φ−1) n_pl(1+n_pl) + dΔn/dx + φ(2n_pl+1)Δn + φΔn²]
1184 /// so that with Δn = 0 and T_e = T_z (⇒ φ = 1), every term is identically
1185 /// zero BEFORE any finite differences touch n_pl. A naive flux that kept
1186 /// dn_pl/dx and +φ n_pl(1+n_pl) as separate pieces would leak O(dx²) ~ 10⁻⁴
1187 /// per point after the divergence, amplified by θ_e/x² at small x —
1188 /// ~1000× the physical y-signal ~10⁻⁵.
1189 ///
1190 /// Two probes:
1191 /// (a) Δn = 0, T_e = T_z → rhs = 0 to machine precision.
1192 /// (b) Δn = 0, T_e = T_z·(1+ε), ε = 1e-8 → rhs scales linearly with ε
1193 /// (the (φ−1) source is the ONLY nonzero piece).
1194 #[test]
1195 fn test_kompaneets_rhs_planck_cancellation() {
1196 let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 500);
1197 let delta_n = vec![0.0; grid.n];
1198 let theta = 1e-6;
1199
1200 // (a) Exact cancellation at T_e = T_z.
1201 let rhs_eq = kompaneets_rhs(&grid, &delta_n, theta, theta);
1202 let max_abs_eq: f64 = rhs_eq
1203 .iter()
1204 .copied()
1205 .filter(|v| v.is_finite())
1206 .fold(0.0, |a, b| a.max(b.abs()));
1207 assert!(
1208 max_abs_eq < 1e-20,
1209 "Planck + T_e=T_z must give rhs=0 to machine precision: max|rhs| = {max_abs_eq:.2e}. \
1210 Any value > ~1e-15 means the flux is using a finite-difference form of dn_pl/dx \
1211 instead of the analytic identity (CLAUDE.md pitfall #1)."
1212 );
1213
1214 // (b) Linearity in (φ-1): halving ε halves max|rhs| to relative 1e-6.
1215 // If an O(dx²) Planck leak were present, rhs wouldn't scale with ε and
1216 // this ratio would stay near 1.
1217 let eps_big: f64 = 1e-6;
1218 let eps_small: f64 = 1e-8;
1219 let rhs_big = kompaneets_rhs(&grid, &delta_n, theta, theta * (1.0 + eps_big));
1220 let rhs_small = kompaneets_rhs(&grid, &delta_n, theta, theta * (1.0 + eps_small));
1221 let max_big: f64 = rhs_big
1222 .iter()
1223 .copied()
1224 .filter(|v| v.is_finite())
1225 .fold(0.0, |a, b| a.max(b.abs()));
1226 let max_small: f64 = rhs_small
1227 .iter()
1228 .copied()
1229 .filter(|v| v.is_finite())
1230 .fold(0.0, |a, b| a.max(b.abs()));
1231 let expected_ratio = eps_big / eps_small; // 100
1232 let actual_ratio = max_big / max_small;
1233 assert!(
1234 (actual_ratio / expected_ratio - 1.0).abs() < 1e-3,
1235 "rhs must scale ∝ (φ-1): max_big/max_small = {actual_ratio:.4} vs expected {expected_ratio:.4}. \
1236 Deviation > 0.1% means the RHS has a (φ-1)-independent residual (Planck leakage)."
1237 );
1238 }
1239
1240 #[test]
1241 fn test_nonlinear_preserves_planck() {
1242 // Nonlinear solver on Δn should preserve zero when T_e = T_z
1243 let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 500);
1244 let delta_n = vec![0.0; grid.n];
1245 let theta = 1e-6;
1246
1247 let result = kompaneets_step_nonlinear(&grid, &delta_n, theta, theta, 0.01);
1248
1249 let max_err: f64 = result
1250 .iter()
1251 .map(|x| x.abs())
1252 .fold(0.0, |a, b| if b.is_nan() { f64::NAN } else { a.max(b) });
1253 eprintln!("Nonlinear preserves Planck: max|Δn| = {max_err:.4e}");
1254 assert!(max_err < 1e-12, "max|Δn| = {max_err}");
1255 }
1256
1257 #[test]
1258 fn test_thomas_solve_inplace_2x2() {
1259 // [[2, -1], [-1, 2]] x = [1, 1] → x = [1, 1]
1260 let lower = vec![0.0, -1.0];
1261 let diag = vec![2.0, 2.0];
1262 let upper = vec![-1.0, 0.0];
1263 let mut rhs = vec![1.0, 1.0];
1264 let mut work = vec![0.0; 2];
1265 thomas_solve_inplace(&lower, &diag, &upper, &mut rhs, &mut work);
1266 assert!(
1267 (rhs[0] - 1.0).abs() < 1e-14,
1268 "x[0] = {}, expected 1.0",
1269 rhs[0]
1270 );
1271 assert!(
1272 (rhs[1] - 1.0).abs() < 1e-14,
1273 "x[1] = {}, expected 1.0",
1274 rhs[1]
1275 );
1276 }
1277
1278 #[test]
1279 fn test_thomas_solve_inplace_3x3() {
1280 // Classic tridiagonal: [2,-1,0; -1,2,-1; 0,-1,2] x = [1, 0, 1]
1281 // Known solution: x = [1, 1, 1]
1282 let lower = vec![0.0, -1.0, -1.0];
1283 let diag = vec![2.0, 2.0, 2.0];
1284 let upper = vec![-1.0, -1.0, 0.0];
1285 let mut rhs = vec![1.0, 0.0, 1.0];
1286 let mut work = vec![0.0; 3];
1287 thomas_solve_inplace(&lower, &diag, &upper, &mut rhs, &mut work);
1288 assert!(
1289 (rhs[0] - 1.0).abs() < 1e-14,
1290 "x[0] = {}, expected 1.0",
1291 rhs[0]
1292 );
1293 assert!(
1294 (rhs[1] - 1.0).abs() < 1e-14,
1295 "x[1] = {}, expected 1.0",
1296 rhs[1]
1297 );
1298 assert!(
1299 (rhs[2] - 1.0).abs() < 1e-14,
1300 "x[2] = {}, expected 1.0",
1301 rhs[2]
1302 );
1303 }
1304
1305 #[test]
1306 fn test_thomas_solve_inplace_identity() {
1307 // Diagonal matrix (no off-diags): solution = rhs / diag
1308 let lower = vec![0.0, 0.0, 0.0, 0.0];
1309 let diag = vec![3.0, 5.0, 2.0, 7.0];
1310 let upper = vec![0.0, 0.0, 0.0, 0.0];
1311 let mut rhs = vec![9.0, 10.0, 6.0, 21.0];
1312 let mut work = vec![0.0; 4];
1313 thomas_solve_inplace(&lower, &diag, &upper, &mut rhs, &mut work);
1314 let expected = [3.0, 2.0, 3.0, 3.0];
1315 for i in 0..4 {
1316 assert!(
1317 (rhs[i] - expected[i]).abs() < 1e-14,
1318 "x[{i}] = {}, expected {}",
1319 rhs[i],
1320 expected[i]
1321 );
1322 }
1323 }
1324
1325 #[test]
1326 fn test_coupled_inplace_preserves_planck() {
1327 // The production solver kompaneets_step_coupled_inplace should
1328 // preserve a Planck spectrum when T_e = T_z and no DC/BR coupling.
1329 let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 500);
1330 let mut delta_n = vec![0.0; grid.n];
1331 let theta = 1e-6;
1332 let dtau = 0.01;
1333 let mut ws = KompaneetsWorkspace::new(&grid);
1334
1335 // Returns (converged, rho_e, last_max_delta)
1336 let (converged, rho_e, last_delta) = kompaneets_step_coupled_inplace(
1337 &grid,
1338 &mut delta_n,
1339 theta,
1340 theta,
1341 dtau,
1342 None,
1343 None,
1344 &mut ws,
1345 0.0,
1346 20,
1347 );
1348
1349 assert!(converged, "Newton should converge for zero distortion");
1350 // rho_e should be 1.0 (T_e = T_z)
1351 assert!(
1352 (rho_e - 1.0).abs() < 1e-10,
1353 "rho_e should be 1.0 at equilibrium: {rho_e}"
1354 );
1355 // last Newton correction should be tiny
1356 assert!(
1357 last_delta < 1e-12,
1358 "Newton correction should be tiny: {last_delta:.4e}"
1359 );
1360
1361 let max_dn: f64 = delta_n
1362 .iter()
1363 .map(|x| x.abs())
1364 .fold(0.0, |a, b| if b.is_nan() { f64::NAN } else { a.max(b) });
1365 assert!(
1366 max_dn < 1e-14,
1367 "Should preserve Planck: max|Δn| = {max_dn:.4e}"
1368 );
1369 }
1370
1371 #[test]
1372 fn test_coupled_inplace_with_dcbr() {
1373 // Test the coupled solver with DC/BR coupling active.
1374 // With T_e = T_z and Δn = 0, DC/BR are at detailed balance.
1375 let grid = FrequencyGrid::log_uniform(1e-3, 30.0, 500);
1376 let mut delta_n = vec![0.0; grid.n];
1377 let theta = 1e-4;
1378 let dtau = 0.01;
1379 let mut ws = KompaneetsWorkspace::new(&grid);
1380
1381 // At equilibrium (ρ_e=1), n_eq = 0 and emission rates are finite but irrelevant
1382 let emission_rates: Vec<f64> = grid
1383 .x
1384 .iter()
1385 .map(|&x| {
1386 let k_dc = crate::double_compton::dc_emission_coefficient(x, theta);
1387 let x_e: f64 = 1.0;
1388 (k_dc / x.powi(3)) * (x_e.exp() - 1.0)
1389 })
1390 .collect();
1391 let n_eq: Vec<f64> = vec![0.0; grid.n];
1392
1393 let dem = vec![0.0; grid.n];
1394 let dneq = vec![0.0; grid.n];
1395 let dcbr = DcbrCoupling {
1396 emission_rates: &emission_rates,
1397 n_eq_minus_n_pl: &n_eq,
1398 dem_drho_eq: &dem,
1399 dneq_drho_eq: &dneq,
1400 photon_source: None,
1401 cn_dcbr: false,
1402 };
1403
1404 let (converged, rho_e, last_delta) = kompaneets_step_coupled_inplace(
1405 &grid,
1406 &mut delta_n,
1407 theta,
1408 theta,
1409 dtau,
1410 Some(&dcbr),
1411 None,
1412 &mut ws,
1413 0.0,
1414 20,
1415 );
1416
1417 assert!(
1418 converged,
1419 "Newton should converge with DC/BR at equilibrium"
1420 );
1421 assert!((rho_e - 1.0).abs() < 1e-10, "rho_e should be 1.0: {rho_e}");
1422 assert!(
1423 last_delta < 1e-10,
1424 "Newton correction should be small: {last_delta:.4e}"
1425 );
1426 }
1427
1428 /// Verify that a small temperature perturbation produces a Y_SZ spectral shape.
1429 ///
1430 /// For T_e slightly > T_z, the Kompaneets equation produces Δn ∝ Y_SZ(x).
1431 /// This is the defining property of the y-distortion. The Pearson correlation
1432 /// between the resulting Δn and Y_SZ(x) should be > 0.95.
1433 #[test]
1434 fn test_kompaneets_yields_ysz_shape() {
1435 let grid = FrequencyGrid::log_uniform(0.1, 25.0, 500);
1436 let delta_n = vec![0.0; grid.n];
1437 let theta_z = 1e-6;
1438 let theta_e = 1.001 * theta_z; // small perturbation: T_e = 1.001 T_z
1439
1440 // Run one small step
1441 let result = kompaneets_step(&grid, &delta_n, theta_e, theta_z, 0.1);
1442
1443 // Compute Y_SZ(x) at grid points
1444 let y_shape: Vec<f64> = grid
1445 .x
1446 .iter()
1447 .map(|&x| crate::spectrum::y_shape(x))
1448 .collect();
1449
1450 // Pearson correlation between result and Y_SZ
1451 let n = result.len();
1452 let mean_r: f64 = result.iter().sum::<f64>() / n as f64;
1453 let mean_y: f64 = y_shape.iter().sum::<f64>() / n as f64;
1454
1455 let mut cov = 0.0;
1456 let mut var_r = 0.0;
1457 let mut var_y = 0.0;
1458 for i in 0..n {
1459 let dr = result[i] - mean_r;
1460 let dy = y_shape[i] - mean_y;
1461 cov += dr * dy;
1462 var_r += dr * dr;
1463 var_y += dy * dy;
1464 }
1465 let corr = cov / (var_r.sqrt() * var_y.sqrt());
1466 eprintln!("Kompaneets Y_SZ correlation: {corr:.6}");
1467
1468 assert!(
1469 corr > 0.95,
1470 "Kompaneets step with T_e > T_z should produce Y_SZ shape: correlation = {corr:.4}"
1471 );
1472 }
1473}