!> @file spatial_discretization.f90 !> @brief Spatial residual computation via flux splitting and reconstruction. module spatial_discretization use precision, only: wp use option_registry, only: hybrid_sensor_jameson, hybrid_sensor_density_gradient, & hybrid_sensor_weno_beta implicit none private public :: compute_resid contains ! --------------------------------------------------------------------------- !> Compute the spatial residual R(Q) = -1/dx * (F_{i+1/2} - F_{i-1/2}). !! !! Two code paths are supported, selected by state%use_fds: !! !! **FVS path** (Lax-Friedrichs, Steger-Warming, van Leer): !! 1. Computes F^+(Q_j) and F^-(Q_j) at all grid points. !! 2. Averages adjacent states to get K and K^{-1} at the interface. !! 3. Assembles the stencil for both F^+ (left-biased) !! and F^- (right-biased), applying ghost states at boundaries. !! 4. Calls the active reconstruction scheme for each split flux. !! 5. Sums the contributions: F_hat = F_hat^+ + F_hat^-. !! !! **FDS path** (AUSM+, HLL, HLLC, Roe): !! 1. Reconstructs Q_L and Q_R at each face from cell Q values. !! 2. Calls the active FDS solver: F_hat = fds_solver(Q_L, Q_R, gam). !! !! See course notes, Sec. "Finite Difference WENO Scheme", Eq. (28)-(30). !! !! @param[inout] state Solver instance state. ! --------------------------------------------------------------------------- subroutine compute_resid(state) use solver_state, only: solver_state_t, neq use euler_physics, only: compute_eigensystem use reconstruction, only: max_stencil_width use boundary_conditions, only: apply_bcs use positivity_limiter, only: limit_positivity use timer, only: timer_start, timer_stop type(solver_state_t), intent(inout) :: state integer :: i, iface real(wp) :: flux_pos(neq), flux_neg(neq) real(wp) :: r_mat(neq, neq), r_inv(neq, neq), q_avg(neq) real(wp) :: f_stencil(neq, max_stencil_width) real(wp) :: q_stencil(neq, max_stencil_width) real(wp) :: q_face_L(neq), q_face_R(neq) integer :: stencil_width real(wp) :: sensor_val logical :: is_smooth stencil_width = state % stencil_width if (state % cfg % do_timing) call timer_start(state % perf % resid) ! Update ghost-state vectors from the current solution and BC types. call apply_bcs(state) if (state % use_fds) then ! ----------------------------------------------------------------------- ! FDS path (AUSM+, HLL, HLLC, Roe): reconstruct Q_L and Q_R at each face. ! ----------------------------------------------------------------------- if (state % cfg % do_timing) call timer_start(state % perf % faceloop) face_loop_ausm: do iface = 1, state % n_pt + 1 ! Left-biased stencil of Q for Q_L (same index pattern as F^+ stencil). call fill_stencil(state % ub, state % n_pt, iface + state % stencil_start_offset, 1, & & state % q_left, state % q_right, state % is_periodic, q_stencil(:, 1:stencil_width)) ! Hybrid sensor: classify face as smooth or non-smooth. ! Computed once per face; applied to both Q_L and Q_R reconstructions. if (state % cfg % use_hybrid_recon) then sensor_val = eval_face_sensor(state, iface, q_stencil(:, 1:stencil_width)) is_smooth = (sensor_val <= state % cfg % hybrid_sensor_threshold) else is_smooth = .false. end if if (is_smooth) then call state % smooth_reconstruct(q_stencil(:, 1:stencil_width), q_face_L) else call state % reconstruct(q_stencil(:, 1:stencil_width), q_face_L) end if ! Zhang-Shu limiter: scale Q_L toward cell average to ensure ρ,p ≥ ε. ! Skipped at the left boundary (iface=1) where q_face_L comes from the ! ghost state set explicitly by boundary conditions. if (state % cfg % use_positivity_limiter .and. iface > 1) & call limit_positivity(q_face_L, state % ub(:, iface - 1), state % cfg % gam) ! Right-biased (reversed) stencil of Q for Q_R. call fill_stencil(state % ub, state % n_pt, iface - state % stencil_start_offset - 1, -1, & & state % q_left, state % q_right, state % is_periodic, q_stencil(:, 1:stencil_width)) if (is_smooth) then call state % smooth_reconstruct(q_stencil(:, 1:stencil_width), q_face_R) else call state % reconstruct(q_stencil(:, 1:stencil_width), q_face_R) end if ! Zhang-Shu limiter for Q_R; skipped at the right boundary (iface=n_pt+1). if (state % cfg % use_positivity_limiter .and. iface <= state % n_pt) & call limit_positivity(q_face_R, state % ub(:, iface), state % cfg % gam) call state % fds_solver(q_face_L, q_face_R, state % num_flux(:, iface), state % cfg % gam) end do face_loop_ausm if (state % cfg % do_timing) call timer_stop(state % perf % faceloop) else ! ----------------------------------------------------------------------- ! FVS path: split fluxes at cells, reconstruct, sum at faces. ! ----------------------------------------------------------------------- ! Compute split fluxes F^+ and F^- at every grid point. ! Use the fused split_both routine (one primitive extraction per cell) ! rather than two separate flux_split calls. if (state % cfg % do_timing) call timer_start(state % perf % fluxsplit) do i = 1, state % n_pt ! Guard: FVS split routines call sqrt(γp/ρ); detect a vacuum or ! non-physical pressure before that to produce a clear error rather ! than propagating NaN through the residual. block real(wp) :: rho_i, p_i rho_i = state % ub(1, i) if (rho_i <= 0.0_wp) & error stop 'compute_resid: non-positive density in FVS precompute' p_i = (state % ub(3, i) - 0.5_wp * state % ub(2, i)**2 / rho_i) & * (state % cfg % gam - 1.0_wp) if (p_i <= 0.0_wp) & error stop 'compute_resid: non-positive pressure in FVS precompute' end block call state % split_both(state % ub(:, i), state % fp(:, i), state % fm(:, i), & state % cfg % gam) end do if (state % cfg % do_timing) call timer_stop(state % perf % fluxsplit) ! Loop over all cell faces to compute the numerical flux. if (state % cfg % do_timing) call timer_start(state % perf % faceloop) face_loop: do iface = 1, state % n_pt + 1 ! Compute averaged state at the interface for the eigensystem. ! Using arithmetic average: Q_avg = 0.5*(Q_L + Q_R). ! See course notes, Sec. "Generalization to Systems", Step 1. if (iface == 1) then if (state % is_periodic) then ! Periodic left ghost is the last unique interior point. q_avg = 0.5_wp * (state % ub(:, state % n_pt - 1) + state % ub(:, 1)) else q_avg = 0.5_wp * (state % q_left + state % ub(:, 1)) end if else if (iface == state % n_pt + 1) then if (state % is_periodic) then ! Periodic right ghost is the second grid point. ! ub(:,n_pt) = ub(:,1) for periodic, so this = 0.5*(ub(:,1)+ub(:,2)). q_avg = 0.5_wp * (state % ub(:, state % n_pt) + state % ub(:, 2)) else q_avg = 0.5_wp * (state % ub(:, state % n_pt) + state % q_right) end if else q_avg = 0.5_wp * (state % ub(:, iface - 1) + state % ub(:, iface)) end if ! Step 2: Compute eigenvector matrices at this interface. call compute_eigensystem(q_avg, r_mat, r_inv, state % cfg % gam) ! Hybrid sensor: classify face as smooth or non-smooth using Q (ub), ! not the split-flux stencil, so the same sensor logic applies to ! both the FVS and FDS paths. No q_stencil is available here, so ! weno_beta falls back to density_gradient automatically inside ! eval_face_sensor when the stencil argument has fewer than 5 columns. if (state % cfg % use_hybrid_recon) then sensor_val = eval_face_sensor(state, iface, state % ub(:, 1:0)) is_smooth = (sensor_val <= state % cfg % hybrid_sensor_threshold) else is_smooth = .false. end if ! --- Positive flux F^+: left-biased stencil --- ! Left-biased reconstruction stencil selected by the active scheme ! metadata. For example: WENO5 starts at iface-3 (width 5), ! WENO-CU6 starts at iface-3 (width 6), WENO11 starts at iface-6. call fill_stencil(state % fp, state % n_pt, iface + state % stencil_start_offset, 1, & & state % fl_pos, state % fr_pos, state % is_periodic, f_stencil(:, 1:stencil_width)) if (is_smooth) then call reconstruct_with_proj(state % smooth_reconstruct, f_stencil(:, 1:stencil_width), & & r_mat, r_inv, state % use_char_proj, flux_pos) else call reconstruct_with_proj(state % reconstruct, f_stencil(:, 1:stencil_width), & & r_mat, r_inv, state % use_char_proj, flux_pos) end if ! --- Negative flux F^-: right-biased (mirror) stencil --- ! Stencil is assembled in reverse order for the right-biased ! reconstruction, so that the same routine can be reused. ! Points: iface - stencil_start_offset - 1 down to iface + stencil_start_offset, ! assembled in reverse order so that the same reconstruction routine can be reused. call fill_stencil(state % fm, state % n_pt, iface - state % stencil_start_offset - 1, -1, & & state % fl_neg, state % fr_neg, state % is_periodic, f_stencil(:, 1:stencil_width)) if (is_smooth) then call reconstruct_with_proj(state % smooth_reconstruct, f_stencil(:, 1:stencil_width), & & r_mat, r_inv, state % use_char_proj, flux_neg) else call reconstruct_with_proj(state % reconstruct, f_stencil(:, 1:stencil_width), & & r_mat, r_inv, state % use_char_proj, flux_neg) end if ! Total numerical flux at this face. state % num_flux(:, iface) = flux_pos + flux_neg end do face_loop if (state % cfg % do_timing) call timer_stop(state % perf % faceloop) end if ! Compute the residual: R_i = -1/dx * (F_{i+1/2} - F_{i-1/2}). ! See course notes, Eq. (28). do i = 1, state % n_pt state % resid(:, i) = -(state % num_flux(:, i + 1) - state % num_flux(:, i)) / state % dx end do if (state % cfg % do_timing) call timer_stop(state % perf % resid) end subroutine compute_resid ! --------------------------------------------------------------------------- !> Fill a reconstruction stencil from `field`, applying ghost states at !! domain boundaries. !! !! Iterates over stencil positions i = 1 .. size(stencil, 2) and maps each !! to grid index pt_idx = pt_start + (i-1)*pt_step. Out-of-bounds indices !! are handled as follows: !! - Periodic domain: wrap into [1, n_pts-1] via modulo arithmetic. !! - Non-periodic, low side (pt_idx < 1): use ghost_lo. !! - Non-periodic, high side (pt_idx > n_pts): use ghost_hi. !! !! Use pt_step = +1 for a left-biased stencil (F^+, Q_L reconstruction). !! Use pt_step = -1 for a right-biased (mirror) stencil (F^-, Q_R). !! !! @param field Source data array (neq x n_pts). !! @param n_pts Number of active grid points. !! @param pt_start Grid index for stencil position i = 1. !! @param pt_step Index stride per position: +1 (left-biased) or -1 (right-biased). !! @param ghost_lo Ghost state applied when pt_idx < 1. !! @param ghost_hi Ghost state applied when pt_idx > n_pts. !! @param periodic If .true., wrap out-of-bounds indices periodically. !! @param stencil Output stencil (neq x stencil_width), overwritten. ! --------------------------------------------------------------------------- pure subroutine fill_stencil(field, n_pts, pt_start, pt_step, & ghost_lo, ghost_hi, periodic, stencil) real(wp), intent(in) :: field(:, :), ghost_lo(:), ghost_hi(:) integer, intent(in) :: n_pts, pt_start, pt_step logical, intent(in) :: periodic real(wp), intent(out) :: stencil(:, :) integer :: i, pt_idx do i = 1, size(stencil, 2) pt_idx = pt_start + (i - 1) * pt_step if (pt_idx < 1 .or. pt_idx > n_pts) then if (periodic) then stencil(:, i) = field(:, modulo(pt_idx - 1, n_pts - 1) + 1) else if (pt_idx < 1) then stencil(:, i) = ghost_lo else stencil(:, i) = ghost_hi end if else stencil(:, i) = field(:, pt_idx) end if end do end subroutine fill_stencil ! --------------------------------------------------------------------------- !> Reconstruct a face flux or state with optional characteristic projection. !! !! If proj is .true.: !! 1. Transform each stencil column to characteristic space: K^{-1} * f. !! 2. Reconstruct the face value in characteristic space. !! 3. Map back to physical space: K * flux_out. !! If proj is .false.: !! Call recon directly on the physical-space stencil. !! !! See course notes, Sec. "Generalization to Systems", Steps 3-4. !! !! @param recon Active reconstruction procedure pointer. !! @param stencil Input flux or state stencil (neq x stencil_width). !! @param r_mat Right eigenvector matrix K (neq x neq). !! @param r_inv Inverse eigenvector matrix K^{-1} (neq x neq). !! @param proj Enable characteristic-space projection. !! @param flux_out Reconstructed face value (neq), overwritten. ! --------------------------------------------------------------------------- subroutine reconstruct_with_proj(recon, stencil, r_mat, r_inv, proj, flux_out) use solver_interfaces, only: reconstructor_iface procedure(reconstructor_iface) :: recon real(wp), intent(in) :: stencil(:, :), r_mat(:, :), r_inv(:, :) logical, intent(in) :: proj real(wp), intent(out) :: flux_out(:) real(wp) :: proj_stencil(size(stencil, 1), size(stencil, 2)) real(wp) :: tmp1, tmp2, tmp3 integer :: k if (proj) then ! Hand-unrolled 3x3 matrix-vector products to avoid runtime library dispatch ! (matmul on assumed-shape arrays generates a _gfortran_matmul_r8 call even at ! -O3 because the rank is not known inside the subroutine; with BLAS linked the ! compiler may additionally dispatch to dgemm, which has non-trivial startup cost ! for a 3x3 kernel). The unrolled form exposes 9 FMAs per column for the ! compiler's vectorizer. do k = 1, size(stencil, 2) proj_stencil(1, k) = r_inv(1, 1) * stencil(1, k) + r_inv(1, 2) * stencil(2, k) & + r_inv(1, 3) * stencil(3, k) proj_stencil(2, k) = r_inv(2, 1) * stencil(1, k) + r_inv(2, 2) * stencil(2, k) & + r_inv(2, 3) * stencil(3, k) proj_stencil(3, k) = r_inv(3, 1) * stencil(1, k) + r_inv(3, 2) * stencil(2, k) & + r_inv(3, 3) * stencil(3, k) end do call recon(proj_stencil, flux_out) ! Back-project: flux_out = r_mat * flux_out (3x3 matmul, hand-unrolled) tmp1 = r_mat(1, 1) * flux_out(1) + r_mat(1, 2) * flux_out(2) + r_mat(1, 3) * flux_out(3) tmp2 = r_mat(2, 1) * flux_out(1) + r_mat(2, 2) * flux_out(2) + r_mat(2, 3) * flux_out(3) tmp3 = r_mat(3, 1) * flux_out(1) + r_mat(3, 2) * flux_out(2) + r_mat(3, 3) * flux_out(3) flux_out(1) = tmp1 flux_out(2) = tmp2 flux_out(3) = tmp3 else call recon(stencil, flux_out) end if end subroutine reconstruct_with_proj ! --------------------------------------------------------------------------- !> Evaluate the hybrid shock sensor for face iface. !! !! Dispatches to the sensor type set in state%cfg%hybrid_sensor: !! 'jameson' — Jameson-Schmidt-Turkel pressure second-derivative !! sensor (JST 1981, AIAA Paper 81-1259). !! 'density_gradient' — Normalised density jump across the face. !! 'weno_beta' — WENO5 smoothness-indicator ratio on density !! (Jiang & Shu 1996, Eq. 24-26); falls back to !! density_gradient for non-5-point schemes or when !! q_stencil has < 5 columns. !! !! @param state Solver state (solution arrays and config). !! @param iface Face index in [1, n_pt+1]. !! @param q_stencil Assembled Q stencil (neq x ≥5) for weno_beta sensor; !! pass a zero-column slice for the FVS path. !! @return Non-negative sensor value; larger = more non-smooth. ! --------------------------------------------------------------------------- real(wp) function eval_face_sensor(state, iface, q_stencil) result(s) use solver_state, only: solver_state_t, neq use weno_family, only: weno5_smoothness, eps_weno type(solver_state_t), intent(in) :: state integer, intent(in) :: iface real(wp), intent(in) :: q_stencil(:, :) real(wp) :: q_m2(neq), q_m1(neq), q_p1(neq), q_p2(neq) real(wp) :: p_m2, p_m1, p_p1, p_p2, gm1 real(wp) :: rho_stencil(1, 5), beta0(1), beta1(1), beta2(1) q_m1 = get_q_at_face(state, iface - 1) q_p1 = get_q_at_face(state, iface) select case (trim(state % cfg % hybrid_sensor)) case (hybrid_sensor_jameson) q_m2 = get_q_at_face(state, iface - 2) q_p2 = get_q_at_face(state, iface + 1) gm1 = state % cfg % gam - 1.0_wp p_m2 = gm1 * (q_m2(3) - 0.5_wp * q_m2(2)**2 / q_m2(1)) p_m1 = gm1 * (q_m1(3) - 0.5_wp * q_m1(2)**2 / q_m1(1)) p_p1 = gm1 * (q_p1(3) - 0.5_wp * q_p1(2)**2 / q_p1(1)) p_p2 = gm1 * (q_p2(3) - 0.5_wp * q_p2(2)**2 / q_p2(1)) s = (abs(p_m2 - 2.0_wp * p_m1 + p_p1) + abs(p_m1 - 2.0_wp * p_p1 + p_p2)) & / (abs(p_m2) + 2.0_wp * abs(p_m1) + 2.0_wp * abs(p_p1) + abs(p_p2) & + tiny(1.0_wp)) case (hybrid_sensor_density_gradient) s = abs(q_p1(1) - q_m1(1)) & / (0.5_wp * (abs(q_p1(1)) + abs(q_m1(1))) + tiny(1.0_wp)) case (hybrid_sensor_weno_beta) if (state % stencil_width == 5 .and. size(q_stencil, 2) >= 5) then ! WENO5 smoothness-indicator ratio on density (equation 1). ! Large value indicates a discontinuity in the reconstruction stencil. rho_stencil(1, :) = q_stencil(1, 1:5) call weno5_smoothness(rho_stencil, beta0, beta1, beta2) s = max(beta0(1), beta1(1), beta2(1)) & / (min(beta0(1), beta1(1), beta2(1)) + eps_weno) else ! No Q stencil (FVS path) — fall back to density_gradient. s = abs(q_p1(1) - q_m1(1)) & / (0.5_wp * (abs(q_p1(1)) + abs(q_m1(1))) + tiny(1.0_wp)) end if case default s = 0.0_wp ! unreachable: config validation guards this path end select end function eval_face_sensor ! --------------------------------------------------------------------------- !> Return Q at grid cell idx with ghost-state fallback at boundaries. !! !! Mirrors the boundary-aware averaging in the FVS face loop (lines ~124-141). !! !! @param state Solver state. !! @param idx Cell index (may lie outside [1, n_pt]). !! @return Q(neq) for that cell, or the appropriate ghost state. ! --------------------------------------------------------------------------- function get_q_at_face(state, idx) result(q) use solver_state, only: solver_state_t, neq type(solver_state_t), intent(in) :: state integer, intent(in) :: idx real(wp) :: q(neq) integer :: wrapped if (idx < 1) then if (state % is_periodic) then wrapped = modulo(idx - 1, state % n_pt - 1) + 1 q = state % ub(:, wrapped) else q = state % q_left end if else if (idx > state % n_pt) then if (state % is_periodic) then wrapped = modulo(idx - 1, state % n_pt - 1) + 1 q = state % ub(:, wrapped) else q = state % q_right end if else q = state % ub(:, idx) end if end function get_q_at_face end module spatial_discretization