spatial_discretization.f90 Source File


Source Code

!> @file spatial_discretization.f90
!> @brief Spatial residual computation via flux splitting and reconstruction.

module spatial_discretization
  use precision, only: wp
  use option_registry, only: hybrid_sensor_jameson, hybrid_sensor_density_gradient, &
                             hybrid_sensor_weno_beta
  implicit none
  private
  public :: compute_resid

contains

  ! ---------------------------------------------------------------------------
  !> Compute the spatial residual R(Q) = -1/dx * (F_{i+1/2} - F_{i-1/2}).
  !!
  !! Two code paths are supported, selected by state%use_fds:
  !!
  !! **FVS path** (Lax-Friedrichs, Steger-Warming, van Leer):
  !!   1. Computes F^+(Q_j) and F^-(Q_j) at all grid points.
  !!   2. Averages adjacent states to get K and K^{-1} at the interface.
  !!   3. Assembles the stencil for both F^+ (left-biased)
  !!      and F^- (right-biased), applying ghost states at boundaries.
  !!   4. Calls the active reconstruction scheme for each split flux.
  !!   5. Sums the contributions: F_hat = F_hat^+ + F_hat^-.
  !!
  !! **FDS path** (AUSM+, HLL, HLLC, Roe):
  !!   1. Reconstructs Q_L and Q_R at each face from cell Q values.
  !!   2. Calls the active FDS solver: F_hat = fds_solver(Q_L, Q_R, gam).
  !!
  !! See course notes, Sec. "Finite Difference WENO Scheme", Eq. (28)-(30).
  !!
  !! @param[inout] state  Solver instance state.
  ! ---------------------------------------------------------------------------
  subroutine compute_resid(state)
    use solver_state, only: solver_state_t, neq
    use euler_physics, only: compute_eigensystem
    use reconstruction, only: max_stencil_width
    use boundary_conditions, only: apply_bcs
    use positivity_limiter, only: limit_positivity
    use timer, only: timer_start, timer_stop
    type(solver_state_t), intent(inout) :: state

    integer :: i, iface
    real(wp) :: flux_pos(neq), flux_neg(neq)
    real(wp) :: r_mat(neq, neq), r_inv(neq, neq), q_avg(neq)
    real(wp) :: f_stencil(neq, max_stencil_width)
    real(wp) :: q_stencil(neq, max_stencil_width)
    real(wp) :: q_face_L(neq), q_face_R(neq)
    integer :: stencil_width
    real(wp) :: sensor_val
    logical :: is_smooth

    stencil_width = state % stencil_width

    if (state % cfg % do_timing) call timer_start(state % perf % resid)

    ! Update ghost-state vectors from the current solution and BC types.
    call apply_bcs(state)

    if (state % use_fds) then

      ! -----------------------------------------------------------------------
      ! FDS path (AUSM+, HLL, HLLC, Roe): reconstruct Q_L and Q_R at each face.
      ! -----------------------------------------------------------------------
      if (state % cfg % do_timing) call timer_start(state % perf % faceloop)
      face_loop_ausm: do iface = 1, state % n_pt + 1

        ! Left-biased stencil of Q for Q_L (same index pattern as F^+ stencil).
        call fill_stencil(state % ub, state % n_pt, iface + state % stencil_start_offset, 1, &
            & state % q_left, state % q_right, state % is_periodic, q_stencil(:, 1:stencil_width))

        ! Hybrid sensor: classify face as smooth or non-smooth.
        ! Computed once per face; applied to both Q_L and Q_R reconstructions.
        if (state % cfg % use_hybrid_recon) then
          sensor_val = eval_face_sensor(state, iface, q_stencil(:, 1:stencil_width))
          is_smooth = (sensor_val <= state % cfg % hybrid_sensor_threshold)
        else
          is_smooth = .false.
        end if

        if (is_smooth) then
          call state % smooth_reconstruct(q_stencil(:, 1:stencil_width), q_face_L)
        else
          call state % reconstruct(q_stencil(:, 1:stencil_width), q_face_L)
        end if
        ! Zhang-Shu limiter: scale Q_L toward cell average to ensure ρ,p ≥ ε.
        ! Skipped at the left boundary (iface=1) where q_face_L comes from the
        ! ghost state set explicitly by boundary conditions.
        if (state % cfg % use_positivity_limiter .and. iface > 1) &
          call limit_positivity(q_face_L, state % ub(:, iface - 1), state % cfg % gam)

        ! Right-biased (reversed) stencil of Q for Q_R.
        call fill_stencil(state % ub, state % n_pt, iface - state % stencil_start_offset - 1, -1, &
            & state % q_left, state % q_right, state % is_periodic, q_stencil(:, 1:stencil_width))
        if (is_smooth) then
          call state % smooth_reconstruct(q_stencil(:, 1:stencil_width), q_face_R)
        else
          call state % reconstruct(q_stencil(:, 1:stencil_width), q_face_R)
        end if
        ! Zhang-Shu limiter for Q_R; skipped at the right boundary (iface=n_pt+1).
        if (state % cfg % use_positivity_limiter .and. iface <= state % n_pt) &
          call limit_positivity(q_face_R, state % ub(:, iface), state % cfg % gam)

        call state % fds_solver(q_face_L, q_face_R, state % num_flux(:, iface), state % cfg % gam)

      end do face_loop_ausm
      if (state % cfg % do_timing) call timer_stop(state % perf % faceloop)

    else

      ! -----------------------------------------------------------------------
      ! FVS path: split fluxes at cells, reconstruct, sum at faces.
      ! -----------------------------------------------------------------------

      ! Compute split fluxes F^+ and F^- at every grid point.
      ! Use the fused split_both routine (one primitive extraction per cell)
      ! rather than two separate flux_split calls.
      if (state % cfg % do_timing) call timer_start(state % perf % fluxsplit)
      do i = 1, state % n_pt
        ! Guard: FVS split routines call sqrt(γp/ρ); detect a vacuum or
        ! non-physical pressure before that to produce a clear error rather
        ! than propagating NaN through the residual.
        block
          real(wp) :: rho_i, p_i
          rho_i = state % ub(1, i)
          if (rho_i <= 0.0_wp) &
            error stop 'compute_resid: non-positive density in FVS precompute'
          p_i = (state % ub(3, i) - 0.5_wp * state % ub(2, i)**2 / rho_i) &
                * (state % cfg % gam - 1.0_wp)
          if (p_i <= 0.0_wp) &
            error stop 'compute_resid: non-positive pressure in FVS precompute'
        end block
        call state % split_both(state % ub(:, i), state % fp(:, i), state % fm(:, i), &
                                state % cfg % gam)
      end do
      if (state % cfg % do_timing) call timer_stop(state % perf % fluxsplit)

      ! Loop over all cell faces to compute the numerical flux.
      if (state % cfg % do_timing) call timer_start(state % perf % faceloop)
      face_loop: do iface = 1, state % n_pt + 1

        ! Compute averaged state at the interface for the eigensystem.
        ! Using arithmetic average: Q_avg = 0.5*(Q_L + Q_R).
        ! See course notes, Sec. "Generalization to Systems", Step 1.
        if (iface == 1) then
          if (state % is_periodic) then
            ! Periodic left ghost is the last unique interior point.
            q_avg = 0.5_wp * (state % ub(:, state % n_pt - 1) + state % ub(:, 1))
          else
            q_avg = 0.5_wp * (state % q_left + state % ub(:, 1))
          end if
        else if (iface == state % n_pt + 1) then
          if (state % is_periodic) then
            ! Periodic right ghost is the second grid point.
            ! ub(:,n_pt) = ub(:,1) for periodic, so this = 0.5*(ub(:,1)+ub(:,2)).
            q_avg = 0.5_wp * (state % ub(:, state % n_pt) + state % ub(:, 2))
          else
            q_avg = 0.5_wp * (state % ub(:, state % n_pt) + state % q_right)
          end if
        else
          q_avg = 0.5_wp * (state % ub(:, iface - 1) + state % ub(:, iface))
        end if

        ! Step 2: Compute eigenvector matrices at this interface.
        call compute_eigensystem(q_avg, r_mat, r_inv, state % cfg % gam)

        ! Hybrid sensor: classify face as smooth or non-smooth using Q (ub),
        ! not the split-flux stencil, so the same sensor logic applies to
        ! both the FVS and FDS paths.  No q_stencil is available here, so
        ! weno_beta falls back to density_gradient automatically inside
        ! eval_face_sensor when the stencil argument has fewer than 5 columns.
        if (state % cfg % use_hybrid_recon) then
          sensor_val = eval_face_sensor(state, iface, state % ub(:, 1:0))
          is_smooth = (sensor_val <= state % cfg % hybrid_sensor_threshold)
        else
          is_smooth = .false.
        end if

        ! --- Positive flux F^+: left-biased stencil ---
        ! Left-biased reconstruction stencil selected by the active scheme
        ! metadata.  For example: WENO5 starts at iface-3 (width 5),
        ! WENO-CU6 starts at iface-3 (width 6), WENO11 starts at iface-6.
        call fill_stencil(state % fp, state % n_pt, iface + state % stencil_start_offset, 1, &
            & state % fl_pos, state % fr_pos, state % is_periodic, f_stencil(:, 1:stencil_width))
        if (is_smooth) then
          call reconstruct_with_proj(state % smooth_reconstruct, f_stencil(:, 1:stencil_width), &
              & r_mat, r_inv, state % use_char_proj, flux_pos)
        else
          call reconstruct_with_proj(state % reconstruct, f_stencil(:, 1:stencil_width), &
              & r_mat, r_inv, state % use_char_proj, flux_pos)
        end if

        ! --- Negative flux F^-: right-biased (mirror) stencil ---
        ! Stencil is assembled in reverse order for the right-biased
        ! reconstruction, so that the same routine can be reused.
        ! Points: iface - stencil_start_offset - 1 down to iface + stencil_start_offset,
        ! assembled in reverse order so that the same reconstruction routine can be reused.
        call fill_stencil(state % fm, state % n_pt, iface - state % stencil_start_offset - 1, -1, &
            & state % fl_neg, state % fr_neg, state % is_periodic, f_stencil(:, 1:stencil_width))
        if (is_smooth) then
          call reconstruct_with_proj(state % smooth_reconstruct, f_stencil(:, 1:stencil_width), &
              & r_mat, r_inv, state % use_char_proj, flux_neg)
        else
          call reconstruct_with_proj(state % reconstruct, f_stencil(:, 1:stencil_width), &
              & r_mat, r_inv, state % use_char_proj, flux_neg)
        end if

        ! Total numerical flux at this face.
        state % num_flux(:, iface) = flux_pos + flux_neg

      end do face_loop
      if (state % cfg % do_timing) call timer_stop(state % perf % faceloop)

    end if

    ! Compute the residual: R_i = -1/dx * (F_{i+1/2} - F_{i-1/2}).
    ! See course notes, Eq. (28).
    do i = 1, state % n_pt
      state % resid(:, i) = -(state % num_flux(:, i + 1) - state % num_flux(:, i)) / state % dx
    end do

    if (state % cfg % do_timing) call timer_stop(state % perf % resid)
  end subroutine compute_resid

  ! ---------------------------------------------------------------------------
  !> Fill a reconstruction stencil from `field`, applying ghost states at
  !! domain boundaries.
  !!
  !! Iterates over stencil positions i = 1 .. size(stencil, 2) and maps each
  !! to grid index pt_idx = pt_start + (i-1)*pt_step.  Out-of-bounds indices
  !! are handled as follows:
  !!   - Periodic domain: wrap into [1, n_pts-1] via modulo arithmetic.
  !!   - Non-periodic, low side (pt_idx < 1): use ghost_lo.
  !!   - Non-periodic, high side (pt_idx > n_pts): use ghost_hi.
  !!
  !! Use pt_step = +1 for a left-biased stencil (F^+, Q_L reconstruction).
  !! Use pt_step = -1 for a right-biased (mirror) stencil (F^-, Q_R).
  !!
  !! @param field    Source data array (neq x n_pts).
  !! @param n_pts    Number of active grid points.
  !! @param pt_start Grid index for stencil position i = 1.
  !! @param pt_step  Index stride per position: +1 (left-biased) or -1 (right-biased).
  !! @param ghost_lo Ghost state applied when pt_idx < 1.
  !! @param ghost_hi Ghost state applied when pt_idx > n_pts.
  !! @param periodic If .true., wrap out-of-bounds indices periodically.
  !! @param stencil  Output stencil (neq x stencil_width), overwritten.
  ! ---------------------------------------------------------------------------
  pure subroutine fill_stencil(field, n_pts, pt_start, pt_step, &
                               ghost_lo, ghost_hi, periodic, stencil)
    real(wp), intent(in) :: field(:, :), ghost_lo(:), ghost_hi(:)
    integer, intent(in) :: n_pts, pt_start, pt_step
    logical, intent(in) :: periodic
    real(wp), intent(out) :: stencil(:, :)

    integer :: i, pt_idx

    do i = 1, size(stencil, 2)
      pt_idx = pt_start + (i - 1) * pt_step
      if (pt_idx < 1 .or. pt_idx > n_pts) then
        if (periodic) then
          stencil(:, i) = field(:, modulo(pt_idx - 1, n_pts - 1) + 1)
        else if (pt_idx < 1) then
          stencil(:, i) = ghost_lo
        else
          stencil(:, i) = ghost_hi
        end if
      else
        stencil(:, i) = field(:, pt_idx)
      end if
    end do
  end subroutine fill_stencil

  ! ---------------------------------------------------------------------------
  !> Reconstruct a face flux or state with optional characteristic projection.
  !!
  !! If proj is .true.:
  !!   1. Transform each stencil column to characteristic space: K^{-1} * f.
  !!   2. Reconstruct the face value in characteristic space.
  !!   3. Map back to physical space: K * flux_out.
  !! If proj is .false.:
  !!   Call recon directly on the physical-space stencil.
  !!
  !! See course notes, Sec. "Generalization to Systems", Steps 3-4.
  !!
  !! @param recon    Active reconstruction procedure pointer.
  !! @param stencil  Input flux or state stencil (neq x stencil_width).
  !! @param r_mat    Right eigenvector matrix K (neq x neq).
  !! @param r_inv    Inverse eigenvector matrix K^{-1} (neq x neq).
  !! @param proj     Enable characteristic-space projection.
  !! @param flux_out Reconstructed face value (neq), overwritten.
  ! ---------------------------------------------------------------------------
  subroutine reconstruct_with_proj(recon, stencil, r_mat, r_inv, proj, flux_out)
    use solver_interfaces, only: reconstructor_iface
    procedure(reconstructor_iface) :: recon
    real(wp), intent(in) :: stencil(:, :), r_mat(:, :), r_inv(:, :)
    logical, intent(in) :: proj
    real(wp), intent(out) :: flux_out(:)

    real(wp) :: proj_stencil(size(stencil, 1), size(stencil, 2))
    real(wp) :: tmp1, tmp2, tmp3
    integer :: k

    if (proj) then
      ! Hand-unrolled 3x3 matrix-vector products to avoid runtime library dispatch
      ! (matmul on assumed-shape arrays generates a _gfortran_matmul_r8 call even at
      ! -O3 because the rank is not known inside the subroutine; with BLAS linked the
      ! compiler may additionally dispatch to dgemm, which has non-trivial startup cost
      ! for a 3x3 kernel).  The unrolled form exposes 9 FMAs per column for the
      ! compiler's vectorizer.
      do k = 1, size(stencil, 2)
        proj_stencil(1, k) = r_inv(1, 1) * stencil(1, k) + r_inv(1, 2) * stencil(2, k) &
                             + r_inv(1, 3) * stencil(3, k)
        proj_stencil(2, k) = r_inv(2, 1) * stencil(1, k) + r_inv(2, 2) * stencil(2, k) &
                             + r_inv(2, 3) * stencil(3, k)
        proj_stencil(3, k) = r_inv(3, 1) * stencil(1, k) + r_inv(3, 2) * stencil(2, k) &
                             + r_inv(3, 3) * stencil(3, k)
      end do
      call recon(proj_stencil, flux_out)
      ! Back-project: flux_out = r_mat * flux_out  (3x3 matmul, hand-unrolled)
      tmp1 = r_mat(1, 1) * flux_out(1) + r_mat(1, 2) * flux_out(2) + r_mat(1, 3) * flux_out(3)
      tmp2 = r_mat(2, 1) * flux_out(1) + r_mat(2, 2) * flux_out(2) + r_mat(2, 3) * flux_out(3)
      tmp3 = r_mat(3, 1) * flux_out(1) + r_mat(3, 2) * flux_out(2) + r_mat(3, 3) * flux_out(3)
      flux_out(1) = tmp1
      flux_out(2) = tmp2
      flux_out(3) = tmp3
    else
      call recon(stencil, flux_out)
    end if
  end subroutine reconstruct_with_proj

  ! ---------------------------------------------------------------------------
  !> Evaluate the hybrid shock sensor for face iface.
  !!
  !! Dispatches to the sensor type set in state%cfg%hybrid_sensor:
  !!   'jameson'          — Jameson-Schmidt-Turkel pressure second-derivative
  !!                        sensor (JST 1981, AIAA Paper 81-1259).
  !!   'density_gradient' — Normalised density jump across the face.
  !!   'weno_beta'        — WENO5 smoothness-indicator ratio on density
  !!                        (Jiang & Shu 1996, Eq. 24-26); falls back to
  !!                        density_gradient for non-5-point schemes or when
  !!                        q_stencil has < 5 columns.
  !!
  !! @param state     Solver state (solution arrays and config).
  !! @param iface     Face index in [1, n_pt+1].
  !! @param q_stencil Assembled Q stencil (neq x ≥5) for weno_beta sensor;
  !!                  pass a zero-column slice for the FVS path.
  !! @return          Non-negative sensor value; larger = more non-smooth.
  ! ---------------------------------------------------------------------------
  real(wp) function eval_face_sensor(state, iface, q_stencil) result(s)
    use solver_state, only: solver_state_t, neq
    use weno_family, only: weno5_smoothness, eps_weno
    type(solver_state_t), intent(in) :: state
    integer, intent(in) :: iface
    real(wp), intent(in) :: q_stencil(:, :)

    real(wp) :: q_m2(neq), q_m1(neq), q_p1(neq), q_p2(neq)
    real(wp) :: p_m2, p_m1, p_p1, p_p2, gm1
    real(wp) :: rho_stencil(1, 5), beta0(1), beta1(1), beta2(1)

    q_m1 = get_q_at_face(state, iface - 1)
    q_p1 = get_q_at_face(state, iface)

    select case (trim(state % cfg % hybrid_sensor))

    case (hybrid_sensor_jameson)
      q_m2 = get_q_at_face(state, iface - 2)
      q_p2 = get_q_at_face(state, iface + 1)
      gm1 = state % cfg % gam - 1.0_wp
      p_m2 = gm1 * (q_m2(3) - 0.5_wp * q_m2(2)**2 / q_m2(1))
      p_m1 = gm1 * (q_m1(3) - 0.5_wp * q_m1(2)**2 / q_m1(1))
      p_p1 = gm1 * (q_p1(3) - 0.5_wp * q_p1(2)**2 / q_p1(1))
      p_p2 = gm1 * (q_p2(3) - 0.5_wp * q_p2(2)**2 / q_p2(1))
      s = (abs(p_m2 - 2.0_wp * p_m1 + p_p1) + abs(p_m1 - 2.0_wp * p_p1 + p_p2)) &
          / (abs(p_m2) + 2.0_wp * abs(p_m1) + 2.0_wp * abs(p_p1) + abs(p_p2) &
             + tiny(1.0_wp))

    case (hybrid_sensor_density_gradient)
      s = abs(q_p1(1) - q_m1(1)) &
          / (0.5_wp * (abs(q_p1(1)) + abs(q_m1(1))) + tiny(1.0_wp))

    case (hybrid_sensor_weno_beta)
      if (state % stencil_width == 5 .and. size(q_stencil, 2) >= 5) then
        ! WENO5 smoothness-indicator ratio on density (equation 1).
        ! Large value indicates a discontinuity in the reconstruction stencil.
        rho_stencil(1, :) = q_stencil(1, 1:5)
        call weno5_smoothness(rho_stencil, beta0, beta1, beta2)
        s = max(beta0(1), beta1(1), beta2(1)) &
            / (min(beta0(1), beta1(1), beta2(1)) + eps_weno)
      else
        ! No Q stencil (FVS path) — fall back to density_gradient.
        s = abs(q_p1(1) - q_m1(1)) &
            / (0.5_wp * (abs(q_p1(1)) + abs(q_m1(1))) + tiny(1.0_wp))
      end if

    case default
      s = 0.0_wp  ! unreachable: config validation guards this path

    end select
  end function eval_face_sensor

  ! ---------------------------------------------------------------------------
  !> Return Q at grid cell idx with ghost-state fallback at boundaries.
  !!
  !! Mirrors the boundary-aware averaging in the FVS face loop (lines ~124-141).
  !!
  !! @param state  Solver state.
  !! @param idx    Cell index (may lie outside [1, n_pt]).
  !! @return       Q(neq) for that cell, or the appropriate ghost state.
  ! ---------------------------------------------------------------------------
  function get_q_at_face(state, idx) result(q)
    use solver_state, only: solver_state_t, neq
    type(solver_state_t), intent(in) :: state
    integer, intent(in) :: idx
    real(wp) :: q(neq)

    integer :: wrapped

    if (idx < 1) then
      if (state % is_periodic) then
        wrapped = modulo(idx - 1, state % n_pt - 1) + 1
        q = state % ub(:, wrapped)
      else
        q = state % q_left
      end if
    else if (idx > state % n_pt) then
      if (state % is_periodic) then
        wrapped = modulo(idx - 1, state % n_pt - 1) + 1
        q = state % ub(:, wrapped)
      else
        q = state % q_right
      end if
    else
      q = state % ub(:, idx)
    end if
  end function get_q_at_face

end module spatial_discretization