weno7_js.f90 Source File


Source Code

!> @file weno7_js.f90
!> @brief WENO7-JS (Balsara & Shu 2000) reconstruction scheme.
!!
!! 7th-order weighted essentially non-oscillatory reconstruction on a 7-point
!! stencil.  Optimal weights d7_0..d7_3 are shared via weno_family; substencil
!! reconstructions and smoothness indicators are specific to WENO7.
!!
!! References:
!!   [1] D. S. Balsara and C.-W. Shu, "Monotonicity Preserving Weighted
!!       Essentially Non-oscillatory Schemes with Increasingly High Order of
!!       Accuracy," J. Comput. Phys., 160:405-452, 2000.

module weno7_js
  use precision, only: wp
  use solver_state, only: neq
  use weno_family, only: eps_weno, d7_0, d7_1, d7_2, d7_3
  implicit none
  private

  public :: weno7_js_reconstruct

contains

  ! ---------------------------------------------------------------------------
  !> Perform WENO7-JS reconstruction on the supplied stencil.
  !!
  !! Applies WENO7 reconstruction component-wise to a 7-point stencil.
  !!
  !! Stencil indices 1..7 = {v_{i-3}, ..., v_{i+3}}, centre at index 4.
  !!
  !! Substencil approximations (Balsara & Shu, 2000):
  !!   q0 = -(1/4)*v1  + (13/12)*v2 - (23/12)*v3 + (25/12)*v4
  !!   q1 =  (1/12)*v2 -  (5/12)*v3 + (13/12)*v4  + (1/4)*v5
  !!   q2 = -(1/12)*v3 +  (7/12)*v4 +  (7/12)*v5 - (1/12)*v6
  !!   q3 =   (1/4)*v4 + (13/12)*v5 -  (5/12)*v6  + (1/12)*v7
  !!
  !! @param[in]  f_stencil  Stencil at 7 points (neq x 7)
  !! @param[out] f_hat      Reconstructed value at the interface
  ! ---------------------------------------------------------------------------
  pure subroutine weno7_js_reconstruct(f_stencil, f_hat)
    real(wp), intent(in) :: f_stencil(:, :)
    real(wp), intent(out) :: f_hat(:)

    real(wp) :: q0(neq), q1(neq), q2(neq), q3(neq)  ! substencil approximations
    real(wp) :: beta0(neq), beta1(neq), beta2(neq), beta3(neq)
    real(wp) :: w0(neq), w1(neq), w2(neq), w3(neq), w_sum(neq)
    ! Local stencil values and precomputed products for CSE in smoothness indicators.
    ! Loading into scalars avoids repeated stride-1 reads from the assumed-shape array
    ! and exposes common subexpressions (v3*v3, v4*v4, v3*v4, v4*v5, v5*v5) to the
    ! compiler's register allocator.
    real(wp) :: v1, v2, v3, v4, v5, v6, v7
    real(wp) :: v2sq, v3sq, v4sq, v5sq, v6sq
    real(wp) :: v2v3, v2v4, v3v4, v3v5, v4v5, v4v6, v5v6
    integer :: i

    ! --- 4th-order substencil approximations. See Balsara & Shu (2000). ---
    q0 = -1.0_wp / 4.0_wp * f_stencil(:, 1) &
        & + 13.0_wp / 12.0_wp * f_stencil(:, 2) &
        & - 23.0_wp / 12.0_wp * f_stencil(:, 3) &
        & + 25.0_wp / 12.0_wp * f_stencil(:, 4)

    q1 = 1.0_wp / 12.0_wp * f_stencil(:, 2) &
        & - 5.0_wp / 12.0_wp * f_stencil(:, 3) &
        & + 13.0_wp / 12.0_wp * f_stencil(:, 4) &
        & + 1.0_wp / 4.0_wp * f_stencil(:, 5)

    q2 = -1.0_wp / 12.0_wp * f_stencil(:, 3) &
        & + 7.0_wp / 12.0_wp * f_stencil(:, 4) &
        & + 7.0_wp / 12.0_wp * f_stencil(:, 5) &
        & - 1.0_wp / 12.0_wp * f_stencil(:, 6)

    q3 = 1.0_wp / 4.0_wp * f_stencil(:, 4) &
        & + 13.0_wp / 12.0_wp * f_stencil(:, 5) &
        & - 5.0_wp / 12.0_wp * f_stencil(:, 6) &
        & + 1.0_wp / 12.0_wp * f_stencil(:, 7)

    ! --- Smoothness indicators. See Balsara & Shu (2000), Table 1. ---
    ! CSE: load stencil values into scalars (avoids repeated 2D array reads with
    ! non-unit first-index stride) and precompute products that appear in 2–4 betas.
    ! v4*v4 appears in all four betas; v3*v4, v4*v5 each in 3 betas; etc.
    do i = 1, neq
      v1 = f_stencil(i, 1); v2 = f_stencil(i, 2); v3 = f_stencil(i, 3)
      v4 = f_stencil(i, 4); v5 = f_stencil(i, 5); v6 = f_stencil(i, 6)
      v7 = f_stencil(i, 7)

      ! Squared values
      v2sq = v2 * v2; v3sq = v3 * v3; v4sq = v4 * v4
      v5sq = v5 * v5; v6sq = v6 * v6

      ! Cross-products shared across multiple beta expressions
      v2v3 = v2 * v3; v2v4 = v2 * v4
      v3v4 = v3 * v4; v3v5 = v3 * v5
      v4v5 = v4 * v5; v4v6 = v4 * v6
      v5v6 = v5 * v6

      beta0(i) = (547.0_wp * v1 * v1 &
          & - 3882.0_wp * v1 * v2 &
          & + 4642.0_wp * v1 * v3 &
          & - 1854.0_wp * v1 * v4 &
          & + 7043.0_wp * v2sq  &
          & - 17246.0_wp * v2v3 &
          & + 7042.0_wp * v2v4 &
          & + 11003.0_wp * v3sq &
          & - 9402.0_wp * v3v4 &
          & + 2107.0_wp * v4sq &
          & ) / 240.0_wp

      beta1(i) = (267.0_wp * v2sq &
          & - 1642.0_wp * v2v3 &
          & + 1602.0_wp * v2v4 &
          & - 494.0_wp * v2 * v5 &
          & + 2843.0_wp * v3sq &
          & - 5966.0_wp * v3v4 &
          & + 1922.0_wp * v3v5 &
          & + 3443.0_wp * v4sq &
          & - 2522.0_wp * v4v5 &
          & + 547.0_wp * v5sq &
          & ) / 240.0_wp

      beta2(i) = (547.0_wp * v3sq &
          & - 2522.0_wp * v3v4 &
          & + 1922.0_wp * v3v5 &
          & - 494.0_wp * v3 * v6 &
          & + 3443.0_wp * v4sq &
          & - 5966.0_wp * v4v5 &
          & + 1602.0_wp * v4v6 &
          & + 2843.0_wp * v5sq &
          & - 1642.0_wp * v5v6 &
          & + 267.0_wp * v6sq &
          & ) / 240.0_wp

      beta3(i) = (2107.0_wp * v4sq &
          & - 9402.0_wp * v4v5 &
          & + 7042.0_wp * v4v6 &
          & - 1854.0_wp * v4 * v7 &
          & + 11003.0_wp * v5sq &
          & - 17246.0_wp * v5v6 &
          & + 4642.0_wp * v5 * v7 &
          & + 7043.0_wp * v6sq &
          & - 3882.0_wp * v6 * v7 &
          & + 547.0_wp * v7 * v7 &
          & ) / 240.0_wp
    end do

    ! --- Nonlinear WENO weights ---
    !   omega_tilde_k = d7_k / (eps + beta_k)^2
    !   omega_k       = omega_tilde_k / sum(omega_tilde)
    do i = 1, neq
      w0(i) = d7_0 / (eps_weno + beta0(i))**2
      w1(i) = d7_1 / (eps_weno + beta1(i))**2
      w2(i) = d7_2 / (eps_weno + beta2(i))**2
      w3(i) = d7_3 / (eps_weno + beta3(i))**2
    end do

    w_sum = w0 + w1 + w2 + w3

    do i = 1, neq
      f_hat(i) = (q0(i) * w0(i) + q1(i) * w1(i) &
               & + q2(i) * w2(i) + q3(i) * w3(i)) / w_sum(i)
    end do
  end subroutine weno7_js_reconstruct

end module weno7_js