// Copyright © 2025 Advanced Micro Devices, Inc.
// SPDX-License-Identifier: MIT

#include "affine.bwd_dq_dk_dv_v3.h"
#include <aotriton/_internal/kernel_cluster.h>
#include <aotriton/util.h>
#include <tuple>
#include <iostream>
#include "iface.op_attn_bwd.h"

namespace AOTRITON_NS::v3::flash {

// Brings OpAttnBwdParams into scope by its fully qualified name. The `#if 1`
// guard is always taken, so this using-declaration is unconditional; the
// qualified namespace is the same one this file has already opened.
#if 1
using AOTRITON_NS::v3::flash::OpAttnBwdParams;
#endif

// Encodes the current kernel-selection choices as one mixed-radix integer.
//
// Every choice contributes (digit * weight), where the weight equals the
// product of the cardinalities of all choices that follow it:
//   Q dtype (2)          * 288
//   BLOCK_DMODEL (3)     *  96
//   CAUSAL_TYPE (2)      *  48
//   PADDED_HEAD (2)      *  24
//   kIsUniformStride (2) *  12
//   MaskType (3)         *   4
//   kIsSEQPad (2)        *   2
//   kIsGroupMode (2)     *   1
// so a successful result lies in [0, 576).
//
// Returns -1 as soon as a choice holds a value outside its enumerated set.
// When NDEBUG is not defined the offending field is also reported on
// std::cerr.
int64_t BwdDqDkDvV3Context::godel_number() const
{
    const auto& args = *params;
    int64_t index = 0;

    // --- Functional choices (read from *params) ---

    const int64_t dtype_digit = (args.Q->dtype() == DType::kFloat16)  ? 0
                              : (args.Q->dtype() == DType::kBFloat16) ? 1
                                                                      : -1;
    if (dtype_digit < 0) {
#ifndef NDEBUG
        std::cerr << __FILE__ << ":" << __LINE__ << ": Unsupported Q, value: " << args.Q->dtype() << std::endl;
#endif
        return -1;
    }
    index += dtype_digit * 288;

    const int64_t head_dim_digit = (args.BLOCK_DMODEL == 64)  ? 0
                                 : (args.BLOCK_DMODEL == 128) ? 1
                                 : (args.BLOCK_DMODEL == 192) ? 2
                                                              : -1;
    if (head_dim_digit < 0) {
#ifndef NDEBUG
        // Unary plus promotes the value so it prints as a number.
        std::cerr << __FILE__ << ":" << __LINE__ << ": Unsupported BLOCK_DMODEL, value: " << +args.BLOCK_DMODEL << std::endl;
#endif
        return -1;
    }
    index += head_dim_digit * 96;

    const int64_t causal_digit = (args.CAUSAL_TYPE == 0) ? 0
                               : (args.CAUSAL_TYPE == 3) ? 1
                                                         : -1;
    if (causal_digit < 0) {
#ifndef NDEBUG
        std::cerr << __FILE__ << ":" << __LINE__ << ": Unsupported CAUSAL_TYPE, value: " << +args.CAUSAL_TYPE << std::endl;
#endif
        return -1;
    }
    index += causal_digit * 48;

    const int64_t padded_head_digit = (args.PADDED_HEAD == false) ? 0
                                    : (args.PADDED_HEAD == true)  ? 1
                                                                  : -1;
    if (padded_head_digit < 0) {
#ifndef NDEBUG
        std::cerr << __FILE__ << ":" << __LINE__ << ": Unsupported PADDED_HEAD, value: " << args.PADDED_HEAD << std::endl;
#endif
        return -1;
    }
    index += padded_head_digit * 24;

    // --- Residual choices (read from residual_args) ---

    const int64_t uniform_stride_digit = (residual_args.kIsUniformStride == false) ? 0
                                       : (residual_args.kIsUniformStride == true)  ? 1
                                                                                   : -1;
    if (uniform_stride_digit < 0) {
#ifndef NDEBUG
        std::cerr << __FILE__ << ":" << __LINE__ << ": Unsupported kIsUniformStride, value: " << residual_args.kIsUniformStride << std::endl;
#endif
        return -1;
    }
    index += uniform_stride_digit * 12;

    const int64_t mask_type_digit = (residual_args.MaskType == 0) ? 0
                                  : (residual_args.MaskType == 1) ? 1
                                  : (residual_args.MaskType == 2) ? 2
                                                                  : -1;
    if (mask_type_digit < 0) {
#ifndef NDEBUG
        std::cerr << __FILE__ << ":" << __LINE__ << ": Unsupported MaskType, value: " << +residual_args.MaskType << std::endl;
#endif
        return -1;
    }
    index += mask_type_digit * 4;

    const int64_t seq_pad_digit = (residual_args.kIsSEQPad == false) ? 0
                                : (residual_args.kIsSEQPad == true)  ? 1
                                                                     : -1;
    if (seq_pad_digit < 0) {
#ifndef NDEBUG
        std::cerr << __FILE__ << ":" << __LINE__ << ": Unsupported kIsSEQPad, value: " << residual_args.kIsSEQPad << std::endl;
#endif
        return -1;
    }
    index += seq_pad_digit * 2;

    const int64_t group_mode_digit = (residual_args.kIsGroupMode == false) ? 0
                                   : (residual_args.kIsGroupMode == true)  ? 1
                                                                           : -1;
    if (group_mode_digit < 0) {
#ifndef NDEBUG
        std::cerr << __FILE__ << ":" << __LINE__ << ": Unsupported kIsGroupMode, value: " << residual_args.kIsGroupMode << std::endl;
#endif
        return -1;
    }
    // Least-significant digit: weight 1.
    index += group_mode_digit;

    return index;
}

// Selects the affine kernel to run for `gpu` and the current inputs.
//
// Returns:
//   hipErrorNoBinaryForGpu        - get_archmod_number() reports a negative
//                                   arch index for `gpu`.
//   hipErrorPeerAccessUnsupported - check_inputs_are_supported() gives a
//                                   reject reason, godel_number() fails, or
//                                   the capability table slot is empty.
//   otherwise                     - whatever the selected capability
//                                   validator returns.
// Debug builds (NDEBUG undefined) print the rejection reason to std::cerr.
hipError_t
BwdDqDkDvV3Context::lookup_optimal(Gpu gpu) {
    auto [arch_number, mod_number] = get_archmod_number(gpu);
    if (arch_number < 0)
        return hipErrorNoBinaryForGpu;

    if (const char* why_rejected = check_inputs_are_supported(); why_rejected) {
#ifndef NDEBUG
        std::cerr << "Unsupported inputs for backend BwdDqDkDvV3Context reason: "
                  << why_rejected
                  << std::endl;
#endif
        return hipErrorPeerAccessUnsupported;
    }

    calculate_residual_func_fields();
    // Cleared up front; the validator chosen below is what assigns it.
    kernel_on_device = nullptr;

    // Unlike Triton's autotune_table, an affine kernel is looked up in
    // "capability_table", whose entries validate whether the input is
    // supported.
    const auto godel = godel_number();
    if (godel < 0) {
#ifndef NDEBUG
        std::cerr << "Unsupported inputs for backend BwdDqDkDvV3Context reason: cannot assign godel number "
                  << std::endl;
#endif
        return hipErrorPeerAccessUnsupported;
    }

    auto validator = capability_table[arch_number][godel];
    if (!validator) {
#ifndef NDEBUG
        std::cerr << "Unsupported inputs for backend BwdDqDkDvV3Context reason: capability table has no entry for godel number "
                  << godel
                  << std::endl;
#endif
        return hipErrorPeerAccessUnsupported;
    }

    // The validator is responsible for:
    // 1. returning hipErrorPeerAccessUnsupported when the kernel cannot handle
    //    the inputs (usually not required; residual choices identify this)
    // 2. assigning selected_pp_args
    // 3. assigning affine_kernel_name/package_path/function_name/arch_name
    // 4. assigning kernel_on_device
    return validator(*this, mod_number);
}

// Translates `gpu` into an (arch index, mod index) pair.
// Only GPU_AMD_ARCH_GFX950_MOD0 is recognized and maps to (0, 0); every other
// value yields (-1, 0), where the negative arch index marks "not available".
std::tuple<int, int>
BwdDqDkDvV3Context::get_archmod_number(Gpu gpu) {
    if (!(gpu == GPU_AMD_ARCH_GFX950_MOD0)) {
        // TODO: print a warning that tuning for this GPU mod is not built.
        // Note: if a mod has no tuning info in the database at all,
        //       getGpuFromStream should not return that mod in the first place.
        return std::make_tuple(-1, 0);
    }
    return std::make_tuple(0, 0);
}


// Launches the previously selected affine kernel on `stream`.
// The member function designated by selected_pp_args fills a
// DirectKernelArguments object and yields the grid/block pair; both are then
// handed to kernel_on_device->direct_invoke, whose result is returned.
// kernel_on_device and selected_pp_args are used without a null check here.
hipError_t
BwdDqDkDvV3Context::launch(hipStream_t stream) const {
    DirectKernelArguments kernel_args;
    auto [grid_dim, block_dim] = (this->*selected_pp_args)(kernel_args);
    // NOTE(review): affine_kernel_function_name is passed as both the first
    // and the third argument — confirm against direct_invoke's signature.
    return kernel_on_device->direct_invoke(
        affine_kernel_function_name,
        package_path,
        affine_kernel_function_name,
        arch_name,
        grid_dim,
        block_dim,
        &kernel_args,
        sizeof_selected_args,
        stream);
}

namespace {

// Kernels from ALL arches go here.
// One compact metadata record per affine kernel. Entry i is the kernel that
// validator_Arch_gfx950_lambda__i fetches with kernel_cluster.get(i).
// Per the trailing comments, the first two fields are the upper and lower
// 32-bit halves of the 64-bit b2sum of "<arch>/<kernel name>". The last field
// equals the byte offset of the kernel name inside packed_string (7, 30, 59,
// ...). The third field (always 0 here) presumably is the offset of the arch
// name in packed_string — TODO confirm against TritonKernelCompactMeta.
AOTRITON_NS::TritonKernelCompactMeta meta_list[] = {
    { 0x2198f4ebu, 0x5aafe94eu, 0, 7 }, // 2198f4eb5aafe94e = b2sum -l 64 <<< gfx950/bwd_hd64_fp16_a32_pssk
    { 0x29308916u, 0xf17a6242u, 0, 30 }, // 29308916f17a6242 = b2sum -l 64 <<< gfx950/bwd_hd64_fp16_a32_pssk_group
    { 0x1873309eu, 0xaabccf56u, 0, 59 }, // 1873309eaabccf56 = b2sum -l 64 <<< gfx950/bwd_hd64_fp16_causal_a32_pssk
    { 0x63d3e7bfu, 0xe47a9592u, 0, 89 }, // 63d3e7bfe47a9592 = b2sum -l 64 <<< gfx950/bwd_hd64_fp16_causal_a32_pssk_group
    { 0x318058f6u, 0x476541bcu, 0, 125 }, // 318058f6476541bc = b2sum -l 64 <<< gfx950/bwd_hd128_fp16_a32_psskddv
    { 0xbdf24f5du, 0x12b6c9e2u, 0, 152 }, // bdf24f5d12b6c9e2 = b2sum -l 64 <<< gfx950/bwd_hd128_fp16_a32_pssk_group
    { 0x7a9c15e6u, 0xb7cbd2aau, 0, 182 }, // 7a9c15e6b7cbd2aa = b2sum -l 64 <<< gfx950/bwd_hd128_fp16_causal_a32_psskddv
    { 0x4a3958d6u, 0x5dacdd50u, 0, 216 }, // 4a3958d65dacdd50 = b2sum -l 64 <<< gfx950/bwd_hd128_fp16_causal_a32_pssk_group
    { 0xb8f31283u, 0xcb77599au, 0, 253 }, // b8f31283cb77599a = b2sum -l 64 <<< gfx950/bwd_hd128_fp16_swa_a32_psskddv
    { 0x8d24a338u, 0xfe328337u, 0, 284 }, // 8d24a338fe328337 = b2sum -l 64 <<< gfx950/bwd_hd128_fp16_a32
    { 0x49a5e4dcu, 0xf07ba44cu, 0, 303 }, // 49a5e4dcf07ba44c = b2sum -l 64 <<< gfx950/bwd_hd128_fp16_causal_a32
    { 0x989b697cu, 0xefa975d2u, 0, 329 }, // 989b697cefa975d2 = b2sum -l 64 <<< gfx950/bwd_hd128_fp16_a32_psskddv_group
    { 0x552aadc9u, 0xb8f7b698u, 0, 362 }, // 552aadc9b8f7b698 = b2sum -l 64 <<< gfx950/bwd_hd128_fp16_causal_a32_psskddv_group
    { 0x4f7bb884u, 0x9866e6fau, 0, 402 }, // 4f7bb8849866e6fa = b2sum -l 64 <<< gfx950/bwd_hd192_fp16_a32_psskddv
    { 0x72f49e0du, 0xc5020b47u, 0, 429 }, // 72f49e0dc5020b47 = b2sum -l 64 <<< gfx950/bwd_hd192_fp16_causal_a32_psskddv
    { 0x7a9820afu, 0x69f0a50du, 0, 463 }, // 7a9820af69f0a50d = b2sum -l 64 <<< gfx950/bwd_hd64_bf16_a32_rtne_pssk
    { 0x7b97cb5fu, 0x9ee890a3u, 0, 491 }, // 7b97cb5f9ee890a3 = b2sum -l 64 <<< gfx950/bwd_hd64_bf16_a32_rtne_pssk_group
    { 0x83e92c32u, 0x22929e19u, 0, 525 }, // 83e92c3222929e19 = b2sum -l 64 <<< gfx950/bwd_hd64_bf16_causal_a32_rtne_pssk
    { 0x03b24ef3u, 0xf8174df4u, 0, 560 }, // 03b24ef3f8174df4 = b2sum -l 64 <<< gfx950/bwd_hd64_bf16_causal_a32_rtne_pssk_group
    { 0x386fb565u, 0x9d42bafau, 0, 601 }, // 386fb5659d42bafa = b2sum -l 64 <<< gfx950/bwd_hd128_bf16_a32_rtne_psskddv
    { 0x1401eac3u, 0x7cb080a2u, 0, 633 }, // 1401eac37cb080a2 = b2sum -l 64 <<< gfx950/bwd_hd128_bf16_a32_rtne_pssk_group
    { 0xbf4728cau, 0xa5ae0302u, 0, 668 }, // bf4728caa5ae0302 = b2sum -l 64 <<< gfx950/bwd_hd128_bf16_causal_a32_rtne_psskddv
    { 0xdae38842u, 0x07d236b6u, 0, 707 }, // dae3884207d236b6 = b2sum -l 64 <<< gfx950/bwd_hd128_bf16_causal_a32_rtne_pssk_group
    { 0x417a460du, 0xe0f8c617u, 0, 749 }, // 417a460de0f8c617 = b2sum -l 64 <<< gfx950/bwd_hd128_bf16_swa_a32_rtne_psskddv
    { 0xdad2a584u, 0xafe8caaau, 0, 785 }, // dad2a584afe8caaa = b2sum -l 64 <<< gfx950/bwd_hd128_bf16_a32_rtne
    { 0x10098027u, 0x9fe5b62au, 0, 809 }, // 100980279fe5b62a = b2sum -l 64 <<< gfx950/bwd_hd128_bf16_causal_a32_rtne
    { 0xf6ab40e8u, 0x0c9493ebu, 0, 840 }, // f6ab40e80c9493eb = b2sum -l 64 <<< gfx950/bwd_hd128_bf16_a32_rtne_psskddv_group
    { 0x5ae15341u, 0x8b9af58du, 0, 878 }, // 5ae153418b9af58d = b2sum -l 64 <<< gfx950/bwd_hd128_bf16_causal_a32_rtne_psskddv_group
    { 0xf4527819u, 0x4f1842deu, 0, 923 }, // f45278194f1842de = b2sum -l 64 <<< gfx950/bwd_hd192_bf16_a32_rtne_psskddv
    { 0x289cc7ffu, 0x59898788u, 0, 955 }, // 289cc7ff59898788 = b2sum -l 64 <<< gfx950/bwd_hd192_bf16_causal_a32_rtne_psskddv
};
// Element count of meta_list, evaluated at compile time.
constexpr int kTotalNumKernels = (sizeof(meta_list) / sizeof(meta_list[0]));
// All arch and kernel names packed back-to-back, each terminated by an
// explicit NUL. "gfx950" occupies bytes 0-6, so the first kernel name starts
// at byte 7; the remaining names follow in the same order as meta_list, and
// each name's starting byte equals the last field of its meta_list entry.
// Changing, adding, or reordering any string here shifts those offsets.
const char packed_string[] =
"gfx950\0"
"bwd_hd64_fp16_a32_pssk\0"
"bwd_hd64_fp16_a32_pssk_group\0"
"bwd_hd64_fp16_causal_a32_pssk\0"
"bwd_hd64_fp16_causal_a32_pssk_group\0"
"bwd_hd128_fp16_a32_psskddv\0"
"bwd_hd128_fp16_a32_pssk_group\0"
"bwd_hd128_fp16_causal_a32_psskddv\0"
"bwd_hd128_fp16_causal_a32_pssk_group\0"
"bwd_hd128_fp16_swa_a32_psskddv\0"
"bwd_hd128_fp16_a32\0"
"bwd_hd128_fp16_causal_a32\0"
"bwd_hd128_fp16_a32_psskddv_group\0"
"bwd_hd128_fp16_causal_a32_psskddv_group\0"
"bwd_hd192_fp16_a32_psskddv\0"
"bwd_hd192_fp16_causal_a32_psskddv\0"
"bwd_hd64_bf16_a32_rtne_pssk\0"
"bwd_hd64_bf16_a32_rtne_pssk_group\0"
"bwd_hd64_bf16_causal_a32_rtne_pssk\0"
"bwd_hd64_bf16_causal_a32_rtne_pssk_group\0"
"bwd_hd128_bf16_a32_rtne_psskddv\0"
"bwd_hd128_bf16_a32_rtne_pssk_group\0"
"bwd_hd128_bf16_causal_a32_rtne_psskddv\0"
"bwd_hd128_bf16_causal_a32_rtne_pssk_group\0"
"bwd_hd128_bf16_swa_a32_rtne_psskddv\0"
"bwd_hd128_bf16_a32_rtne\0"
"bwd_hd128_bf16_causal_a32_rtne\0"
"bwd_hd128_bf16_a32_rtne_psskddv_group\0"
"bwd_hd128_bf16_causal_a32_rtne_psskddv_group\0"
"bwd_hd192_bf16_a32_rtne_psskddv\0"
"bwd_hd192_bf16_causal_a32_rtne_psskddv\0";

// File-local cluster built from meta_list and packed_string; the validator
// functions below fetch their kernel from it with kernel_cluster.get(index).
AOTRITON_NS::TritonKernelCluster<kTotalNumKernels> kernel_cluster(meta_list, packed_string);

// Capability validator selecting kernel_cluster entry 0
// (fmha_bwd_hd64_fp16_a32_pssk). Populates the launch fields of `context`
// and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__0(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter27fmha_bwd_hd64_fp16_a32_psskE";
  context.kernel_on_device = kernel_cluster.get(0);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_genl_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_genl_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 32;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 1
// (fmha_bwd_hd64_fp16_a32_pssk_group). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__1(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter33fmha_bwd_hd64_fp16_a32_pssk_groupE";
  context.kernel_on_device = kernel_cluster.get(1);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_group_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_group_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 32;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 2
// (fmha_bwd_hd64_fp16_causal_a32_pssk). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__2(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter34fmha_bwd_hd64_fp16_causal_a32_psskE";
  context.kernel_on_device = kernel_cluster.get(2);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_genl_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_genl_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 32;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 3
// (fmha_bwd_hd64_fp16_causal_a32_pssk_group). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__3(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter40fmha_bwd_hd64_fp16_causal_a32_pssk_groupE";
  context.kernel_on_device = kernel_cluster.get(3);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_group_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_group_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 32;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 4
// (fmha_bwd_hd128_fp16_a32_psskddv). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__4(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter31fmha_bwd_hd128_fp16_a32_psskddvE";
  context.kernel_on_device = kernel_cluster.get(4);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_genl_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_genl_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 5
// (fmha_bwd_hd128_fp16_a32_pssk_group). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__5(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter34fmha_bwd_hd128_fp16_a32_pssk_groupE";
  context.kernel_on_device = kernel_cluster.get(5);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_group_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_group_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 6
// (fmha_bwd_hd128_fp16_causal_a32_psskddv). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__6(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter38fmha_bwd_hd128_fp16_causal_a32_psskddvE";
  context.kernel_on_device = kernel_cluster.get(6);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_genl_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_genl_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 7
// (fmha_bwd_hd128_fp16_causal_a32_pssk_group). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__7(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter41fmha_bwd_hd128_fp16_causal_a32_pssk_groupE";
  context.kernel_on_device = kernel_cluster.get(7);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_group_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_group_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 8
// (fmha_bwd_hd128_fp16_swa_a32_psskddv). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__8(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter35fmha_bwd_hd128_fp16_swa_a32_psskddvE";
  context.kernel_on_device = kernel_cluster.get(8);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_swa_genl_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_swa_genl_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 9
// (fmha_bwd_hd128_fp16_a32). Populates the launch fields of `context` and
// always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__9(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter23fmha_bwd_hd128_fp16_a32E";
  context.kernel_on_device = kernel_cluster.get(9);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 10
// (fmha_bwd_hd128_fp16_causal_a32). Populates the launch fields of `context`
// and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__10(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter30fmha_bwd_hd128_fp16_causal_a32E";
  context.kernel_on_device = kernel_cluster.get(10);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 11
// (fmha_bwd_hd128_fp16_a32_psskddv_group). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__11(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter37fmha_bwd_hd128_fp16_a32_psskddv_groupE";
  context.kernel_on_device = kernel_cluster.get(11);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_group_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_group_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 12
// (fmha_bwd_hd128_fp16_causal_a32_psskddv_group). Populates the launch fields
// of `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__12(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter44fmha_bwd_hd128_fp16_causal_a32_psskddv_groupE";
  context.kernel_on_device = kernel_cluster.get(12);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_group_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_group_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 13
// (fmha_bwd_hd192_fp16_a32_psskddv). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__13(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter31fmha_bwd_hd192_fp16_a32_psskddvE";
  context.kernel_on_device = kernel_cluster.get(13);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_genl_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_genl_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 64;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 14
// (fmha_bwd_hd192_fp16_causal_a32_psskddv). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__14(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter38fmha_bwd_hd192_fp16_causal_a32_psskddvE";
  context.kernel_on_device = kernel_cluster.get(14);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_genl_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_genl_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 64;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 15
// (fmha_bwd_hd64_bf16_a32_rtne_pssk). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__15(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter32fmha_bwd_hd64_bf16_a32_rtne_psskE";
  context.kernel_on_device = kernel_cluster.get(15);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_genl_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_genl_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 32;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 16
// (fmha_bwd_hd64_bf16_a32_rtne_pssk_group). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__16(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter38fmha_bwd_hd64_bf16_a32_rtne_pssk_groupE";
  context.kernel_on_device = kernel_cluster.get(16);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_group_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_group_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 32;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 17
// (fmha_bwd_hd64_bf16_causal_a32_rtne_pssk). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__17(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter39fmha_bwd_hd64_bf16_causal_a32_rtne_psskE";
  context.kernel_on_device = kernel_cluster.get(17);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_genl_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_genl_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 32;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 18
// (fmha_bwd_hd64_bf16_causal_a32_rtne_pssk_group). Populates the launch fields
// of `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__18(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter45fmha_bwd_hd64_bf16_causal_a32_rtne_pssk_groupE";
  context.kernel_on_device = kernel_cluster.get(18);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_group_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_group_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 32;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 19
// (fmha_bwd_hd128_bf16_a32_rtne_psskddv). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__19(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter36fmha_bwd_hd128_bf16_a32_rtne_psskddvE";
  context.kernel_on_device = kernel_cluster.get(19);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_genl_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_genl_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 20
// (fmha_bwd_hd128_bf16_a32_rtne_pssk_group). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__20(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter39fmha_bwd_hd128_bf16_a32_rtne_pssk_groupE";
  context.kernel_on_device = kernel_cluster.get(20);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_group_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_group_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 21
// (fmha_bwd_hd128_bf16_causal_a32_rtne_psskddv). Populates the launch fields
// of `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__21(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter43fmha_bwd_hd128_bf16_causal_a32_rtne_psskddvE";
  context.kernel_on_device = kernel_cluster.get(21);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_genl_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_genl_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 22
// (fmha_bwd_hd128_bf16_causal_a32_rtne_pssk_group). Populates the launch
// fields of `context` and always returns hipSuccess; `mod_number` is not
// consulted.
hipError_t validator_Arch_gfx950_lambda__22(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter46fmha_bwd_hd128_bf16_causal_a32_rtne_pssk_groupE";
  context.kernel_on_device = kernel_cluster.get(22);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_group_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_group_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 23
// (fmha_bwd_hd128_bf16_swa_a32_rtne_psskddv). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__23(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter40fmha_bwd_hd128_bf16_swa_a32_rtne_psskddvE";
  context.kernel_on_device = kernel_cluster.get(23);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_swa_genl_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_swa_genl_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 24
// (fmha_bwd_hd128_bf16_a32_rtne). Populates the launch fields of `context`
// and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__24(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter28fmha_bwd_hd128_bf16_a32_rtneE";
  context.kernel_on_device = kernel_cluster.get(24);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 256;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 25
// (fmha_bwd_hd128_bf16_causal_a32_rtne). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__25(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter35fmha_bwd_hd128_bf16_causal_a32_rtneE";
  context.kernel_on_device = kernel_cluster.get(25);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 256;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 26
// (fmha_bwd_hd128_bf16_a32_rtne_psskddv_group). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__26(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter42fmha_bwd_hd128_bf16_a32_rtne_psskddv_groupE";
  context.kernel_on_device = kernel_cluster.get(26);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_group_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_group_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 27
// (fmha_bwd_hd128_bf16_causal_a32_rtne_psskddv_group). Populates the launch
// fields of `context` and always returns hipSuccess; `mod_number` is not
// consulted.
hipError_t validator_Arch_gfx950_lambda__27(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter49fmha_bwd_hd128_bf16_causal_a32_rtne_psskddv_groupE";
  context.kernel_on_device = kernel_cluster.get(27);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_group_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_group_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 192;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 28
// (fmha_bwd_hd192_bf16_a32_rtne_psskddv). Populates the launch fields of
// `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__28(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter36fmha_bwd_hd192_bf16_a32_rtne_psskddvE";
  context.kernel_on_device = kernel_cluster.get(28);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_genl_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_genl_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 64;
  return hipSuccess;
}


// Capability validator selecting kernel_cluster entry 29
// (fmha_bwd_hd192_bf16_causal_a32_rtne_psskddv). Populates the launch fields
// of `context` and always returns hipSuccess; `mod_number` is not consulted.
hipError_t validator_Arch_gfx950_lambda__29(BwdDqDkDvV3Context& context, int mod_number)
{
  // Where the kernel lives and what it is called.
  context.arch_name = "gfx950";
  context.package_path = "amd-gfx950/flash/bwd_dq_dk_dv_v3/affine_kernels";
  context.affine_kernel_function_name = "_ZN5aiter43fmha_bwd_hd192_bf16_causal_a32_rtne_psskddvE";
  context.kernel_on_device = kernel_cluster.get(29);
  // Argument-preparation routine and the size of the struct it fills.
  context.selected_pp_args = &BwdDqDkDvV3Context::pp_direct_kernel_args_for_fmha_bwd_v3_genl_args;
  context.sizeof_selected_args = sizeof(AOTRITON_NS::v3::flash::aiter::fmha_bwd_v3_genl_args);
  // perf_args values paired with this kernel.
  context.perf_args.ts_qo = 16;
  context.perf_args.ts_kv = 64;
  return hipSuccess;
}

}

// Static dispatch table of validator functions for the bwd_dq_dk_dv_v3 affine
// kernels.
//
// Shape: one row (every entry here is a gfx950 validator; presumably there is
// one row per supported arch -- confirm against the code that indexes it) by
// 576 columns.
//
// Column layout: the width 576 and the 96-entry sections marked below line up
// with the weights used in godel_number() (Q dtype digit * 288, BLOCK_DMODEL
// digit * 96), so the column index is presumably the value returned by
// godel_number(). The lookup itself is outside this part of the file -- TODO
// confirm. The meaning of the lower-order positions inside each 96-entry
// section is not established here.
//
// A nullptr entry has no validator; presumably that functional combination has
// no affine kernel and the caller must handle it -- confirm at the call site.
//
// The entry order is the behavior of this table: do not reorder or reformat
// entries by hand.
BwdDqDkDvV3Context::CapabilityTableEntry
BwdDqDkDvV3Context::capability_table[][ 576 ] = {
    {
        // Columns 0-95: godel_number() digits Q dtype = 0 (kFloat16),
        // BLOCK_DMODEL = 0 (64), assuming godel-number indexing.
        &validator_Arch_gfx950_lambda__0,
        &validator_Arch_gfx950_lambda__1,
        &validator_Arch_gfx950_lambda__0,
        &validator_Arch_gfx950_lambda__1,
        &validator_Arch_gfx950_lambda__2,
        &validator_Arch_gfx950_lambda__3,
        &validator_Arch_gfx950_lambda__2,
        &validator_Arch_gfx950_lambda__3,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__0,
        &validator_Arch_gfx950_lambda__1,
        &validator_Arch_gfx950_lambda__0,
        &validator_Arch_gfx950_lambda__1,
        &validator_Arch_gfx950_lambda__2,
        &validator_Arch_gfx950_lambda__3,
        &validator_Arch_gfx950_lambda__2,
        &validator_Arch_gfx950_lambda__3,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__0,
        &validator_Arch_gfx950_lambda__1,
        &validator_Arch_gfx950_lambda__0,
        &validator_Arch_gfx950_lambda__1,
        &validator_Arch_gfx950_lambda__2,
        &validator_Arch_gfx950_lambda__3,
        &validator_Arch_gfx950_lambda__2,
        &validator_Arch_gfx950_lambda__3,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__0,
        &validator_Arch_gfx950_lambda__1,
        &validator_Arch_gfx950_lambda__0,
        &validator_Arch_gfx950_lambda__1,
        &validator_Arch_gfx950_lambda__2,
        &validator_Arch_gfx950_lambda__3,
        &validator_Arch_gfx950_lambda__2,
        &validator_Arch_gfx950_lambda__3,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__0,
        &validator_Arch_gfx950_lambda__1,
        &validator_Arch_gfx950_lambda__0,
        &validator_Arch_gfx950_lambda__1,
        &validator_Arch_gfx950_lambda__2,
        &validator_Arch_gfx950_lambda__3,
        &validator_Arch_gfx950_lambda__2,
        &validator_Arch_gfx950_lambda__3,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__0,
        &validator_Arch_gfx950_lambda__1,
        &validator_Arch_gfx950_lambda__0,
        &validator_Arch_gfx950_lambda__1,
        &validator_Arch_gfx950_lambda__2,
        &validator_Arch_gfx950_lambda__3,
        &validator_Arch_gfx950_lambda__2,
        &validator_Arch_gfx950_lambda__3,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        // Columns 96-191: Q dtype = 0 (kFloat16), BLOCK_DMODEL = 1 (128),
        // assuming godel-number indexing.
        &validator_Arch_gfx950_lambda__4,
        &validator_Arch_gfx950_lambda__5,
        &validator_Arch_gfx950_lambda__4,
        &validator_Arch_gfx950_lambda__5,
        &validator_Arch_gfx950_lambda__6,
        &validator_Arch_gfx950_lambda__7,
        &validator_Arch_gfx950_lambda__6,
        &validator_Arch_gfx950_lambda__7,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        &validator_Arch_gfx950_lambda__9,
        &validator_Arch_gfx950_lambda__5,
        &validator_Arch_gfx950_lambda__4,
        &validator_Arch_gfx950_lambda__5,
        &validator_Arch_gfx950_lambda__10,
        &validator_Arch_gfx950_lambda__7,
        &validator_Arch_gfx950_lambda__6,
        &validator_Arch_gfx950_lambda__7,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        &validator_Arch_gfx950_lambda__4,
        &validator_Arch_gfx950_lambda__5,
        &validator_Arch_gfx950_lambda__4,
        &validator_Arch_gfx950_lambda__5,
        &validator_Arch_gfx950_lambda__6,
        &validator_Arch_gfx950_lambda__7,
        &validator_Arch_gfx950_lambda__6,
        &validator_Arch_gfx950_lambda__7,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        &validator_Arch_gfx950_lambda__4,
        &validator_Arch_gfx950_lambda__11,
        &validator_Arch_gfx950_lambda__4,
        &validator_Arch_gfx950_lambda__11,
        &validator_Arch_gfx950_lambda__6,
        &validator_Arch_gfx950_lambda__12,
        &validator_Arch_gfx950_lambda__6,
        &validator_Arch_gfx950_lambda__12,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        &validator_Arch_gfx950_lambda__4,
        &validator_Arch_gfx950_lambda__5,
        &validator_Arch_gfx950_lambda__4,
        &validator_Arch_gfx950_lambda__5,
        &validator_Arch_gfx950_lambda__6,
        &validator_Arch_gfx950_lambda__7,
        &validator_Arch_gfx950_lambda__6,
        &validator_Arch_gfx950_lambda__7,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        &validator_Arch_gfx950_lambda__9,
        &validator_Arch_gfx950_lambda__5,
        &validator_Arch_gfx950_lambda__4,
        &validator_Arch_gfx950_lambda__5,
        &validator_Arch_gfx950_lambda__10,
        &validator_Arch_gfx950_lambda__7,
        &validator_Arch_gfx950_lambda__6,
        &validator_Arch_gfx950_lambda__7,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        &validator_Arch_gfx950_lambda__4,
        &validator_Arch_gfx950_lambda__5,
        &validator_Arch_gfx950_lambda__4,
        &validator_Arch_gfx950_lambda__5,
        &validator_Arch_gfx950_lambda__6,
        &validator_Arch_gfx950_lambda__7,
        &validator_Arch_gfx950_lambda__6,
        &validator_Arch_gfx950_lambda__7,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        &validator_Arch_gfx950_lambda__4,
        &validator_Arch_gfx950_lambda__11,
        &validator_Arch_gfx950_lambda__4,
        &validator_Arch_gfx950_lambda__11,
        &validator_Arch_gfx950_lambda__6,
        &validator_Arch_gfx950_lambda__12,
        &validator_Arch_gfx950_lambda__6,
        &validator_Arch_gfx950_lambda__12,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        &validator_Arch_gfx950_lambda__8,
        nullptr,
        // Columns 192-287: Q dtype = 0 (kFloat16), BLOCK_DMODEL = 2 (192),
        // assuming godel-number indexing.
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__13,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        &validator_Arch_gfx950_lambda__14,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        // Columns 288-383: Q dtype = 1 (kBFloat16), BLOCK_DMODEL = 0 (64),
        // assuming godel-number indexing.
        &validator_Arch_gfx950_lambda__15,
        &validator_Arch_gfx950_lambda__16,
        &validator_Arch_gfx950_lambda__15,
        &validator_Arch_gfx950_lambda__16,
        &validator_Arch_gfx950_lambda__17,
        &validator_Arch_gfx950_lambda__18,
        &validator_Arch_gfx950_lambda__17,
        &validator_Arch_gfx950_lambda__18,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__15,
        &validator_Arch_gfx950_lambda__16,
        &validator_Arch_gfx950_lambda__15,
        &validator_Arch_gfx950_lambda__16,
        &validator_Arch_gfx950_lambda__17,
        &validator_Arch_gfx950_lambda__18,
        &validator_Arch_gfx950_lambda__17,
        &validator_Arch_gfx950_lambda__18,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__15,
        &validator_Arch_gfx950_lambda__16,
        &validator_Arch_gfx950_lambda__15,
        &validator_Arch_gfx950_lambda__16,
        &validator_Arch_gfx950_lambda__17,
        &validator_Arch_gfx950_lambda__18,
        &validator_Arch_gfx950_lambda__17,
        &validator_Arch_gfx950_lambda__18,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__15,
        &validator_Arch_gfx950_lambda__16,
        &validator_Arch_gfx950_lambda__15,
        &validator_Arch_gfx950_lambda__16,
        &validator_Arch_gfx950_lambda__17,
        &validator_Arch_gfx950_lambda__18,
        &validator_Arch_gfx950_lambda__17,
        &validator_Arch_gfx950_lambda__18,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__15,
        &validator_Arch_gfx950_lambda__16,
        &validator_Arch_gfx950_lambda__15,
        &validator_Arch_gfx950_lambda__16,
        &validator_Arch_gfx950_lambda__17,
        &validator_Arch_gfx950_lambda__18,
        &validator_Arch_gfx950_lambda__17,
        &validator_Arch_gfx950_lambda__18,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__15,
        &validator_Arch_gfx950_lambda__16,
        &validator_Arch_gfx950_lambda__15,
        &validator_Arch_gfx950_lambda__16,
        &validator_Arch_gfx950_lambda__17,
        &validator_Arch_gfx950_lambda__18,
        &validator_Arch_gfx950_lambda__17,
        &validator_Arch_gfx950_lambda__18,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        // Columns 384-479: Q dtype = 1 (kBFloat16), BLOCK_DMODEL = 1 (128),
        // assuming godel-number indexing.
        &validator_Arch_gfx950_lambda__19,
        &validator_Arch_gfx950_lambda__20,
        &validator_Arch_gfx950_lambda__19,
        &validator_Arch_gfx950_lambda__20,
        &validator_Arch_gfx950_lambda__21,
        &validator_Arch_gfx950_lambda__22,
        &validator_Arch_gfx950_lambda__21,
        &validator_Arch_gfx950_lambda__22,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        &validator_Arch_gfx950_lambda__24,
        &validator_Arch_gfx950_lambda__20,
        &validator_Arch_gfx950_lambda__19,
        &validator_Arch_gfx950_lambda__20,
        &validator_Arch_gfx950_lambda__25,
        &validator_Arch_gfx950_lambda__22,
        &validator_Arch_gfx950_lambda__21,
        &validator_Arch_gfx950_lambda__22,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        &validator_Arch_gfx950_lambda__19,
        &validator_Arch_gfx950_lambda__20,
        &validator_Arch_gfx950_lambda__19,
        &validator_Arch_gfx950_lambda__20,
        &validator_Arch_gfx950_lambda__21,
        &validator_Arch_gfx950_lambda__22,
        &validator_Arch_gfx950_lambda__21,
        &validator_Arch_gfx950_lambda__22,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        &validator_Arch_gfx950_lambda__19,
        &validator_Arch_gfx950_lambda__26,
        &validator_Arch_gfx950_lambda__19,
        &validator_Arch_gfx950_lambda__26,
        &validator_Arch_gfx950_lambda__21,
        &validator_Arch_gfx950_lambda__27,
        &validator_Arch_gfx950_lambda__21,
        &validator_Arch_gfx950_lambda__27,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        &validator_Arch_gfx950_lambda__19,
        &validator_Arch_gfx950_lambda__20,
        &validator_Arch_gfx950_lambda__19,
        &validator_Arch_gfx950_lambda__20,
        &validator_Arch_gfx950_lambda__21,
        &validator_Arch_gfx950_lambda__22,
        &validator_Arch_gfx950_lambda__21,
        &validator_Arch_gfx950_lambda__22,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        &validator_Arch_gfx950_lambda__24,
        &validator_Arch_gfx950_lambda__20,
        &validator_Arch_gfx950_lambda__19,
        &validator_Arch_gfx950_lambda__20,
        &validator_Arch_gfx950_lambda__25,
        &validator_Arch_gfx950_lambda__22,
        &validator_Arch_gfx950_lambda__21,
        &validator_Arch_gfx950_lambda__22,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        &validator_Arch_gfx950_lambda__19,
        &validator_Arch_gfx950_lambda__20,
        &validator_Arch_gfx950_lambda__19,
        &validator_Arch_gfx950_lambda__20,
        &validator_Arch_gfx950_lambda__21,
        &validator_Arch_gfx950_lambda__22,
        &validator_Arch_gfx950_lambda__21,
        &validator_Arch_gfx950_lambda__22,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        &validator_Arch_gfx950_lambda__19,
        &validator_Arch_gfx950_lambda__26,
        &validator_Arch_gfx950_lambda__19,
        &validator_Arch_gfx950_lambda__26,
        &validator_Arch_gfx950_lambda__21,
        &validator_Arch_gfx950_lambda__27,
        &validator_Arch_gfx950_lambda__21,
        &validator_Arch_gfx950_lambda__27,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        &validator_Arch_gfx950_lambda__23,
        nullptr,
        // Columns 480-575: Q dtype = 1 (kBFloat16), BLOCK_DMODEL = 2 (192),
        // assuming godel-number indexing.
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__28,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        &validator_Arch_gfx950_lambda__29,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
    },
};

}

