// Copyright © 2025 Advanced Micro Devices, Inc.
// SPDX-License-Identifier: MIT

// clang-format off
#pragma once

#include <aotriton/config.h>
#include <aotriton/dtypes.h>
#include <aotriton/util.h>
#include <aotriton/runtime.h>
#include <aotriton/_internal/lazy_tensor_internal.h>
#include <aotriton/flash.h>
#include <cstdint>
#include <functional>
#include <string>
#include <tuple>
#include <vector>

namespace AOTRITON_NS::v3::flash {

// Unlike KernelDescription, Operator must have its own parameter class
/// Argument bundle for the attn_bwd operator.
///
/// Every member has a default initializer (null pointer, zero, or false) so a
/// default-constructed instance never carries indeterminate values. The struct
/// is still an aggregate under C++14 and later, so brace and designated
/// initialization by existing callers keep working; members that a caller
/// omits now take the defaults below.
///
/// NOTE(review): the per-member notes are inferred from member names only and
/// should be confirmed against the kernels that consume this struct.
struct OpAttnBwdParams {
    // Rank-4 input tensor views (presumably query / key / value / bias — confirm).
    const TensorView<4>*   Q = nullptr;
    const TensorView<4>*   K = nullptr;
    const TensorView<4>*   V = nullptr;
    const TensorView<4>*   B = nullptr;
    float                  sm_scale = 0.0f;
    // Rank-4 views (presumably forward output and its incoming gradient — confirm).
    const TensorView<4>*   Out = nullptr;
    const TensorView<4>*   DO = nullptr;
    // Rank-4 views (presumably gradients w.r.t. K, V, Q and B — confirm).
    const TensorView<4>*   DK = nullptr;
    const TensorView<4>*   DV = nullptr;
    const TensorView<4>*   DQ = nullptr;
    const TensorView<4>*   DB = nullptr;
    // Held through a mutable LazyTensorInternal pointer, unlike the views above.
    LazyTensorInternal<4>* DQ_ACC = nullptr;
    const TensorView<2>*   L = nullptr;
    // Held through a mutable LazyTensorInternal pointer, like DQ_ACC.
    LazyTensorInternal<2>* D = nullptr;
    int32_t                num_head_q = 0;
    int32_t                num_head_k = 0;
    // Rank-1 views (presumably cumulative sequence lengths — confirm).
    const TensorView<1>*   cu_seqlens_q = nullptr;
    const TensorView<1>*   cu_seqlens_k = nullptr;
    int32_t                num_seqlens = 0;
    int32_t                max_seqlen_q = 0;
    int32_t                max_seqlen_k = 0;
    int32_t                head_dim = 0;
    float                  dropout_p = 0.0f;
    // Rank-0 views plus a 64-bit scalar (presumably dropout RNG state — confirm).
    const TensorView<0>*   philox_seed_ptr = nullptr;
    const TensorView<0>*   philox_offset1 = nullptr;
    uint64_t               philox_offset2 = 0;
    int32_t                Window_left = 0;
    int32_t                Window_right = 0;
    // Narrow-typed selectors (presumably compile-time kernel choices — confirm).
    int16_t                BLOCK_DMODEL = 0;
    int8_t                 CAUSAL_TYPE = 0;
    bool                   ENABLE_DROPOUT = false;
    bool                   PADDED_HEAD = false;
    int8_t                 BIAS_TYPE = 0;
};

struct OpAttnBwdContext {
    OpAttnBwdParams *params = nullptr;
    const attn_options *call_options = nullptr;
    enum BackendEnum : int32_t {
        None = -1,
        kMetro_TritonSplit = 0,
        kShim_BwdKernelFuse = 1,
        kMetro_AiterAsm = 2,
        Max = 3
    };
    static constexpr BackendEnum fallback_backend = kMetro_TritonSplit;
    BackendEnum backend_index = BackendEnum::None;
    bool disable_fallback = false;

#if AOTRITON_BUILD_FOR_TUNING
    int _has_preferred_backend = -1;
    static constexpr int _total_number_of_backends = BackendEnum::Max;
    const char* _backend_name = nullptr;
#endif

    // One more layer of dispatcher of functionals is added due to
    // 1. Individual kernel may use fewer arguments
    // 2. Metro kernel needs overall performance numbers over individual kernels.
    // 3. Even metro kernel only has one kernel, another set LUT is need to
    //    determine which metro kernel (or backend) need to be used
    int64_t godel_number() const;
    static std::tuple<int, int> get_archmod_number(Gpu gpu);
    static constexpr int kMaxGodelNumber = 576;

    hipError_t lookup_optimal(Gpu gpu);
    // Unlike Triton kernel, Operator's launch need gpu argument to eventually
    // call backend's lookup_optimal
    hipError_t launch(Gpu gpu, hipStream_t stream) const;
private:
    typedef void (*OpTuneTableEntry)(OpAttnBwdContext& context, int mod_number);
    static OpTuneTableEntry optune_table[][ kMaxGodelNumber ];

    typedef hipError_t (*BackendLauncher)(const OpAttnBwdContext& context,
                                          Gpu gpu,
                                          hipStream_t stream);
    static BackendLauncher launcher_table[ BackendEnum::Max ];
};

namespace optune {

// Declared here, defined in another translation unit. Takes the operator
// parameters, a mod number, and a 1x10x10 table of int8_t, and returns an int.
// NOTE(review): presumably computes the lookup into `lut` from `params` —
// confirm against the definition.
extern int op_attn_bwd__lut_lambda__0(const OpAttnBwdParams& params, int mod_number, int8_t lut[1][10][10]);

// Forward declarations of the tuning entry points, one per (A<n>, F<n>) pair.
// Each has the shape void(OpAttnBwdContext&, int), which matches the
// OpAttnBwdContext::OpTuneTableEntry function-pointer type. Note that the
// first parameter is named `params` even though it is the context object.
// NOTE(review): presumably A<n> selects the first dimension of
// OpAttnBwdContext::optune_table and F<n> is the godel number — confirm
// against the generator. The F numbering has gaps.
void Optune_op_attn_bwd__A0__F0(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F1(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F4(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F5(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F8(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F12(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F16(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F17(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F20(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F21(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F24(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F28(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F32(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F33(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F36(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F37(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F40(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F44(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F48(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F49(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F52(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F53(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F56(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F60(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F64(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F65(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F68(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F69(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F72(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F76(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F80(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F81(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F84(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F85(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F88(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F92(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F96(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F97(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F100(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F101(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F104(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F108(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F112(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F113(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F116(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F117(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F120(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F124(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F128(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F129(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F132(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F133(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F136(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F140(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F144(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F145(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F148(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F149(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F152(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F156(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F160(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F161(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F164(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F165(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F168(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F172(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F192(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F193(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F196(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F197(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F200(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F204(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F208(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F209(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F212(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F213(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F216(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F220(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F224(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F225(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F228(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F229(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F232(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F236(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F240(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F241(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F244(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F245(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F248(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F252(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F256(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F257(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F260(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F261(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F264(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F268(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F272(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F273(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F276(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F277(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F280(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F284(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F288(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F289(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F292(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F293(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F296(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F300(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F304(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F305(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F308(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F309(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F312(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F316(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F320(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F321(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F324(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F325(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F328(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F332(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F336(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F337(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F340(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F341(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F344(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F348(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F352(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F353(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F356(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F357(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F360(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F364(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F384(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F385(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F388(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F389(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F392(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F396(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F400(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F401(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F404(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F405(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F408(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F412(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F416(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F417(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F420(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F421(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F424(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F428(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F432(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F433(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F436(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F437(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F440(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F444(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F448(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F449(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F452(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F453(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F456(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F460(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F464(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F465(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F468(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F469(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F472(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F476(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F480(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F481(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F484(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F485(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F488(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F492(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F496(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F497(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F500(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F501(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F504(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F508(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F512(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F513(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F516(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F517(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F520(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F524(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F528(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F529(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F532(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F533(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F536(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F540(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F544(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F545(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F548(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F549(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F552(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F556(OpAttnBwdContext& params, int mod_number);
// The A6 family starts here. Its set of F numbers is not the same as A0's in
// the declarations visible in this file (e.g. A6 has no F8 and A0 has no F560).
void Optune_op_attn_bwd__A6__F0(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F1(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F4(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F5(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F12(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F16(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F17(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F20(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F21(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F28(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F32(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F33(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F36(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F37(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F40(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F44(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F48(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F49(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F52(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F53(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F56(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F60(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F64(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F65(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F68(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F69(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F72(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F76(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F80(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F81(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F84(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F85(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F88(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F92(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F96(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F97(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F100(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F101(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F104(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F108(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F112(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F113(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F116(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F117(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F120(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F124(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F128(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F129(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F132(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F133(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F136(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F140(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F144(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F145(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F148(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F149(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F152(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F156(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F160(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F161(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F164(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F165(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F168(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F172(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F192(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F193(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F196(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F197(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F204(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F208(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F209(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F212(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F213(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F216(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F220(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F224(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F225(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F228(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F229(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F232(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F236(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F240(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F241(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F244(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F245(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F248(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F252(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F256(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F257(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F260(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F261(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F264(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F268(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F272(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F273(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F276(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F277(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F280(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F284(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F288(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F289(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F292(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F293(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F296(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F300(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F304(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F305(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F308(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F309(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F312(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F316(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F320(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F321(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F324(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F325(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F328(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F332(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F336(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F337(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F340(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F341(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F344(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F348(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F352(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F353(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F356(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F357(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F360(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F364(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F384(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F385(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F388(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F389(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F392(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F396(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F400(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F401(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F404(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F405(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F408(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F412(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F416(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F417(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F420(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F421(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F424(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F428(OpAttnBwdContext& params, int mod_number);
// Forward declarations of the Optune_op_attn_bwd functions for the A6 group,
// variants F432 through F572. Every declaration has the same signature:
//   void(OpAttnBwdContext& params, int mod_number)
// Only declarations appear in this part of the header; no bodies are given here.
// Note: the argument named `params` is the OpAttnBwdContext itself, not the
// OpAttnBwdParams object that the context refers to through its own `params`
// member.
// NOTE(review): the A<n>/F<n> suffixes look like generated indices (presumably
// an architecture id and a functional id) -- confirm against the generator.
// The F numbers are not contiguous (e.g. F488 is followed by F496), so a dense
// F index range must not be assumed.
void Optune_op_attn_bwd__A6__F432(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F433(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F436(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F437(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F440(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F444(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F448(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F449(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F452(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F453(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F456(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F460(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F464(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F465(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F468(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F469(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F472(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F476(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F480(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F481(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F484(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F485(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F488(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F496(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F497(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F500(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F501(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F504(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F508(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F512(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F513(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F516(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F517(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F520(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F524(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F528(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F529(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F532(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F533(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F536(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F540(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F544(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F545(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F548(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F549(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F552(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F556(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F560(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F561(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F564(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F565(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F568(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A6__F572(OpAttnBwdContext& params, int mod_number);
// Forward declarations of the Optune_op_attn_bwd functions for the A7 group,
// variants F0 through F572. Every declaration has the same signature:
//   void(OpAttnBwdContext& params, int mod_number)
// Only declarations appear in this part of the header; no bodies are given here.
// Note: the argument named `params` is the OpAttnBwdContext itself, not the
// OpAttnBwdParams object that the context refers to through its own `params`
// member.
// NOTE(review): the A<n>/F<n> suffixes look like generated indices (presumably
// an architecture id and a functional id) -- confirm against the generator.
// The F numbers are not contiguous (e.g. F172 is followed by F192, F364 by
// F384, and F488 by F496), so a dense F index range must not be assumed.
void Optune_op_attn_bwd__A7__F0(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F1(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F4(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F5(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F12(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F16(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F17(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F20(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F21(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F28(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F32(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F33(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F36(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F37(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F40(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F44(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F48(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F49(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F52(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F53(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F56(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F60(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F64(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F65(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F68(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F69(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F72(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F76(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F80(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F81(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F84(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F85(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F88(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F92(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F96(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F97(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F100(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F101(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F104(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F108(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F112(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F113(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F116(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F117(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F120(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F124(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F128(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F129(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F132(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F133(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F136(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F140(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F144(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F145(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F148(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F149(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F152(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F156(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F160(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F161(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F164(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F165(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F168(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F172(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F192(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F193(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F196(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F197(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F204(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F208(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F209(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F212(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F213(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F216(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F220(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F224(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F225(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F228(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F229(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F232(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F236(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F240(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F241(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F244(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F245(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F248(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F252(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F256(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F257(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F260(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F261(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F264(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F268(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F272(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F273(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F276(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F277(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F280(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F284(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F288(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F289(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F292(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F293(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F296(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F300(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F304(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F305(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F308(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F309(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F312(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F316(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F320(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F321(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F324(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F325(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F328(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F332(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F336(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F337(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F340(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F341(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F344(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F348(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F352(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F353(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F356(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F357(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F360(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F364(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F384(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F385(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F388(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F389(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F392(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F396(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F400(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F401(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F404(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F405(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F408(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F412(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F416(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F417(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F420(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F421(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F424(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F428(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F432(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F433(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F436(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F437(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F440(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F444(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F448(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F449(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F452(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F453(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F456(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F460(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F464(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F465(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F468(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F469(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F472(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F476(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F480(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F481(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F484(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F485(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F488(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F496(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F497(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F500(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F501(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F504(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F508(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F512(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F513(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F516(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F517(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F520(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F524(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F528(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F529(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F532(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F533(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F536(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F540(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F544(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F545(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F548(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F549(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F552(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F556(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F560(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F561(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F564(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F565(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F568(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A7__F572(OpAttnBwdContext& params, int mod_number);

}

}

// vim: set fileencoding=utf-8

