/*******************************************************************************
 *
 * MIT License
 *
 * Copyright (c) 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 *******************************************************************************/

#include <cstddef>
#include <miopen/conv/solvers.hpp>
#include <miopen/env.hpp>
#include <miopen/handle.hpp>
#include <miopen/generic_search.hpp>
#include <miopen/conv/wrw_invoke_params.hpp>
#include <miopen/solver/implicitgemm_util.hpp>
#include <miopen/gcn_asm_utils.hpp>
#include <miopen/tensor_ops.hpp>
#include <miopen/conv/asm_implicit_gemm.hpp>

// Declares the boolean environment variable
// MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_GTC_XDLOPS for use in this translation unit.
// NOTE(review): presumably a debug switch that lets users enable/disable the asm
// implicit-GEMM WrW GTC XDLOPS solver -- confirm at the use site, which is not in this view.
MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_GTC_XDLOPS)

namespace miopen {
namespace solver {
namespace conv {

// Local shorthand: within miopen::solver::conv, `ProblemDescription` names
// miopen::conv::ProblemDescription.
using ProblemDescription = miopen::conv::ProblemDescription;

static const inline std::vector<TunableImplicitGemmGTCDynamic_t>&
GetImplicitGemmWrwGTCDynamicXdlopsKernelList()
{
    // Candidate kernels for the dynamic igemm wrw pass; each entry below is one
    // TunableImplicitGemmGTCDynamic_t configuration.
    // clang-format off
    static const std::vector<TunableImplicitGemmGTCDynamic_t> kernel_param_list {
        { "wrw", miopenFloat,   4,   0, 256, 128,  16,  64,  32,   1,   1,   1,   2,   2,   {1,   4,   4,   1},   {1,   4,   1,  64},   {1,   4,   2,   1},   {1,   4,   1,  64},   0},
        { "wrw", miopenFloat,   4,   0, 256, 128,  16,  64,  32,   1,   1,   1,   2,   2,   {1,   4,   4,   1},   {1,   4,   1,  64},   {1,   4,   2,   1},   {1,   4,   1,  64},   1},
        { "wrw", miopenFloat,   4,   0, 256, 128,   8,  64,  32,   1,   1,   1,   2,   2,   {1,   4,   2,   1},   {1,   2,   1, 128},   {1,   4,   1,   1},   {1,   2,   1, 128},   0},
        { "wrw", miopenFloat,   4,   0, 256, 128,   8,  64,  32,   1,   1,   1,   2,   2,   {1,   4,   2,   1},   {1,   2,   1, 128},   {1,   4,   1,   1},   {1,   2,   1, 128},   1},
        { "wrw", miopenFloat,   1,   1, 256, 128,  16,  64,  32,   1,   1,   1,   2,   2,   {1,   1,  16,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   1,   1, 256, 128,  16,  64,  32,   1,   1,   1,   2,   2,   {1,   1,  16,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   1,   1, 256, 128,   8,  64,  32,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,   8,   1,  32},   {1,   1,   4,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   1,   1, 256, 128,   8,  64,  32,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,   8,   1,  32},   {1,   1,   4,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   4,   1, 256, 128,  16,  64,  32,   1,   1,   1,   2,   2,   {1,   1,  16,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   4,   1, 256, 128,  16,  64,  32,   1,   1,   1,   2,   2,   {1,   1,  16,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   4,   1, 256, 128,   8,  64,  32,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,   8,   1,  32},   {1,   1,   4,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   4,   1, 256, 128,   8,  64,  32,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,   8,   1,  32},   {1,   1,   4,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   4,   0, 256,  64,  16,  64,  16,   1,   1,   1,   2,   2,   {1,   4,   4,   1},   {1,   4,   1,  64},   {1,   4,   1,   1},   {1,   4,   1,  64},   0},
        { "wrw", miopenFloat,   4,   0, 256,  64,  16,  64,  16,   1,   1,   1,   2,   2,   {1,   4,   4,   1},   {1,   4,   1,  64},   {1,   4,   1,   1},   {1,   4,   1,  64},   1},
        { "wrw", miopenFloat,   1,   1, 256,  64,  16,  64,  16,   1,   1,   1,   2,   2,   {1,   1,  16,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   1,   1, 256,  64,  16,  64,  16,   1,   1,   1,   2,   2,   {1,   1,  16,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   1,   1, 256,  64,   8,  64,  16,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,   8,   1,  32},   {1,   1,   2,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   1,   1, 256,  64,   8,  64,  16,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,   8,   1,  32},   {1,   1,   2,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   1,   1, 256,  64,   4,  64,  16,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,   4,   1,  64},   {1,   1,   1,   1},   {1,   4,   1,  64},   0},
        { "wrw", miopenFloat,   1,   1, 256,  64,   4,  64,  16,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,   4,   1,  64},   {1,   1,   1,   1},   {1,   4,   1,  64},   1},
        { "wrw", miopenFloat,   4,   1, 256,  64,  16,  64,  16,   1,   1,   1,   2,   2,   {1,   1,  16,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   4,   1, 256,  64,  16,  64,  16,   1,   1,   1,   2,   2,   {1,   1,  16,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   4,   1, 256,  64,   8,  64,  16,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,   8,   1,  32},   {1,   1,   2,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   4,   1, 256,  64,   8,  64,  16,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,   8,   1,  32},   {1,   1,   2,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   4,   1, 256,  64,   4,  64,  16,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,   4,   1,  64},   {1,   1,   1,   1},   {1,   4,   1,  64},   0},
        { "wrw", miopenFloat,   4,   1, 256,  64,   4,  64,  16,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,   4,   1,  64},   {1,   1,   1,   1},   {1,   4,   1,  64},   1},
        { "wrw", miopenFloat,   1,   1, 256,  32,  16,  64,   4,   1,   1,   2,   2,   2,   {1,   1,  16,   1},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   1,   1, 256,  32,  16,  64,   4,   1,   1,   2,   2,   2,   {1,   1,  16,   1},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   1,   1, 256,  32,   8,  64,   4,   1,   1,   2,   2,   2,   {1,   1,   8,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   1,   1, 256,  32,   8,  64,   4,   1,   1,   2,   2,   2,   {1,   1,   8,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   4,   1, 256,  32,  16,  64,   4,   1,   1,   2,   2,   2,   {1,   1,  16,   1},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   4,   1, 256,  32,  16,  64,   4,   1,   1,   2,   2,   2,   {1,   1,  16,   1},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   4,   1, 256,  32,   8,  64,   4,   1,   1,   2,   2,   2,   {1,   1,   8,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   4,   1, 256,  32,   8,  64,   4,   1,   1,   2,   2,   2,   {1,   1,   8,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   4,   0, 128, 128,  16,  32,  32,   1,   1,   1,   2,   2,   {1,   4,   2,   1},   {1,   4,   1,  64},   {1,   4,   2,   1},   {1,   4,   1,  64},   0},
        { "wrw", miopenFloat,   4,   0, 128, 128,  16,  32,  32,   1,   1,   1,   2,   2,   {1,   4,   2,   1},   {1,   4,   1,  64},   {1,   4,   2,   1},   {1,   4,   1,  64},   1},
        { "wrw", miopenFloat,   1,   1, 128, 128,  16,  32,  32,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   1,   1, 128, 128,  16,  32,  32,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   1,   1, 128, 128,   8,  32,  32,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,   8,   1,  32},   {1,   1,   4,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   1,   1, 128, 128,   8,  32,  32,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,   8,   1,  32},   {1,   1,   4,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   4,   1, 128, 128,  16,  32,  32,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   4,   1, 128, 128,  16,  32,  32,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   4,   1, 128, 128,   8,  32,  32,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,   8,   1,  32},   {1,   1,   4,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   4,   1, 128, 128,   8,  32,  32,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,   8,   1,  32},   {1,   1,   4,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   4,   0, 128,  64,  16,  32,   8,   1,   1,   2,   2,   2,   {1,   4,   2,   1},   {1,   4,   1,  64},   {1,   4,   1,   1},   {1,   4,   1,  64},   0},
        { "wrw", miopenFloat,   4,   0, 128,  64,  16,  32,   8,   1,   1,   2,   2,   2,   {1,   4,   2,   1},   {1,   4,   1,  64},   {1,   4,   1,   1},   {1,   4,   1,  64},   1},
        { "wrw", miopenFloat,   1,   1, 128,  64,  16,  32,   8,   1,   1,   2,   2,   2,   {1,   1,   8,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   1,   1, 128,  64,  16,  32,   8,   1,   1,   2,   2,   2,   {1,   1,   8,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   1,   1, 128,  64,   8,  32,   8,   1,   1,   2,   2,   2,   {1,   1,   4,   1},   {1,   8,   1,  32},   {1,   1,   2,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   1,   1, 128,  64,   8,  32,   8,   1,   1,   2,   2,   2,   {1,   1,   4,   1},   {1,   8,   1,  32},   {1,   1,   2,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   4,   1, 128,  64,  16,  32,   8,   1,   1,   2,   2,   2,   {1,   1,   8,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   4,   1, 128,  64,  16,  32,   8,   1,   1,   2,   2,   2,   {1,   1,   8,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   4,   1, 128,  64,   8,  32,   8,   1,   1,   2,   2,   2,   {1,   1,   4,   1},   {1,   8,   1,  32},   {1,   1,   2,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   4,   1, 128,  64,   8,  32,   8,   1,   1,   2,   2,   2,   {1,   1,   4,   1},   {1,   8,   1,  32},   {1,   1,   2,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   1,   1, 128,  32,  16,  32,   8,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   1,   1, 128,  32,  16,  32,   8,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   1,   1, 128,  32,   8,  32,   8,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   1,   1, 128,  32,   8,  32,   8,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   4,   1, 128,  32,  16,  32,   8,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   4,   1, 128,  32,  16,  32,   8,   1,   1,   1,   2,   2,   {1,   1,   8,   1},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   4,   1, 128,  32,   8,  32,   8,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   4,   1, 128,  32,   8,  32,   8,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   1,   1,  64, 256,  16,  16,  64,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   1,   1,  64, 256,  16,  16,  64,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   1,   1,  64, 256,   8,  16,  64,   1,   1,   1,   2,   2,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   8,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   1,   1,  64, 256,   8,  16,  64,   1,   1,   1,   2,   2,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   8,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   4,   1,  64, 256,  16,  16,  64,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   4,   1,  64, 256,  16,  16,  64,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   4,   1,  64, 256,   8,  16,  64,   1,   1,   1,   2,   2,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   8,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   4,   1,  64, 256,   8,  16,  64,   1,   1,   1,   2,   2,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   8,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   1,   1,  64, 128,  16,   8,  32,   1,   2,   1,   2,   2,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   1,   1,  64, 128,  16,   8,  32,   1,   2,   1,   2,   2,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   1,   1,  64, 128,   8,   8,  32,   1,   2,   1,   2,   2,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   4,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   1,   1,  64, 128,   8,   8,  32,   1,   2,   1,   2,   2,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   4,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   4,   1,  64, 128,  16,   8,  32,   1,   2,   1,   2,   2,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   4,   1,  64, 128,  16,   8,  32,   1,   2,   1,   2,   2,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   4,   1,  64, 128,   8,   8,  32,   1,   2,   1,   2,   2,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   4,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   4,   1,  64, 128,   8,   8,  32,   1,   2,   1,   2,   2,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   4,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   1,   1,  64,  64,  16,  16,  16,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   1,   1,  64,  64,  16,  16,  16,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   1,   1,  64,  64,   8,  16,  16,   1,   1,   1,   2,   2,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   2,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   1,   1,  64,  64,   8,  16,  16,   1,   1,   1,   2,   2,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   2,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   4,   1,  64,  64,  16,  16,  16,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   4,   1,  64,  64,  16,  16,  16,   1,   1,   1,   2,   2,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   4,   1,  64,  64,   8,  16,  16,   1,   1,   1,   2,   2,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   2,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   4,   1,  64,  64,   8,  16,  16,   1,   1,   1,   2,   2,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   2,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   1,   1,  64,  32,  16,  32,   8,   1,   1,   2,   1,   1,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   1,   1,  64,  32,  16,  32,   8,   1,   1,   2,   1,   1,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   1,   1,  64,  32,   8,  32,   8,   1,   1,   2,   1,   1,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   1,   1,  64,  32,   8,  32,   8,   1,   1,   2,   1,   1,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   4,   1,  64,  32,  16,  32,   8,   1,   1,   2,   1,   1,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   4,   1,  64,  32,  16,  32,   8,   1,   1,   2,   1,   1,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   4,   1,  64,  32,   8,  32,   8,   1,   1,   2,   1,   1,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   4,   1,  64,  32,   8,  32,   8,   1,   1,   2,   1,   1,   {1,   1,   2,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   1,   1,  64,  16,  16,  64,   4,   1,   1,   1,   1,   1,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   1,   1,  64,  16,  16,  64,   4,   1,   1,   1,   1,   1,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   1,   1,   4,  64,  16,   4,  64,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,   4},   {1,   1,  16,   1},   {1,  16,   1,   4},   1},
        { "wrw", miopenFloat,   1,   1,   4,  64,  16,   4,  64,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,   4},   {1,   1,  16,   1},   {1,  16,   1,   4},   0},
        { "wrw", miopenFloat,   4,   1,  64,  16,  16,  64,   4,   1,   1,   1,   1,   1,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenFloat,   4,   1,  64,  16,  16,  64,   4,   1,   1,   1,   1,   1,   {1,   1,   4,   1},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenFloat,   4,   1,   4,  64,  16,   4,  64,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,   4},   {1,   1,  16,   1},   {1,  16,   1,   4},   1},
        { "wrw", miopenFloat,   4,   1,   4,  64,  16,   4,  64,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,   4},   {1,   1,  16,   1},   {1,  16,   1,   4},   0},
        { "wrw", miopenFloat,  16,   1,   4,  64,  16,   4,  64,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,   4},   {1,   1,  16,   1},   {1,  16,   1,   4},   1},
        { "wrw", miopenFloat,  16,   1,   4,  64,  16,   4,  64,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,   4},   {1,   1,  16,   1},   {1,  16,   1,   4},   0},
        { "wrw", miopenFloat,  16,   1,  64,   4,  16,  64,   4,   1,   1,   1,   1,   1,   {1,   1,  16,   1},   {1,  16,   1,   4},   {1,   1,   1,   1},   {1,  16,   1,   4},   1},
        { "wrw", miopenFloat,  16,   1,  64,   4,  16,  64,   4,   1,   1,   1,   1,   1,   {1,   1,  16,   1},   {1,  16,   1,   4},   {1,   1,   1,   1},   {1,  16,   1,   4},   0},
        { "wrw", miopenFloat,   1,   1,  32,  32,   8,  16,  16,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   1,   1,  32,  32,   8,  16,  16,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   8,   1,  32,  32,   8,  16,  16,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenFloat,   8,   1,  32,  32,   8,  16,  16,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenFloat,   1,   1,  16,  32,  16,   8,  32,   1,   1,   1,   1,   1,   {1,   1,   2,   1},   {1,  16,   1,   8},   {1,   1,   4,   1},   {1,  16,   1,   8},   0},
        { "wrw", miopenFloat,   1,   1,  16,  32,  16,   8,  32,   1,   1,   1,   1,   1,   {1,   1,   2,   1},   {1,  16,   1,   8},   {1,   1,   4,   1},   {1,  16,   1,   8},   1},
        { "wrw", miopenFloat,   1,   1,  16,  32,   8,   8,  32,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,   8,   1,  16},   {1,   1,   2,   1},   {1,   8,   1,  16},   0},
        { "wrw", miopenFloat,   1,   1,  16,  32,   8,   8,  32,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,   8,   1,  16},   {1,   1,   2,   1},   {1,   8,   1,  16},   1},
        { "wrw", miopenFloat,   4,   1,  16,  32,  16,   8,  32,   1,   1,   1,   1,   1,   {1,   1,   2,   1},   {1,  16,   1,   8},   {1,   1,   4,   1},   {1,  16,   1,   8},   0},
        { "wrw", miopenFloat,   4,   1,  16,  32,  16,   8,  32,   1,   1,   1,   1,   1,   {1,   1,   2,   1},   {1,  16,   1,   8},   {1,   1,   4,   1},   {1,  16,   1,   8},   1},
        { "wrw", miopenFloat,   4,   1,  16,  32,   8,   8,  32,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,   8,   1,  16},   {1,   1,   2,   1},   {1,   8,   1,  16},   0},
        { "wrw", miopenFloat,   4,   1,  16,  32,   8,   8,  32,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,   8,   1,  16},   {1,   1,   2,   1},   {1,   8,   1,  16},   1},
        { "wrw", miopenFloat,   8,   1,  16,  32,   8,   8,  32,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,   8,   1,  16},   {1,   1,   2,   1},   {1,   8,   1,  16},   0},
        { "wrw", miopenFloat,   8,   1,  16,  32,   8,   8,  32,   1,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,   8,   1,  16},   {1,   1,   2,   1},   {1,   8,   1,  16},   1},
        { "wrw", miopenFloat,   8,   1,  32,  16,   8,  32,   8,   1,   1,   1,   1,   1,   {1,   1,   2,   1},   {1,   8,   1,  16},   {1,   1,   1,   1},   {1,   8,   1,  16},   1},
        { "wrw", miopenFloat,   8,   1,  32,  16,   8,  32,   8,   1,   1,   1,   1,   1,   {1,   1,   2,   1},   {1,   8,   1,  16},   {1,   1,   1,   1},   {1,   8,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0, 256, 128,  32,  32,  32,   8,   2,   1,   2,   2,   {1,   4,   1,   8},   {1,   8,   1,  32},   {1,   4,   4,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0, 256, 128,  32,  32,  32,   8,   2,   1,   2,   2,   {1,   4,   1,   8},   {1,   8,   1,  32},   {1,   4,   4,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   1,   0, 256, 128,  32,  32,  32,   8,   2,   1,   2,   2,   {1,   1,   1,  32},   {1,  32,   1,   8},   {1,   1,   1,  16},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   0, 256, 128,  16,  32,  32,   8,   2,   1,   2,   2,   {1,   4,   1,   4},   {1,   4,   1,  64},   {1,   4,   2,   1},   {1,   4,   1,  64},   1},
        { "wrw", miopenHalf,   4,   0, 256, 128,  16,  32,  32,   8,   2,   1,   2,   2,   {1,   4,   1,   4},   {1,   4,   1,  64},   {1,   4,   2,   1},   {1,   4,   1,  64},   0},
        { "wrw", miopenHalf,   1,   1, 256, 128,  16,  32,  32,   8,   2,   1,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   1,   1, 256, 128,  16,  32,  32,   8,   2,   1,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   1, 256, 128,  16,  32,  32,   8,   2,   1,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1, 256, 128,  16,  32,  32,   8,   2,   1,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0, 128, 256,  32,  32,  32,   8,   1,   2,   2,   2,   {1,   4,   1,   4},   {1,   8,   1,  32},   {1,   4,   8,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0, 128, 256,  32,  32,  32,   8,   1,   2,   2,   2,   {1,   4,   1,   4},   {1,   8,   1,  32},   {1,   4,   8,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   0, 128, 256,  16,  32,  32,   8,   1,   2,   2,   2,   {1,   4,   1,   2},   {1,   4,   1,  64},   {1,   4,   4,   1},   {1,   4,   1,  64},   1},
        { "wrw", miopenHalf,   4,   0, 128, 256,  16,  32,  32,   8,   1,   2,   2,   2,   {1,   4,   1,   2},   {1,   4,   1,  64},   {1,   4,   4,   1},   {1,   4,   1,  64},   0},
        { "wrw", miopenHalf,   4,   1, 128, 256,  16,  32,  32,   8,   1,   2,   2,   2,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1, 128, 256,  16,  32,  32,   8,   1,   2,   2,   2,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1, 128, 256,  16,  32,  32,   8,   1,   2,   2,   2,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1, 128, 256,  16,  32,  32,   8,   1,   2,   2,   2,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0, 128, 128,  32,  32,  32,   8,   1,   1,   2,   2,   {1,   4,   1,   4},   {1,   8,   1,  32},   {1,   4,   4,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0, 128, 128,  32,  32,  32,   8,   1,   1,   2,   2,   {1,   4,   1,   4},   {1,   8,   1,  32},   {1,   4,   4,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   0, 128, 128,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   4,   1,   2},   {1,   4,   1,  64},   {1,   4,   2,   1},   {1,   4,   1,  64},   1},
        { "wrw", miopenHalf,   4,   0, 128, 128,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   4,   1,   2},   {1,   4,   1,  64},   {1,   4,   2,   1},   {1,   4,   1,  64},   0},
        { "wrw", miopenHalf,   4,   1, 128, 128,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1, 128, 128,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1, 128, 128,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1, 128, 128,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0, 256,  64,  32,  32,  32,   8,   1,   1,   2,   2,   {1,   4,   1,   8},   {1,   8,   1,  32},   {1,   4,   2,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0, 256,  64,  32,  32,  32,   8,   1,   1,   2,   2,   {1,   4,   1,   8},   {1,   8,   1,  32},   {1,   4,   2,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   0, 256,  64,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   4,   1,   4},   {1,   4,   1,  64},   {1,   4,   1,   1},   {1,   4,   1,  64},   1},
        { "wrw", miopenHalf,   4,   0, 256,  64,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   4,   1,   4},   {1,   4,   1,  64},   {1,   4,   1,   1},   {1,   4,   1,  64},   0},
        { "wrw", miopenHalf,   4,   1, 256,  64,  32,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,  32},   {1,  32,   1,   8},   {1,   1,   8,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1, 256,  64,  32,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,  32},   {1,  32,   1,   8},   {1,   1,   8,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1, 256,  64,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1, 256,  64,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1, 256,  64,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1, 256,  64,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0,  64, 256,  32,  32,  32,   8,   1,   1,   2,   2,   {1,   4,   1,   2},   {1,   8,   1,  32},   {1,   4,   8,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0,  64, 256,  32,  32,  32,   8,   1,   1,   2,   2,   {1,   4,   1,   2},   {1,   8,   1,  32},   {1,   4,   8,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   0,  64, 256,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   4,   1,   1},   {1,   4,   1,  64},   {1,   4,   4,   1},   {1,   4,   1,  64},   1},
        { "wrw", miopenHalf,   4,   0,  64, 256,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   4,   1,   1},   {1,   4,   1,  64},   {1,   4,   4,   1},   {1,   4,   1,  64},   0},
        { "wrw", miopenHalf,   4,   1,  64, 256,  32,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,   8},   {1,  32,   1,   8},   {1,   1,  32,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1,  64, 256,  32,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,   8},   {1,  32,   1,   8},   {1,   1,  32,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1,  64, 256,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1,  64, 256,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1,  64, 256,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1,  64, 256,  16,  32,  32,   8,   1,   1,   2,   2,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0, 128,  64,  32,  16,  16,  16,   2,   1,   2,   2,   {1,   4,   1,   4},   {1,   8,   1,  32},   {1,   4,   2,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0, 128,  64,  32,  16,  16,  16,   2,   1,   2,   2,   {1,   4,   1,   4},   {1,   8,   1,  32},   {1,   4,   2,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   0, 128,  64,  16,  32,   8,   4,   1,   2,   2,   2,   {1,   4,   1,   2},   {1,   4,   1,  64},   {1,   4,   1,   1},   {1,   4,   1,  64},   1},
        { "wrw", miopenHalf,   4,   0, 128,  64,  16,  32,   8,   4,   1,   2,   2,   2,   {1,   4,   1,   2},   {1,   4,   1,  64},   {1,   4,   1,   1},   {1,   4,   1,  64},   0},
        { "wrw", miopenHalf,   4,   1, 128,  64,  32,  16,  16,  16,   2,   1,   2,   2,   {1,   1,   1,  16},   {1,  32,   1,   8},   {1,   1,   8,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1, 128,  64,  32,  16,  16,  16,   2,   1,   2,   2,   {1,   1,   1,  16},   {1,  32,   1,   8},   {1,   1,   8,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1, 128,  64,  16,  32,   8,   4,   1,   2,   2,   2,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1, 128,  64,  16,  32,   8,   4,   1,   2,   2,   2,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1, 128,  64,  16,  32,   8,   4,   1,   2,   2,   2,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1, 128,  64,  16,  32,   8,   4,   1,   2,   2,   2,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0,  64, 128,  32,  16,  16,  16,   1,   2,   2,   2,   {1,   4,   1,   2},   {1,   8,   1,  32},   {1,   4,   4,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0,  64, 128,  32,  16,  16,  16,   1,   2,   2,   2,   {1,   4,   1,   2},   {1,   8,   1,  32},   {1,   4,   4,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   0,  64, 128,  16,   8,  32,   4,   2,   1,   2,   2,   {1,   4,   1,   1},   {1,   4,   1,  64},   {1,   4,   2,   1},   {1,   4,   1,  64},   1},
        { "wrw", miopenHalf,   4,   0,  64, 128,  16,   8,  32,   4,   2,   1,   2,   2,   {1,   4,   1,   1},   {1,   4,   1,  64},   {1,   4,   2,   1},   {1,   4,   1,  64},   0},
        { "wrw", miopenHalf,   4,   1,  64, 128,  32,  16,  16,  16,   1,   2,   2,   2,   {1,   1,   1,   8},   {1,  32,   1,   8},   {1,   1,  16,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1,  64, 128,  32,  16,  16,  16,   1,   2,   2,   2,   {1,   1,   1,   8},   {1,  32,   1,   8},   {1,   1,  16,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1,  64, 128,  16,   8,  32,   4,   2,   1,   2,   2,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1,  64, 128,  16,   8,  32,   4,   2,   1,   2,   2,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1,  64, 128,  16,   8,  32,   4,   2,   1,   2,   2,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1,  64, 128,  16,   8,  32,   4,   2,   1,   2,   2,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0, 256,  32,  32,  64,   4,   4,   1,   2,   2,   2,   {1,   4,   1,   8},   {1,   8,   1,  32},   {1,   4,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0, 256,  32,  32,  64,   4,   4,   1,   2,   2,   2,   {1,   4,   1,   8},   {1,   8,   1,  32},   {1,   4,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   0, 256,  32,  16,  64,   4,   4,   1,   2,   2,   2,   {1,   2,   1,   8},   {1,   8,   1,  32},   {1,   2,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0, 256,  32,  16,  64,   4,   4,   1,   2,   2,   2,   {1,   2,   1,   8},   {1,   8,   1,  32},   {1,   2,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   1, 256,  32,  32,  64,   4,   4,   1,   2,   2,   2,   {1,   1,   1,  32},   {1,  32,   1,   8},   {1,   1,   4,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1, 256,  32,  32,  64,   4,   4,   1,   2,   2,   2,   {1,   1,   1,  32},   {1,  32,   1,   8},   {1,   1,   4,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1, 256,  32,  16,  64,   4,   4,   1,   2,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1, 256,  32,  16,  64,   4,   4,   1,   2,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1, 256,  32,  16,  64,   4,   4,   1,   2,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1, 256,  32,  16,  64,   4,   4,   1,   2,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0,  32, 256,  32,   4,  64,   4,   2,   1,   2,   2,   {1,   4,   1,   1},   {1,   8,   1,  32},   {1,   4,   8,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0,  32, 256,  32,   4,  64,   4,   2,   1,   2,   2,   {1,   4,   1,   1},   {1,   8,   1,  32},   {1,   4,   8,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   0,  32, 256,  16,   4,  64,   4,   2,   1,   2,   2,   {1,   2,   1,   1},   {1,   8,   1,  32},   {1,   2,   8,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0,  32, 256,  16,   4,  64,   4,   2,   1,   2,   2,   {1,   2,   1,   1},   {1,   8,   1,  32},   {1,   2,   8,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   1,  32, 256,  32,   4,  64,   4,   2,   1,   2,   2,   {1,   1,   1,   4},   {1,  32,   1,   8},   {1,   1,  32,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1,  32, 256,  32,   4,  64,   4,   2,   1,   2,   2,   {1,   1,   1,   4},   {1,  32,   1,   8},   {1,   1,  32,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1,  32, 256,  16,   4,  64,   4,   2,   1,   2,   2,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1,  32, 256,  16,   4,  64,   4,   2,   1,   2,   2,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1,  32, 256,  16,   4,  64,   4,   2,   1,   2,   2,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1,  32, 256,  16,   4,  64,   4,   2,   1,   2,   2,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0,  64,  64,  32,  16,  16,  16,   1,   1,   2,   2,   {1,   4,   1,   2},   {1,   8,   1,  32},   {1,   4,   2,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0,  64,  64,  32,  16,  16,  16,   1,   1,   2,   2,   {1,   4,   1,   2},   {1,   8,   1,  32},   {1,   4,   2,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   0,  64,  64,  16,  32,  32,   8,   1,   1,   1,   1,   {1,   4,   1,   1},   {1,   4,   1,  64},   {1,   4,   1,   1},   {1,   4,   1,  64},   1},
        { "wrw", miopenHalf,   4,   0,  64,  64,  16,  32,  32,   8,   1,   1,   1,   1,   {1,   4,   1,   1},   {1,   4,   1,  64},   {1,   4,   1,   1},   {1,   4,   1,  64},   0},
        // The 64x64x32 kernels are normally less efficient than the 64x64x16 kernels for nxe==1 cases, so they are disabled for now.
        //{ "wrw", miopenHalf,   4,   1,  64,  64,  32,  16,  16,  16,   1,   1,   2,   2,   {1,   1,   1,   8},   {1,  32,   1,   8},   {1,   1,   8,   1},   {1,  32,   1,   8},   1},
        //{ "wrw", miopenHalf,   4,   1,  64,  64,  32,  16,  16,  16,   1,   1,   2,   2,   {1,   1,   1,   8},   {1,  32,   1,   8},   {1,   1,   8,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1,  64,  64,  16,  32,  32,   8,   1,   1,   1,   1,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1,  64,  64,  16,  32,  32,   8,   1,   1,   1,   1,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1,  64,  64,  16,  32,  32,   8,   1,   1,   1,   1,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1,  64,  64,  16,  32,  32,   8,   1,   1,   1,   1,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   8,   1,  64,  64,  32,  16,  16,  16,   1,   1,   2,   2,   {1,   8,   1,   1},   {1,   4,   1,  64},   {1,   8,   1,   1},   {1,   4,   1,  64},   1},
        { "wrw", miopenHalf,   4,   0, 128,  32,  32,  16,  16,  16,   1,   1,   2,   2,   {1,   4,   1,   4},   {1,   8,   1,  32},   {1,   4,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0, 128,  32,  32,  16,  16,  16,   1,   1,   2,   2,   {1,   4,   1,   4},   {1,   8,   1,  32},   {1,   4,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   0, 128,  32,  16,  64,  16,   4,   1,   1,   1,   1,   {1,   2,   1,   4},   {1,   8,   1,  32},   {1,   2,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0, 128,  32,  16,  64,  16,   4,   1,   1,   1,   1,   {1,   2,   1,   4},   {1,   8,   1,  32},   {1,   2,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   1, 128,  32,  32,  16,  16,  16,   1,   1,   2,   2,   {1,   1,   1,  16},   {1,  32,   1,   8},   {1,   1,   4,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1, 128,  32,  32,  16,  16,  16,   1,   1,   2,   2,   {1,   1,   1,  16},   {1,  32,   1,   8},   {1,   1,   4,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1, 128,  32,  16,  64,  16,   4,   1,   1,   1,   1,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1, 128,  32,  16,  64,  16,   4,   1,   1,   1,   1,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1, 128,  32,  16,  64,  16,   4,   1,   1,   1,   1,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1, 128,  32,  16,  64,  16,   4,   1,   1,   1,   1,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0,  32, 128,  32,  16,  16,  16,   1,   1,   2,   2,   {1,   4,   1,   1},   {1,   8,   1,  32},   {1,   4,   4,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0,  32, 128,  32,  16,  16,  16,   1,   1,   2,   2,   {1,   4,   1,   1},   {1,   8,   1,  32},   {1,   4,   4,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   0,  32, 128,  16,  16,  64,   4,   1,   1,   1,   1,   {1,   2,   1,   1},   {1,   8,   1,  32},   {1,   2,   4,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0,  32, 128,  16,  16,  64,   4,   1,   1,   1,   1,   {1,   2,   1,   1},   {1,   8,   1,  32},   {1,   2,   4,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   1,  32, 128,  32,  16,  16,  16,   1,   1,   2,   2,   {1,   1,   1,   4},   {1,  32,   1,   8},   {1,   1,  16,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1,  32, 128,  32,  16,  16,  16,   1,   1,   2,   2,   {1,   1,   1,   4},   {1,  32,   1,   8},   {1,   1,  16,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1,  32, 128,  16,  16,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1,  32, 128,  16,  16,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1,  32, 128,  16,  16,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1,  32, 128,  16,  16,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0, 256,  16,  32,  64,   4,   4,   1,   1,   2,   2,   {1,   2,   1,  16},   {1,  16,   1,  16},   {1,   2,   1,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   0, 256,  16,  32,  64,   4,   4,   1,   1,   2,   2,   {1,   2,   1,  16},   {1,  16,   1,  16},   {1,   2,   1,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   1, 256,  16,  32,  64,   4,   4,   1,   1,   2,   2,   {1,   1,   1,  32},   {1,  32,   1,   8},   {1,   1,   2,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1, 256,  16,  32,  64,   4,   4,   1,   1,   2,   2,   {1,   1,   1,  32},   {1,  32,   1,   8},   {1,   1,   2,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1, 256,  16,  16,  64,   4,   4,   1,   1,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1, 256,  16,  16,  64,   4,   4,   1,   1,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1, 256,  16,  16,  64,   4,   4,   1,   1,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1, 256,  16,  16,  64,   4,   4,   1,   1,   2,   2,   {1,   1,   1,  16},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0,  16, 256,  32,   4,  64,   4,   1,   1,   2,   2,   {1,   2,   1,   1},   {1,  16,   1,  16},   {1,   2,  16,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   0,  16, 256,  32,   4,  64,   4,   1,   1,   2,   2,   {1,   2,   1,   1},   {1,  16,   1,  16},   {1,   2,  16,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   1,  16, 256,  32,   4,  64,   4,   1,   1,   2,   2,   {1,   1,   1,   2},   {1,  32,   1,   8},   {1,   1,  32,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1,  16, 256,  32,   4,  64,   4,   1,   1,   2,   2,   {1,   1,   1,   2},   {1,  32,   1,   8},   {1,   1,  32,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1,  16, 256,  16,   4,  64,   4,   1,   1,   2,   2,   {1,   1,   1,   1},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1,  16, 256,  16,   4,  64,   4,   1,   1,   2,   2,   {1,   1,   1,   1},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1,  16, 256,  16,   4,  64,   4,   1,   1,   2,   2,   {1,   1,   1,   1},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1,  16, 256,  16,   4,  64,   4,   1,   1,   2,   2,   {1,   1,   1,   1},   {1,  16,   1,  16},   {1,   1,  16,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0,  64,  32,  32,  32,   8,   4,   1,   2,   1,   1,   {1,   4,   1,   2},   {1,   8,   1,  32},   {1,   4,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0,  64,  32,  32,  32,   8,   4,   1,   2,   1,   1,   {1,   4,   1,   2},   {1,   8,   1,  32},   {1,   4,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   0,  64,  32,  16,  32,   8,   4,   1,   2,   1,   1,   {1,   2,   1,   2},   {1,   8,   1,  32},   {1,   2,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0,  64,  32,  16,  32,   8,   4,   1,   2,   1,   1,   {1,   2,   1,   2},   {1,   8,   1,  32},   {1,   2,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   1,  64,  32,  32,  32,   8,   4,   1,   2,   1,   1,   {1,   1,   1,   8},   {1,  32,   1,   8},   {1,   1,   4,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1,  64,  32,  32,  32,   8,   4,   1,   2,   1,   1,   {1,   1,   1,   8},   {1,  32,   1,   8},   {1,   1,   4,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1,  64,  32,  16,  32,   8,   4,   1,   2,   1,   1,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1,  64,  32,  16,  32,   8,   4,   1,   2,   1,   1,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1,  64,  32,  16,  32,   8,   4,   1,   2,   1,   1,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1,  64,  32,  16,  32,   8,   4,   1,   2,   1,   1,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   8,   1,  64,  32,   8,  32,   8,   4,   1,   2,   1,   1,   {1,   1,   1,   2},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0,  32,  64,  32,   8,  32,   4,   2,   1,   1,   1,   {1,   4,   1,   1},   {1,   8,   1,  32},   {1,   4,   2,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0,  32,  64,  32,   8,  32,   4,   2,   1,   1,   1,   {1,   4,   1,   1},   {1,   8,   1,  32},   {1,   4,   2,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   0,  32,  64,  16,   8,  32,   4,   2,   1,   1,   1,   {1,   2,   1,   1},   {1,   8,   1,  32},   {1,   2,   2,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0,  32,  64,  16,   8,  32,   4,   2,   1,   1,   1,   {1,   2,   1,   1},   {1,   8,   1,  32},   {1,   2,   2,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   1,  32,  64,  32,   8,  32,   4,   2,   1,   1,   1,   {1,   1,   1,   4},   {1,  32,   1,   8},   {1,   1,   8,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1,  32,  64,  32,   8,  32,   4,   2,   1,   1,   1,   {1,   1,   1,   4},   {1,  32,   1,   8},   {1,   1,   8,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1,  32,  64,  16,   8,  32,   4,   2,   1,   1,   1,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1,  32,  64,  16,   8,  32,   4,   2,   1,   1,   1,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1,  32,  64,  16,   8,  32,   4,   2,   1,   1,   1,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1,  32,  64,  16,   8,  32,   4,   2,   1,   1,   1,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   8,   1,  32,  64,   8,   8,  32,   4,   2,   1,   1,   1,   {1,   1,   1,   1},   {1,   8,   1,  32},   {1,   1,   2,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0, 128,  16,  32,  32,   8,   4,   2,   1,   1,   1,   {1,   2,   1,   8},   {1,  16,   1,  16},   {1,   2,   1,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   0, 128,  16,  32,  32,   8,   4,   2,   1,   1,   1,   {1,   2,   1,   8},   {1,  16,   1,  16},   {1,   2,   1,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   1, 128,  16,  32,  32,   8,   4,   2,   1,   1,   1,   {1,   1,   1,  16},   {1,  32,   1,   8},   {1,   1,   2,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1, 128,  16,  32,  32,   8,   4,   2,   1,   1,   1,   {1,   1,   1,  16},   {1,  32,   1,   8},   {1,   1,   2,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1, 128,  16,  16,  32,   8,   4,   2,   1,   1,   1,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1, 128,  16,  16,  32,   8,   4,   2,   1,   1,   1,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1, 128,  16,  16,  32,   8,   4,   2,   1,   1,   1,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1, 128,  16,  16,  32,   8,   4,   2,   1,   1,   1,   {1,   1,   1,   8},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0,  16, 128,  32,   8,  32,   4,   1,   2,   1,   1,   {1,   2,   1,   1},   {1,  16,   1,  16},   {1,   2,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   0,  16, 128,  32,   8,  32,   4,   1,   2,   1,   1,   {1,   2,   1,   1},   {1,  16,   1,  16},   {1,   2,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   1,  16, 128,  32,   8,  32,   4,   1,   2,   1,   1,   {1,   1,   1,   2},   {1,  32,   1,   8},   {1,   1,  16,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1,  16, 128,  32,   8,  32,   4,   1,   2,   1,   1,   {1,   1,   1,   2},   {1,  32,   1,   8},   {1,   1,  16,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1,  16, 128,  16,   8,  32,   4,   1,   2,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1,  16, 128,  16,   8,  32,   4,   1,   2,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1,  16, 128,  16,   8,  32,   4,   1,   2,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1,  16, 128,  16,   8,  32,   4,   1,   2,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,  16},   {1,   1,   8,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0,  32,  32,  32,  16,  16,  16,   1,   1,   1,   1,   {1,   4,   1,   1},   {1,   8,   1,  32},   {1,   4,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0,  32,  32,  32,  16,  16,  16,   1,   1,   1,   1,   {1,   4,   1,   1},   {1,   8,   1,  32},   {1,   4,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   0,  32,  32,  16,  16,  16,   4,   1,   1,   1,   1,   {1,   2,   1,   1},   {1,   8,   1,  32},   {1,   2,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   4,   0,  32,  32,  16,  16,  16,   4,   1,   1,   1,   1,   {1,   2,   1,   1},   {1,   8,   1,  32},   {1,   2,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   1,  32,  32,  32,  16,  16,  16,   1,   1,   1,   1,   {1,   1,   1,   4},   {1,  32,   1,   8},   {1,   1,   4,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   0,  32,  32,  32,  16,  16,  16,   1,   1,   1,   1,   {1,   1,   1,   4},   {1,  32,   1,   8},   {1,   1,   4,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1,  32,  32,  16,  16,  16,   4,   1,   1,   1,   1,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1,  32,  32,  16,  16,  16,   4,   1,   1,   1,   1,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1,  32,  32,  16,  16,  16,   4,   1,   1,   1,   1,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1,  32,  32,  16,  16,  16,   4,   1,   1,   1,   1,   {1,   1,   1,   2},   {1,  16,   1,  16},   {1,   1,   2,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   8,   1,  32,  32,   8,  16,  16,   4,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   1},
        { "wrw", miopenHalf,   8,   1,  32,  32,   8,  16,  16,   4,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,   8,   1,  32},   {1,   1,   1,   1},   {1,   8,   1,  32},   0},
        { "wrw", miopenHalf,   4,   0,  64,  16,  32,  64,   4,   4,   1,   1,   1,   1,   {1,   2,   1,   4},   {1,  16,   1,  16},   {1,   2,   1,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   0,  64,  16,  32,  64,   4,   4,   1,   1,   1,   1,   {1,   2,   1,   4},   {1,  16,   1,  16},   {1,   2,   1,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   1,  64,  16,  32,  64,   4,   4,   1,   1,   1,   1,   {1,   1,   1,   8},   {1,  32,   1,   8},   {1,   1,   2,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1,  64,  16,  32,  64,   4,   4,   1,   1,   1,   1,   {1,   1,   1,   8},   {1,  32,   1,   8},   {1,   1,   2,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1,  64,  16,  16,  64,   4,   4,   1,   1,   1,   1,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1,  64,  16,  16,  64,   4,   4,   1,   1,   1,   1,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1,  64,  16,  16,  64,   4,   4,   1,   1,   1,   1,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1,  64,  16,  16,  64,   4,   4,   1,   1,   1,   1,   {1,   1,   1,   4},   {1,  16,   1,  16},   {1,   1,   1,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0,  16,  64,  32,   4,  64,   4,   1,   1,   1,   1,   {1,   2,   1,   1},   {1,  16,   1,  16},   {1,   2,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   0,  16,  64,  32,   4,  64,   4,   1,   1,   1,   1,   {1,   2,   1,   1},   {1,  16,   1,  16},   {1,   2,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   1,  16,  64,  32,   4,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   2},   {1,  32,   1,   8},   {1,   1,   8,   1},   {1,  32,   1,   8},   1},
        { "wrw", miopenHalf,   4,   1,  16,  64,  32,   4,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   2},   {1,  32,   1,   8},   {1,   1,   8,   1},   {1,  32,   1,   8},   0},
        { "wrw", miopenHalf,   4,   1,  16,  64,  16,   4,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,   4,   1,  16,  64,  16,   4,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,  16,   1,  16,  64,  16,   4,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   1},
        { "wrw", miopenHalf,  16,   1,  16,  64,  16,   4,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,  16},   {1,   1,   4,   1},   {1,  16,   1,  16},   0},
        { "wrw", miopenHalf,   4,   0,  64,   4,  32,  64,   4,   4,   1,   1,   1,   1,   {1,   2,   1,  16},   {1,  16,   1,   4},   {1,   2,   1,   1},   {1,  16,   1,   4},   1},
        { "wrw", miopenHalf,   4,   0,  64,   4,  32,  64,   4,   4,   1,   1,   1,   1,   {1,   2,   1,  16},   {1,  16,   1,   4},   {1,   2,   1,   1},   {1,  16,   1,   4},   0},
        { "wrw", miopenHalf,   4,   1,  64,   4,  32,  64,   4,   4,   1,   1,   1,   1,   {1,   1,   1,  32},   {1,  32,   1,   2},   {1,   1,   2,   1},   {1,  32,   1,   2},   1},
        { "wrw", miopenHalf,   4,   1,  64,   4,  32,  64,   4,   4,   1,   1,   1,   1,   {1,   1,   1,  32},   {1,  32,   1,   2},   {1,   1,   2,   1},   {1,  32,   1,   2},   0},
        { "wrw", miopenHalf,   4,   1,  64,   4,  16,  64,   4,   4,   1,   1,   1,   1,   {1,   1,   1,  16},   {1,  16,   1,   4},   {1,   1,   1,   1},   {1,  16,   1,   4},   1},
        { "wrw", miopenHalf,   4,   1,  64,   4,  16,  64,   4,   4,   1,   1,   1,   1,   {1,   1,   1,  16},   {1,  16,   1,   4},   {1,   1,   1,   1},   {1,  16,   1,   4},   0},
        { "wrw", miopenHalf,  16,   1,  64,   4,  16,  64,   4,   4,   1,   1,   1,   1,   {1,   1,   1,  16},   {1,  16,   1,   4},   {1,   1,   1,   1},   {1,  16,   1,   4},   1},
        { "wrw", miopenHalf,  16,   1,  64,   4,  16,  64,   4,   4,   1,   1,   1,   1,   {1,   1,   1,  16},   {1,  16,   1,   4},   {1,   1,   1,   1},   {1,  16,   1,   4},   0},
        { "wrw", miopenHalf,   4,   0,   4,  64,  32,   4,  64,   4,   1,   1,   1,   1,   {1,   2,   1,   1},   {1,  16,   1,   4},   {1,   2,  16,   1},   {1,  16,   1,   4},   1},
        { "wrw", miopenHalf,   4,   0,   4,  64,  32,   4,  64,   4,   1,   1,   1,   1,   {1,   2,   1,   1},   {1,  16,   1,   4},   {1,   2,  16,   1},   {1,  16,   1,   4},   0},
        { "wrw", miopenHalf,   4,   1,   4,  64,  32,   4,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   2},   {1,  32,   1,   2},   {1,   1,  32,   1},   {1,  32,   1,   2},   1},
        { "wrw", miopenHalf,   4,   1,   4,  64,  32,   4,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   2},   {1,  32,   1,   2},   {1,   1,  32,   1},   {1,  32,   1,   2},   0},
        { "wrw", miopenHalf,   4,   1,   4,  64,  16,   4,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,   4},   {1,   1,  16,   1},   {1,  16,   1,   4},   1},
        { "wrw", miopenHalf,   4,   1,   4,  64,  16,   4,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,   4},   {1,   1,  16,   1},   {1,  16,   1,   4},   0},
        { "wrw", miopenHalf,  16,   1,   4,  64,  16,   4,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,   4},   {1,   1,  16,   1},   {1,  16,   1,   4},   1},
        { "wrw", miopenHalf,  16,   1,   4,  64,  16,   4,  64,   4,   1,   1,   1,   1,   {1,   1,   1,   1},   {1,  16,   1,   4},   {1,   1,  16,   1},   {1,  16,   1,   4},   0},
        // clang-format on
    };
    return kernel_param_list;
}

// Look up the first kernel configuration in `tunables` whose tile sizes, split flag,
// nxb/nxe and precision all equal the requested values.
//
// The table is taken by const reference: this lookup is called from the innermost level
// of the kernel search loop, and passing the vector by value copied every entry on each
// call.
//
// Returns the index of the first matching entry, or tunables.size() (converted to int)
// when no entry matches. Callers detect "not found" by range-checking the result.
static inline int find_tunable(const std::vector<TunableImplicitGemmGTCDynamic_t>& tunables,
                               const int gemm_m_per_block,
                               const int gemm_n_per_block,
                               const int gemm_k_per_block,
                               const int gemm_k_global_split,
                               const int nxb,
                               const int nxe,
                               const miopenDataType_t precision)
{
    // Cache the element count as int so the loop compares like-signed values.
    const int tunable_count = static_cast<int>(tunables.size());
    for(int idx = 0; idx < tunable_count; ++idx)
    {
        const auto& cfg = tunables[idx];
        if((cfg.gemm_m_per_block == gemm_m_per_block) &&
           (cfg.gemm_n_per_block == gemm_n_per_block) &&
           (cfg.gemm_k_per_block == gemm_k_per_block) &&
           (cfg.gemm_k_global_split == gemm_k_global_split) && (cfg.nxb == nxb) &&
           (cfg.nxe == nxe) && (cfg.precision == precision))
        {
            return idx;
        }
    }
    // One-past-the-end index signals "no match".
    return tunable_count;
}

// Decide whether the gemm_k dimension may be split across workgroups for the given
// tile sizes. `b` is the (possibly padded) ho*wo extent supplied by the caller.
//
// Returns 1 when all of the following hold, otherwise 0:
//   - the batch size is even,
//   - doubling the tile grid stays strictly below the 1200-workgroup limit,
//   - half the batch times `b` is a multiple of gemm_k_per_block.
static inline int if_gemm_k_global_split(const ProblemDescription& problem,
                                         const int gemm_m_per_block,
                                         const int gemm_n_per_block,
                                         const int gemm_k_per_block,
                                         const int b)
{
    const int grid_limit = 1200;

    const int batch     = problem.GetBatchSize();
    const int m_extent  = problem.GetInChannels();
    const int channels  = problem.GetOutChannels();
    const int filter_h  = problem.GetWeightsHeight();
    const int filter_w  = problem.GetWeightsWidth();
    const auto n_extent = channels * filter_h * filter_w;

    // assume that gemm m/n can be divided with no remainder by gemm m/n per block
    const int tiles = (m_extent / gemm_m_per_block) * (n_extent / gemm_n_per_block);

    // Evaluated left to right with short-circuiting, exactly like a chained condition.
    const bool splittable = (batch % 2 == 0) && ((tiles << 1) < grid_limit) &&
                            ((batch >> 1) * b % gemm_k_per_block == 0);

    return splittable ? 1 : 0;
}

// Build the kernel argument list for a wrw dynamic igemm kernel.
//
// The first three entries are integer placeholders; the invokers in this file overwrite
// them with the x, dw and dy buffers before launch. The remaining entries are the
// problem sizes followed by log2_gemm_k_global_splits, the group count and the padded
// output height, in exactly the order they are pushed below.
//
// Note on naming: hi/wi are read from the problem's Out* getters and ho/wo from its In*
// getters (presumably because ProblemDescription keeps the forward-direction naming for
// wrw problems - TODO confirm).
inline std::vector<OpKernelArg>
ComputeDynamicIGemmWrwKernelArgs(const ProblemDescription& problem,
                                 const int log2_gemm_k_global_splits,
                                 const int nxb,
                                 const int gemm_k_per_block)
{
    int hi    = problem.GetOutHeight();
    int wi    = problem.GetOutWidth();
    int ho    = problem.GetInHeight();
    int wo    = problem.GetInWidth();
    int n     = problem.GetInBatchSize();
    int k     = problem.GetInChannels();
    int c     = problem.GetOutChannels();
    int y     = problem.GetWeightsHeight();
    int x     = problem.GetWeightsWidth();
    int pad_h = problem.GetPadH();
    int pad_w = problem.GetPadW();
    int group = problem.GetGroupCount();

    // Stride / dilation are forced to 1 along an axis whose extent is 1.
    int stride_h = 1;
    if(problem.GetOutHeight() > 1)
        stride_h = problem.GetKernelStrideH();
    int stride_w = 1;
    if(problem.GetOutWidth() > 1)
        stride_w = problem.GetKernelStrideW();
    int dilation_h = 1;
    if(problem.GetWeightsHeight() > 1)
        dilation_h = problem.GetDilationH();
    int dilation_w = 1;
    if(problem.GetWeightsWidth() > 1)
        dilation_w = problem.GetDilationW();

    // ho*wo rounded up to a multiple of nxb.
    int dim_b = ((ho * wo + nxb - 1) / nxb) * nxb;

    // if ho*wo<nxb(equals to dim_b<=gemm_k_per_block), ho need to be padded.
    int ho_padded = ho;
    if(dim_b <= gemm_k_per_block)
        ho_padded = integer_divide_ceil(dim_b, wo);

    std::vector<OpKernelArg> opArgs;

    // Three placeholder slots for the buffer pointers.
    for(int slot = 0; slot < 3; ++slot)
        opArgs.emplace_back(0);

    // Scalar arguments, in kernel argument order.
    for(const int value : {hi,
                           wi,
                           n,
                           k,
                           c,
                           ho,
                           wo,
                           stride_h,
                           stride_w,
                           dilation_h,
                           dilation_w,
                           pad_h,
                           pad_w,
                           y,
                           x,
                           log2_gemm_k_global_splits,
                           group,
                           ho_padded})
    {
        opArgs.emplace_back(value);
    }

    return opArgs;
}

// Compute log2 of the number of gemm_k global splits.
//
// Exponents 0..8 are tried in increasing order and the scan stops at the first exponent
// gs for which any of these fails:
//   - (grid_size << gs) does not exceed max_grid_size,
//   - n is divisible by 2^gs,
//   - (n >> gs) * b is a multiple of gemm_k_per_block.
// The result is the last exponent that passed, or 0 if even gs == 0 failed.
static inline int ComputeLog2GemmKGlobalSplits(const int& grid_size,
                                               const int& max_grid_size,
                                               const int& n,
                                               const int& b,
                                               const int& gemm_k_per_block)
{
    const int last_exponent = 8;

    int accepted  = 0;
    int candidate = 0;
    while(candidate <= last_exponent)
    {
        const int split_count = 1 << candidate;

        // Checks are short-circuited in the listed order.
        const bool rejected = ((grid_size << candidate) > max_grid_size) ||
                              ((n % split_count) != 0) ||
                              (((n >> candidate) * b) % gemm_k_per_block != 0);
        if(rejected)
            break;

        accepted = candidate;
        ++candidate;
    }
    return accepted;
}

// Find a wrw dynamic igemm xdlops kernel for `problem` with a simple heuristic search and
// report whether any kernel is applicable.
//
// Phase 1 enumerates power-of-two (gemm_m_per_block, gemm_n_per_block) tile shapes from
// the largest product downwards, combined with gemm_k_per_block, nxe and nxb, and looks
// each combination up in the kernel list with find_tunable().
// Phase 2 runs only when phase 1 selected nothing: it scans the kernel list linearly and
// takes the first fp16, nxe != 0 entry that passes the padding-case checks.
//
// Returns (is_valid, index into the kernel list, block size, grid size,
// log2 of the number of gemm_k global splits). When is_valid is false the index is -1
// and the remaining values are 0.
static inline std::tuple<bool, // is valid
                         int,  // tunable index
                         int,  // block_size
                         int,  // grid_size
                         int>  // log2 of gemm_k global splits
FindImplicitGemmWrwGTCDynamicXdlopsKernel(const ProblemDescription& problem)
{
    const int n           = problem.GetBatchSize();
    const int k           = problem.GetInChannels();
    const int c           = problem.GetOutChannels();
    const int ho          = problem.GetInHeight();
    const int wo          = problem.GetInWidth();
    const int y           = problem.GetWeightsHeight();
    const int x           = problem.GetWeightsWidth();
    const auto stride_h   = problem.GetKernelStrideH();
    const auto stride_w   = problem.GetKernelStrideW();
    // Dilation is treated as 1 along a filter axis of extent 1.
    const auto dilation_h = problem.GetWeightsHeight() > 1 ? problem.GetDilationH() : 1;
    const auto dilation_w = problem.GetWeightsWidth() > 1 ? problem.GetDilationW() : 1;
    const auto pad_h      = problem.GetPadH();
    const auto pad_w      = problem.GetPadW();
    // Anything that is not fp16 is searched as fp32.
    const auto precision  = problem.IsFp16() ? miopenHalf : miopenFloat;

    // GEMM view used by the search: gemm_m = k, gemm_n = c * y * x.
    const auto gemm_n  = c * y * x;
    const auto& gemm_m = k;

    const std::vector<TunableImplicitGemmGTCDynamic_t>& tunables =
        GetImplicitGemmWrwGTCDynamicXdlopsKernelList();

    /* applicable table (except 128x128 case):
    gemm_m/gemmn        256 64  32  16  4
                --------------------------
                256 |   0  |1  |0  |0  |0
                64  |   1  |1  |0  |0  |1
                32  |   1  |1  |1  |1  |0
                16  |   0  |1  |0  |0  |0

    */
    // Upper bound on the grid size used when computing the number of gemm_k splits.
    int max_grid_size                 = 1200;
    // Best candidate found so far; sel_index == -1 means "nothing selected yet".
    int sel_index                     = -1;
    int sel_block_size                = 0;
    int sel_grid_size                 = 0;
    int sel_log2_gemm_k_global_splits = 0;

    // Threshold base for the early exits below.
    // NOTE(review): presumably the compute-unit count of the target GPU - confirm.
    int num_cu                = 120;
    std::vector<int> nxb_list = {16, 8, 4, 1};
    std::vector<int> nxe_list = {0, 1};

    // i=log2(gemm_m_per_block*gemm_n_per_block)  to find largest kernel
    // when pack=0, means no need to search with pack image size. when pack=1, we need pack
    for(int pack = 0; pack < 2; pack++)
    {
        // switch l and r to get different kernel size like 256*64 or 64*256
        for(int i = 15; i > 7; i--)
        {
            // r + l == i. Start from the most balanced split and then widen it
            // (r grows, l shrinks) while r <= 8 and l >= 2.
            int r, l;
            r = (i + 1) >> 1;
            l = i - r;
            while(l > 1 && r < 9)
            {
                // swap == 0: m tile is 2^r and n tile is 2^l; swap == 1: the reverse.
                for(int swap = 0; swap < 2; swap++)
                {
                    const auto gemm_m_per_block = swap == 0 ? 1 << r : 1 << l;
                    const auto gemm_n_per_block = swap == 0 ? 1 << l : 1 << r;

                    // Phase 1 only accepts tiles that divide gemm_m exactly.
                    if(gemm_m % gemm_m_per_block != 0)
                        continue;

                    // j = log2(gemm_k_per_block).
                    // In wrw kernels, gemm_k_per_block={32, 16, 8, 4}, so j={5,4,3,2}.
                    for(int j = 5; j > 1; j--)
                    {
                        const auto gemm_k_per_block = 1 << j;
                        for(const auto& nxe : nxe_list)
                        {
                            for(const auto& nxb : nxb_list)
                            {
                                // The non-packed pass only considers nxb == 1.
                                if(pack == 0 && nxb != 1)
                                {
                                    continue;
                                }
                                // b is ho*wo, rounded up to a multiple of nxb only in the
                                // packed pass with nxe != 0.
                                const auto b =
                                    pack == 0
                                        ? ho * wo
                                        : (nxe == 0 ? ho * wo : ((ho * wo + nxb - 1) / nxb) * nxb);
                                const auto gemm_k = n * b;
                                // With nxe restricted to {0, 1} the divisor below is always
                                // 1, so this checks c % gemm_n_per_block.
                                if(c % (gemm_n_per_block / (nxe == 0 ? 1 : nxe)) != 0)
                                    continue;
                                if(gemm_k % gemm_k_per_block != 0)
                                    continue;
                                // nxe == 0 kernels are only considered for 1x1 filters with
                                // no dilation and no padding.
                                if(nxe == 0)
                                {
                                    if((x != 1) || (y != 1) || (dilation_h != 1) ||
                                       (dilation_w != 1) || (pad_h != 0) || (pad_w != 0))
                                        continue;
                                    if(stride_h != 1 || stride_w != 1)
                                    {
                                        if(nxb != 1)
                                            continue;
                                    }
                                    else
                                    {
                                        // nxe==0 case, need vector check(in nxe==0 case, nxb means
                                        // vector length)
                                        if(ho * wo % nxb != 0)
                                            continue;
                                    }
                                }

                                int gemm_k_global_split = if_gemm_k_global_split(problem,
                                                                                 gemm_m_per_block,
                                                                                 gemm_n_per_block,
                                                                                 gemm_k_per_block,
                                                                                 b);

                                // find_tunable() returns tunables.size() when no entry
                                // matches, which the range check below rejects.
                                int tunable_index = find_tunable(tunables,
                                                                 gemm_m_per_block,
                                                                 gemm_n_per_block,
                                                                 gemm_k_per_block,
                                                                 gemm_k_global_split,
                                                                 nxb,
                                                                 nxe,
                                                                 precision);
                                if(tunable_index < 0 || tunable_index >= tunables.size())
                                    continue;

                                int log2_gemm_k_global_splits = 0;
                                int grid_size = integer_divide_ceil(gemm_m, gemm_m_per_block) *
                                                integer_divide_ceil(gemm_n, gemm_n_per_block);
                                ;
                                int block_size            = tunables[tunable_index].GetBlockSize();
                                log2_gemm_k_global_splits = ComputeLog2GemmKGlobalSplits(
                                    grid_size, max_grid_size, n, b, gemm_k_per_block);
                                // A kernel without global split always runs with one split.
                                if(gemm_k_global_split == 0)
                                    log2_gemm_k_global_splits = 0;

                                // in nxe==1 cases, wo%tb[1] need to be 0; when tb[1] > 1, need
                                // (pad_h+pad_w)==0
                                if(nxe != 0)
                                {
                                    if(wo % tunables[tunable_index].tensor_b_thread_lengths[1] != 0)
                                    {
                                        continue;
                                    }
                                    if(tunables[tunable_index].tensor_b_thread_lengths[1] > 1 &&
                                       (pad_h != 0 || pad_w != 0))
                                    {
                                        continue;
                                    }
                                }

                                // Final grid size includes the gemm_k splits.
                                grid_size = grid_size << log2_gemm_k_global_splits;

                                // Keep the candidate only if it has at least the block size
                                // and a strictly larger grid than the current selection.
                                // The break leaves the nxb loop only; the nxe loop goes on.
                                if(block_size >= sel_block_size && grid_size > sel_grid_size)
                                {
                                    sel_block_size                = block_size;
                                    sel_grid_size                 = grid_size;
                                    sel_index                     = tunable_index;
                                    sel_log2_gemm_k_global_splits = log2_gemm_k_global_splits;
                                    break;
                                }
                            }
                        }
                        // Stop refining once the selected grid exceeds 2 * num_cu.
                        if(sel_grid_size > num_cu * 2)
                            break;
                    }
                    if(sel_grid_size > num_cu * 2)
                        break;
                }
                if(sel_grid_size > num_cu * 2)
                    break;
                r++;
                l--;
            }
            // NOTE(review): this level exits at num_cu, the inner levels at num_cu * 2 -
            // confirm the difference is intentional.
            if(sel_grid_size > num_cu)
                break;
        }
    }
    bool is_valid = !(sel_index < 0 || sel_index >= tunables.size());

    // gemm_m and gemm_n padding cases
    // Fallback: take the first kernel-list entry that passes every check below.
    if(!is_valid)
    {
        for(int cfg_index = 0; cfg_index < tunables.size(); cfg_index++)
        {
            const auto& cfg = tunables[cfg_index];
            // fp32 cases do not have padding function
            if(precision == miopenFloat)
                continue;
            if(cfg.precision != precision)
                continue;
            // nxe==0 case do not have padding function
            if(cfg.nxe == 0)
                continue;

            const auto& gemm_m_per_block = cfg.gemm_m_per_block;
            const auto& gemm_n_per_block = cfg.gemm_n_per_block;
            const auto& gemm_k_per_block = cfg.gemm_k_per_block;
            // These values are used as divisors below, so a zero is a broken table entry.
            if(gemm_m_per_block == 0 || gemm_n_per_block == 0 || gemm_k_per_block == 0 ||
               cfg.tensor_b_thread_lengths[1] == 0)
                MIOPEN_THROW("invalid config parameter");

            // ho*wo rounded up to a multiple of this config's nxb.
            const auto b      = (ho * wo + cfg.nxb - 1) / cfg.nxb * cfg.nxb;
            const auto gemm_k = n * b;
            if(gemm_k % gemm_k_per_block != 0)
            {
                continue;
            }
            // do not need to check gemm_n when n_c0 == 1; n_c0 = t_c0 * c_c0
            if(cfg.tensor_b_thread_lengths[2] * cfg.tensor_b_cluster_lengths[2] > 1)
            {

                if(c % gemm_n_per_block != 0 || gemm_m % gemm_m_per_block != 0)
                {
                    continue;
                }
            }
            else
            {
                // Here only gemm_m divisibility is checked, and only when the product
                // of tensor_a_thread_lengths[2] and [3] exceeds 1.
                if(cfg.tensor_a_thread_lengths[2] * cfg.tensor_a_thread_lengths[3] > 1)
                {
                    if(gemm_m % gemm_m_per_block != 0)
                        continue;
                }
            }

            // Same wo / padding restrictions as the nxe != 0 case of the main search.
            if(wo % cfg.tensor_b_thread_lengths[1] != 0)
            {
                continue;
            }
            if(cfg.tensor_b_thread_lengths[1] != 1 && (pad_h != 0 || pad_w != 0))
            {
                continue;
            }

            int gemm_k_global_split = if_gemm_k_global_split(
                problem, gemm_m_per_block, gemm_n_per_block, gemm_k_per_block, b);

            // if conv cannot be split, gkgs kernels cannot be used
            if(gemm_k_global_split != cfg.gemm_k_global_split)
            {
                continue;
            }

            int log2_gemm_k_global_splits = 0;
            int grid_size                 = integer_divide_ceil(gemm_m, gemm_m_per_block) *
                            integer_divide_ceil(gemm_n, gemm_n_per_block);
            int block_size = cfg.GetBlockSize();
            log2_gemm_k_global_splits =
                ComputeLog2GemmKGlobalSplits(grid_size, max_grid_size, n, b, gemm_k_per_block);
            if(gemm_k_global_split == 0)
                log2_gemm_k_global_splits = 0;

            grid_size = grid_size << log2_gemm_k_global_splits;

            // First entry that survives all checks wins; no further ranking is done.
            is_valid                      = true;
            sel_block_size                = block_size;
            sel_grid_size                 = grid_size;
            sel_index                     = cfg_index;
            sel_log2_gemm_k_global_splits = log2_gemm_k_global_splits;
            break;
        }
    }

    return std::make_tuple(
        is_valid, sel_index, sel_block_size, sel_grid_size, sel_log2_gemm_k_global_splits);
}

// Workspace required by this solver, in bytes.
//
// fp32 problems need none. For every other data type the size is that of a float
// buffer with ngroups * (k / ngroups) * (c / ngroups) * y * x elements.
size_t
ConvAsmImplicitGemmGTCDynamicWrwXdlops::GetWorkspaceSize(const ExecutionContext&,
                                                         const ProblemDescription& problem) const
{
    if(problem.IsFp32())
        return 0;

    const int in_k         = problem.GetInChannels();
    const int out_c        = problem.GetOutChannels();
    const int filter_h     = problem.GetWeightsHeight();
    const int filter_w     = problem.GetWeightsWidth();
    const auto group_count = problem.GetGroupCount();

    const auto k_per_group = in_k / group_count;
    const auto c_per_group = out_c / group_count;

    // The leading cast keeps the whole product in size_t arithmetic.
    return static_cast<size_t>(group_count) * k_per_group * c_per_group * filter_h * filter_w *
           miopen::GetTypeSize(miopenFloat);
}

// Report whether this solver can handle `problem` in the execution context `ctx`.
//
// A series of cheap rejections runs first (debug switch, deterministic attribute,
// device, direction, layout, data type, ...). Only when all of them pass is the kernel
// search executed; its validity flag is the final answer.
bool ConvAsmImplicitGemmGTCDynamicWrwXdlops::IsApplicable(const ExecutionContext& ctx,
                                                          const ProblemDescription& problem) const
{
    // Disabled through the environment, or a deterministic result was requested.
    if(env::disabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_GTC_XDLOPS) ||
       problem.GetConv().attribute.deterministic)
    {
        return false;
    }

    // Only gfx908 with assembly kernels enabled is accepted.
    if(ctx.GetStream().GetDeviceName() != "gfx908" || !ctx.use_asm_kernels)
        return false;

    // Problem shape: 2D backward-weights convolution on packed tensors whose
    // dimensions fit into int.
    if(!problem.IsDirectionBackwardWrW() || !problem.Is2d() || problem.HasNonPackedTensors() ||
       !problem.AllTensorsDimsFitIntoInt())
    {
        return false;
    }

    // Data type: fp32 or fp16 only, without casted tensors.
    const bool supported_type = problem.IsFp32() || problem.IsFp16();
    if(!supported_type || problem.IsTensorsCasted())
        return false;

    // Metadata version, grouping and layout restrictions.
    if(!ctx.rmv.IsV3() || problem.GetGroupCount() != 1 || !problem.IsLayoutDefault())
        return false;

    // Rejected when Xnack is reported and set.
    const auto& target = ctx.GetStream().GetTargetProperties();
    if(target.Xnack() && *target.Xnack())
        return false;

    // The first tuple element of the search result is the validity flag.
    return std::get<0>(FindImplicitGemmWrwGTCDynamicXdlopsKernel(problem));
}

// Build the ConvSolution for `problem`: the selected assembly kernel, its launch
// dimensions, the workspace size and an invoker factory.
//
// Three invokers exist:
//   - fp32: zero dw, then run the kernel writing into dw.
//   - fp16 with more than one gemm_k split: zero a float workspace, run the kernel
//     writing into the workspace, then cast the workspace into dw.
//   - otherwise: run the kernel writing into dw directly.
// The first two replace the handle's kernel time with the sum of their steps when
// profiling is enabled.
//
// Throws (MIOPEN_THROW) when the kernel search finds no valid kernel.
ConvSolution
ConvAsmImplicitGemmGTCDynamicWrwXdlops::GetSolution(const ExecutionContext& ctx,
                                                    const ProblemDescription& problem) const
{
    // Run the same kernel search IsApplicable() uses and unpack the selection.
    bool is_valid                 = false;
    int kernel_index              = -1;
    int block_size                = 0;
    int grid_size                 = 0;
    int log2_gemm_k_global_splits = 0;
    std::tie(is_valid, kernel_index, block_size, grid_size, log2_gemm_k_global_splits) =
        FindImplicitGemmWrwGTCDynamicXdlopsKernel(problem);

    if(!is_valid)
        MIOPEN_THROW("this kernel should not run with igemm dynamic!");

    const auto& selected_config = GetImplicitGemmWrwGTCDynamicXdlopsKernelList()[kernel_index];
    const std::string kernel_name = selected_config.GetKernelName();
    const int nxb                 = selected_config.nxb;
    const int gemm_k_per_block    = selected_config.gemm_k_per_block;

    // MIOPEN_LOG_I2(kernel_name << " with groups for reduction: "
    //                           << (1 << log2_gemm_k_global_splits));

    ConvSolution result;
    const auto required_workspace_size = GetWorkspaceSize(ctx, problem);
    result.workspace_sz                = required_workspace_size;

    // The kernel source file is the kernel name with a ".s" suffix.
    KernelInfo kernel;
    kernel.kernel_file = kernel_name + ".s";
    kernel.kernel_name = kernel_name;

    /* Note here, for API like hipHccModuleLaunchKernel(), hipExtModuleLaunchKernel()
     * grid dims is in unit of work item.
     * But for api like hipModuleLaunchKernel(), grid dim is in unit of block.
     */
    kernel.g_wk.clear();
    kernel.g_wk.push_back(static_cast<std::size_t>(grid_size) * block_size);
    kernel.g_wk.push_back(1);
    kernel.g_wk.push_back(1);

    kernel.l_wk.clear();
    kernel.l_wk.push_back(block_size);
    kernel.l_wk.push_back(1);
    kernel.l_wk.push_back(1);

    std::ostringstream options;
    GenerateClangDefsym(options, "ROCM_METADATA_VERSION", ctx.rmv.UseV3() ? 5 : 4);
    kernel.comp_options = options.str();

    MIOPEN_LOG_I2(kernel.kernel_file << ":" << kernel.kernel_name);

    result.construction_params.push_back(kernel);

    const auto& lowp_quant = problem.GetConv().lowp_quant;

    // Slots 0..2 of opArgs are placeholders that each invoker fills with buffers.
    auto opArgs =
        ComputeDynamicIGemmWrwKernelArgs(problem, log2_gemm_k_global_splits, nxb, gemm_k_per_block);

    if(problem.IsFp32())
    {
        result.invoker_factory = [=](const std::vector<Kernel>& kernels) mutable {
            return [=](const Handle& handle, const AnyInvokeParams& primitive_parameters) mutable {
                decltype(auto) invoke_params =
                    primitive_parameters.CastTo<miopen::conv::WrWInvokeParams>();
                const auto& tensors   = invoke_params.tensors;
                const auto run_kernel = handle.Run(kernels[0]);

                float elapsed = 0;
                float zero    = 0.f;

                // Adds the time of the most recent kernel when profiling is on.
                const auto add_kernel_time = [&]() {
                    if(handle.IsProfilingEnabled())
                        elapsed += handle.GetKernelTime();
                };

                opArgs[0] = OpKernelArg(tensors.x);
                opArgs[1] = OpKernelArg(tensors.dw);
                opArgs[2] = OpKernelArg(tensors.dy);

                // dw is cleared before the kernel runs.
                SetTensor(handle, tensors.dwDesc, tensors.dw, &zero);
                add_kernel_time();

                run_kernel(opArgs);
                add_kernel_time();

                if(handle.IsProfilingEnabled())
                {
                    handle.ResetKernelTime();
                    handle.AccumKernelTime(elapsed);
                }
            };
        };
    }
    else if(problem.IsFp16() && log2_gemm_k_global_splits > 0)
    {
        // Float tensor with the lengths and strides of the weights, placed in workspace.
        TensorDescriptor workspaceDesc(
            miopenFloat, problem.GetWeights().GetLengths(), problem.GetWeights().GetStrides());
        result.invoker_factory = [=](const std::vector<Kernel>& kernels) mutable {
            return [=](const Handle& handle, const AnyInvokeParams& primitive_parameters) mutable {
                decltype(auto) invoke_params =
                    primitive_parameters.CastTo<miopen::conv::WrWInvokeParams>();
                const auto& tensors       = invoke_params.tensors;
                const auto run_kernel     = handle.Run(kernels[0]);
                const auto& workSpace     = invoke_params.workSpace;
                const auto& workSpaceSize = invoke_params.workSpaceSize;

                float elapsed = 0;
                float zero    = 0.f;

                // Adds the time of the most recent kernel when profiling is on.
                const auto add_kernel_time = [&]() {
                    if(handle.IsProfilingEnabled())
                        elapsed += handle.GetKernelTime();
                };

                if(workSpace == nullptr || workSpaceSize < required_workspace_size)
                {
                    MIOPEN_THROW("Not enough workspace has been provided for "
                                 "ConvAsmImplicitGemmGTCDynamicWrwXdlops with fp16 and atomic "
                                 "add.");
                }

                // The workspace is cleared before the kernel runs.
                SetTensor(handle, workspaceDesc, workSpace, &zero);
                add_kernel_time();

                // The kernel writes to the workspace instead of dw.
                opArgs[0] = OpKernelArg(tensors.x);
                opArgs[1] = OpKernelArg(workSpace);
                opArgs[2] = OpKernelArg(tensors.dy);

                run_kernel(opArgs);
                add_kernel_time();

                // Convert the float workspace into the dw tensor.
                CastTensor(handle,
                           &lowp_quant,
                           false,
                           workspaceDesc,
                           workSpace,
                           tensors.dwDesc,
                           tensors.dw,
                           0,
                           0);
                add_kernel_time();

                if(handle.IsProfilingEnabled())
                {
                    handle.ResetKernelTime();
                    handle.AccumKernelTime(elapsed);
                }
            };
        };
    }
    else
    {
        result.invoker_factory = [=](const std::vector<Kernel>& kernels) mutable {
            return [=](const Handle& handle, const AnyInvokeParams& primitive_parameters) mutable {
                decltype(auto) invoke_params =
                    primitive_parameters.CastTo<miopen::conv::WrWInvokeParams>();
                const auto& tensors   = invoke_params.tensors;
                const auto run_kernel = handle.Run(kernels[0]);

                opArgs[0] = OpKernelArg(tensors.x);
                opArgs[1] = OpKernelArg(tensors.dw);
                opArgs[2] = OpKernelArg(tensors.dy);

                run_kernel(opArgs);
            };
        };
    }

    return result;
}

} // namespace conv
} // namespace solver
} // namespace miopen
