mirror of
https://github.com/uxlfoundation/oneDNN.git
synced 2025-10-20 18:43:49 +08:00
xe: ukernel: Add new ukernel entries for MoE support
This commit is contained in:
@ -94,8 +94,13 @@ Package selectGEMMMicrokernel(GEMMProtocol protocol, HWInformation hwInfo, SizeP
|
||||
|
||||
bool isIntegrated = getPlatformType(product.family) == PlatformType::Integrated;
|
||||
|
||||
/* Strip internal upconversions */
|
||||
auto problemMatch = problem;
|
||||
if (problemMatch.Ta_ext.bits() < problemMatch.Ta.bits()) problemMatch.Ta = problemMatch.Ta_ext;
|
||||
if (problemMatch.Tb_ext.bits() < problemMatch.Tb.bits()) problemMatch.Tb = problemMatch.Tb_ext;
|
||||
|
||||
/* Create catalog matcher */
|
||||
MatchParams matchParams(hw, hwInfo.systolicAvailable, isIntegrated, problem);
|
||||
MatchParams matchParams(hw, hwInfo.systolicAvailable, isIntegrated, problemMatch);
|
||||
|
||||
matchParams.sizes = sizes;
|
||||
matchParams.stepping = stepping;
|
||||
|
@ -104,6 +104,12 @@ auto _CATALOG_ = kcatalog::toArray({
|
||||
{{'E', "ugemm", {"D", "D", "D"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB4x2 aB4x2 rB", {8, (LoopType) 0, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 768}, {8192, 8192, 768}, {8, 8, 4}, {2, 8, 1}, 1, (WGType) 0, 1, 0, 0, {8, 8, 8}, {true, true, false}}, {'W', 1, {64}}},
|
||||
{{'E', "ugemm", {"D", "D", "D"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aS4x2 aS4x2 rB", {8, (LoopType) 0, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 256}, {8192, 8192, 256}, {8, 8, 4}, {2, 8, 1}, 1, (WGType) 0, 1, 0, 0, {8, 8, 8}, {true, true, false}}, {'W', 1, {64}}},
|
||||
{{'E', "ugemm", {"D", "D", "D"}, {"T", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aS4x2 aB4x2 rB", {8, (LoopType) 0, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 512}, {8192, 8192, 512}, {8, 8, 4}, {2, 8, 1}, 1, (WGType) 0, 1, 0, 0, {8, 8, 8}, {true, true, false}}, {'W', 1, {64}}},
|
||||
{{'E', "ugemm", {"F", "H", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "Ixy"}, "aB16 aB16 rB wg 8x8 cab4 ks16 af dw vav sn grf256 sys l4", {8, (LoopType) 0, 256, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {8192, 8192, 16777216}, {32, 32, 16}, {8, 8, 1}, 1, (WGType) 1, 257, 65536, 0, {2, 2, 4}, {false, false, true}}, {'E', 17, {568500, 1.28115e+06, 0, 0, 0, 0, 5.43293, 5.22848, 6.20609, 17.6026, 0.0197921, 0.0197921, 0, 1, 1.32313, 1.19039, 7.17197e-13}}},
|
||||
{{'E', "ugemm", {"F", "H", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "ixy"}, "aB2x2 aB2x2 rB wg 4x8 kc2 cab4 ks8 nse sn l4 grf256", {8, (LoopType) 0, 128, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 16777216}, {32, 16, 8}, {4, 8, 1}, 1, (WGType) 1, 257, 32768, 0, {1, 2, 4}, {false, false, true}}, {'W', 1, {512}}},
|
||||
{{'E', "ugemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "I"}, "aB32 aB64 rB wg 8x4 cab4 ks64 af dw vav sm sn sys l4", {8, (LoopType) 0, 128, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {131072, 131072, 16777216}, {8192, 8192, 16777216}, {8, 8, 64}, {8, 4, 1}, 1, (WGType) 1, 257, 49152, 0, {2, 2, 4}, {false, false, true}}, {'E', 17, {1.06234e+06, 445401, 0, 0, 0, 0, 4.62244, 5.31323, 3.78655, 11.4316, 0.08593, 0.0721937, 0.0244972, 1, 1.21089, 1.20155, -5.01322e-15}}},
|
||||
{{'E', "ugemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "I"}, "aB32 aB32 rB wg 4x4 cab3 ks64 af dw vav sm sn grf256 sys l4", {8, (LoopType) 0, 256, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {262144, 65536, 16777216}, {8192, 8192, 16777216}, {16, 4, 64}, {4, 4, 1}, 1, (WGType) 1, 257, 30720, 0, {2, 2, 4}, {false, false, true}}, {'E', 17, {1.24598e+06, 199092, 0, 0, 0, 0, 5.11406, 6.54063, 3.6868, 10.138, 0.114685, 0.0491769, 0.0713602, 0.902881, 1.20724, 1.20086, 6.44189e-15}}},
|
||||
{{'E', "ugemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "Ixyz"}, "aS32x2 aB16 rB wg 16x2 cb4 ks32 xaf dw vav sn dm grf256 sys", {8, (LoopType) 0, 256, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {262144, 1048576, 16777216}, {262144, 1048576, 16777216}, {16, 64, 32}, {16, 2, 1}, 1, (WGType) 1, 257, 32768, 0, {4, 4, 4}, {false, false, false}}, {'E', 17, {982213, 473301, 0, 0, 0, 0, 1.74644, 5.1767, 6.10829, 17.1708, 0.0167439, 0.0136956, 0.00599404, 0.999577, 1.37511, 1.22059, 7.53689e-13}}},
|
||||
{{'E', "ugemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, 33, -1}, {-1, -1, -1}, {4, 4, 1}, "xyIs"}, "aB16 aB32 rB wg 4x8 cab3x2 ks32 xaf st dw vav dm grf256 sys", {8, (LoopType) 0, 256, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {524288, 393216, 16777216}, {524288, 393216, 16777216}, {32, 24, 32}, {4, 8, 1}, 1, (WGType) 1, 257, 61440, 0, {4, 4, 4}, {false, false, true}}, {'E', 17, {1.01302e+06, 570829, 0, 0, 0, 0, 3.67307, 6.66635, 6.86396, 18.2302, 0.0202076, 0.0155595, 0.00597746, 1, 1.56109, 1.12816, 4.46535e-12}}},
|
||||
{{'E', "ugemm", {"H", "H", "H"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "@ipq"}, "aB8/4 aB8 rB wg 8x4 kc8 cab4 ks8 nse di sn l4", {8, (LoopType) 0, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 2048}, {8192, 8192, 2048}, {32, 32, 8}, {8, 4, 1}, 1, (WGType) 1, 1, 24576, 0, {2, 2, 2}, {true, true, false}}, {'W', 1, {1024}}},
|
||||
{{'E', "ugemm", {"H", "H", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "I"}, "aB16 aB32 rB wg 8x8 cab4x2 ks32 xaf dw vav di sn grf256 sys l4 dm", {8, (LoopType) 0, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 2048}, {8192, 8192, 2048}, {16, 8, 32}, {8, 8, 1}, 1, (WGType) 1, 257, 49152, 0, {2, 2, 4}, {true, true, false}}, {'E', 17, {577632, 1.31042e+06, 0, 0, 0, 0, 6.15959, 6.87283, 4.53581, 13.1549, 0.0527369, 0.0527369, 0, 1, 1.2126, 1.20117, 9.25713e-15}}},
|
||||
{{'E', "ugemm", {"H", "H", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aB16 aB32 rB wg 2x16 cab4 ks64 af dw vav di sn grf256 sys dm", {8, (LoopType) 0, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 2048}, {8192, 8192, 2048}, {16, 4, 64}, {2, 16, 1}, 1, (WGType) 1, 257, 49152, 0, {4, 4, 4}, {true, true, false}}, {'E', 17, {1.14032e+06, 564642, 0, 0, 0, 0, 6.56444, 4.63397, 2.77135, 9.77858, 0.0825907, 0.035527, 0.0779663, 0.969555, 1.21564, 1.20197, 9.03249e-16}}},
|
||||
@ -337,6 +343,14 @@ auto _CATALOG_ = kcatalog::toArray({
|
||||
{{'F', "ugemm", {"D", "D", "D"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aS8x2+S16@24 aS16+S16@32 rB wg 4x2 kc8 nse di sm sn grf256 sb32", {16, (LoopType) 255, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 256}, {8192, 8192, 256}, {8, 8, 16}, {4, 2, 1}, 1, (WGType) 1, 257, 0, 0, {8, 8, 8}, {true, true, false}}, {'E', 17, {1.2692e+06, -138155, -69946.2, 247166, 2.48218e+06, 0, 2.22535, 2.9076, -0.226323, 2.36118, 0.179437, 0.0955498, 0.0965902, 0.986833, 1.48781, 1.14074, 6.5259e-12}}},
|
||||
{{'F', "ugemm", {"D", "D", "D"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aS16+S8@32 aS8+S1,8@32 rB wg 16x1 kc8 nse di nmk sm sn grf256 sb32", {16, (LoopType) 255, 256, {(LoopType) 1, (LoopType) 0, (LoopType) 255}, {8192, 8192, 256}, {8192, 8192, 256}, {8, 16, 16}, {16, 1, 1}, 1, (WGType) 1, 257, 0, 0, {8, 8, 8}, {true, true, false}}, {'E', 17, {1.08982e+06, -365235, -57570.3, 563017, 2.44122e+06, 0, 1.97404, 6.45479, 0.729323, 2.0721, 0.109576, 0.0188555, 0.121551, 0.995776, 1.6362, 1.09808, 8.84348e-12}}},
|
||||
{{'F', "ugemm", {"D", "D", "D"}, {"T", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aS8+S1,8@16 aB8+B8@16 rS nse di wg 8x4 kc8 sm sb256 grf256", {16, (LoopType) 255, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 512}, {8192, 8192, 512}, {32, 32, 8}, {8, 4, 1}, 1, (WGType) 1, 257, 0, 0, {8, 8, 8}, {true, true, false}}, {'E', 17, {863507, 843091, 0, 0, 0, 0, 2.99578, 2.42156, 4.42824, 4.74906, 0.0650645, 0.0650645, 0, 0.993213, 1.59693, 1.16972, 1.1178e-11}}},
|
||||
{{'F', "ugemm", {"F", "H", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "#I"}, "aB16x2 aB16x2 rB wg 4x8 cab4 ks64 af vav sn nb 4x8 dm grf256 sys l4", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 131072, 16777216}, {524288, 131072, 16777216}, {32, 8, 64}, {4, 8, 1}, 1, (WGType) 1, 257, 98304, 0, {2, 2, 4}, {true, true, true}}, {'E', 17, {1.08073e+06, 763490, 0, 0, 0, 0, 0.80246, 1.46921, 0.963229, 2.26732, 0.0122976, 0.0122976, 0, 0.986616, 1.49449, 0.966336, 3.40072e-12}}},
|
||||
{{'F', "ugemm", {"F", "H", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "#I"}, "aB16x2 aB16x2 rB wg 2x16 cab3x2 ks32 af vav sn nb 2x16 dm grf256 sys l4", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 65536, 16777216}, {524288, 65536, 16777216}, {32, 4, 32}, {2, 16, 1}, 1, (WGType) 1, 257, 24576, 0, {1, 2, 4}, {true, true, true}}, {'E', 17, {1.03544e+06, 807346, 0, 0, 0, 0, 1.28444, 1.03004, 1.50088, 3.00586, 0.0180079, 0.0180079, 0, 0.984218, 1.40315, 0.791237, 5.90572e-12}}},
|
||||
{{'F', "ugemm", {"F", "H", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "BIp"}, "aB16+m32@48 am32+S32@64 rB wg 4x8 xaf st vav ca4x2 ks16 sn grf256 sys rr", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {1048576, 524288, 16777216}, {1048576, 524288, 32}, {32, 32, 32}, {4, 8, 1}, 1, (WGType) 1, 441, 32768, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {802877, 573250, 0, 0, 1.50997e+07, 5.83296e+06, 1.33358, 0.860821, 1.18713, 2.11721, 0.00513884, 0.00513884, 0, 0.996337, 1.41895, 1.02888, 2.72693e-12}}},
|
||||
{{'F', "ugemm", {"F", "H", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "BIp"}, "aB16+m32@48 am32+S32@64 rB wg 4x8 xaf st vav ca4x2 ks16 sn grf256 sys rr", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {1048576, 524288, 16777216}, {1048576, 524288, 32}, {64, 32, 32}, {4, 8, 1}, 1, (WGType) 1, 441, 32768, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {953955, 796880, 0, 0, 6.54705e+06, 1.02728e+07, 0.868475, 0.996885, 0.927331, 1.55842, 0.00423299, 0.00423299, 0, 0.997407, 1.58612, 1.11537, 2.43106e-12}}},
|
||||
{{'F', "ugemm", {"F", "H", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, -1, -1}, {4, 4, 1}, "ABI"}, "aB16x2+m16@32 at16x2+m16@32 rB wg 1x16 xaf st vav sb64 grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 224, (LoopType) 255, (LoopType) 255}, {262144, 262144, 16777216}, {262144, 262144, 32}, {16, 16, 16}, {1, 16, 1}, 1, (WGType) 1, 441, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.03366e+06, 206195, 0, 0, 2.5985e+06, 2.90488e+06, 5.86262, 0.614358, 0.589806, 1.16158, 0.0300187, 0.00214889, 0.0294786, 0.604289, 1.43473, 0.822773, 7.94852e-12}}},
|
||||
{{'F', "ugemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABIp"}, "at32+m32@64 am32/16+m16@64 rB wg 8x4 xaf st vav sb64 sm sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 1048576, 16777216}, {524288, 1048576, 32}, {32, 64, 32}, {8, 4, 1}, 1, (WGType) 1, 441, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {867949, 633849, 0, 0, 5.89824e+06, 1.00844e+07, 0.930403, 0.918787, 0.795424, 1.23078, 0.00404692, 0.00404692, 0, 0.990134, 1.69975, 1.14597, 2.34849e-12}}},
|
||||
{{'F', "ugemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "IABs"}, "at32+m32@64 am32+m32@64 rB wg 8x4 xaf st vav sb64 sm sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 917504, 16777216}, {524288, 917504, 32}, {32, 56, 32}, {8, 4, 1}, 1, (WGType) 1, 441, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {871124, 600159, 0, 0, 5.72375e+06, 8.32307e+06, 0.555762, 0.650262, 0.789585, 1.21694, 0.00411938, 0.00411938, 0, 1, 1.69975, 1.10825, 2.61032e-12}}},
|
||||
{{'F', "ugemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "IABs"}, "at64x2+m64@64 am32+m32@64 rB wg 8x4 xaf fx vav sb64 sm sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 786432, 16777216}, {524288, 786432, 64}, {32, 48, 64}, {8, 4, 1}, 1, (WGType) 1, 441, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {866537, 568292, 0, 0, 4.29425e+06, 7.31546e+06, 0.475793, 0.638791, 0.753591, 1.18019, 0.0041899, 0.0041899, 0, 1, 1.78867, 1.11209, 1.76449e-12}}},
|
||||
{{'F', "ugemm", {"H", "H", "S"}, {"A2#16,16", "B16", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "I"}, "aB16+B16@48 aB16+B16@48 rB vav di sys grf256 af wg 4x8 sb256", {16, (LoopType) 255, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 2048}, {8192, 8192, 2048}, {64, 32, 16}, {4, 8, 1}, 1, (WGType) 1, 256, 0, 0, {128, 128, 4}, {true, true, false}}, {'W', 1, {2048}}},
|
||||
{{'F', "ugemm", {"H", "H", "S"}, {"A2", "B16", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "I"}, "aB16x2+B16@80 aB16x2+B16@80 rB vav sb256 wg 4x8 di sys", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 2048}, {8192, 8192, 2048}, {16, 16, 16}, {4, 8, 1}, 1, (WGType) 1, 256, 0, 0, {64, 128, 4}, {true, true, false}}, {'W', 1, {256}}},
|
||||
{{'F', "ugemm", {"H", "H", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av16+m64@64 am16 rB wg 8x4 cb4x2 ks32 xaf vav di sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 2048}, {8192, 8192, 2048}, {32, 32, 32}, {8, 4, 1}, 1, (WGType) 1, 257, 32768, 0, {8, 8, 4}, {true, true, false}}, {'E', 17, {1.00706e+06, 522382, 0, 0, 0, 0, 0.725659, 1.44632, 0.970408, 1.74134, 0.0067111, 0.0067111, 0, 0.90349, 1.42986, 1.13348, 2.91269e-12}}},
|
||||
@ -613,5 +627,9 @@ auto _CATALOG_ = kcatalog::toArray({
|
||||
{{'F', "ugemm", {"T", "T", "S"}, {"T", "N", "N"}}, {5, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "#AI"}, "at8+m16@8 aB16 rB wg 8x1 cb4x2 ks16 xaf st vav di sm sn dm grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 1024}, {8192, 8192, 1024}, {16, 64, 16}, {8, 1, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {true, true, false}}, {'E', 17, {1.69161e+06, -246817, -147908, 433263, 2.74432e+06, 3.16211e+06, 0.956798, 1.40211, 0.805897, 1.31444, 0.0181823, 0.0148306, 0.00804353, 1, 1.44649, 0.968708, 7.35847e-12}}},
|
||||
{{'F', "ugemm", {"T", "T", "S"}, {"T", "N", "N"}}, {5, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "#AI"}, "at16+m16@16 aB8x2 rB wg 4x2 cb4x2 ks16 xaf st vav di sm sn dm grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 1024}, {8192, 8192, 1024}, {16, 32, 16}, {4, 2, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {true, true, false}}, {'E', 17, {1.8509e+06, -175907, -189196, 392631, 2.74432e+06, 2.18726e+06, 1.12474, 1.04753, 0.43698, 1.42448, 0.0254728, 0.0206988, 0.0107976, 0.953996, 1.38293, 0.912126, 8.89018e-12}}},
|
||||
{{'F', "ugemm", {"T", "T", "S"}, {"T", "N", "N"}}, {5, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "#AI"}, "at8x2+m16@24 aB16 rB wg 4x2 cb4x2 ks32 af vav di sm sn dm grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 1024}, {8192, 8192, 1024}, {16, 16, 32}, {4, 2, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {true, true, false}}, {'E', 17, {1.64426e+06, -145120, -138637, 309031, 2.31014e+06, 2.00704e+06, 0.918257, 1.44548, 0.262092, 1.02772, 0.0377766, 0.0386483, 0.015333, 0.989041, 1.28006, -1.30164, 1.0788e-10}}},
|
||||
{{'F', "ugemm", {"T", "T", "S"}, {"T", "N", "N"}}, {5, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "#AI"}, "at16+m16@24 aB16x2 rB wg 2x2 cb4x2 ks16 xaf vav di sm sn dm grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 1024}, {8192, 8192, 1024}, {16, 16, 16}, {2, 2, 1}, 1, (WGType) 1, 257, 8192, 0, {4, 4, 4}, {true, true, false}}, {'E', 17, {1.65746e+06, -77290.3, -69015.6, 169032, 2.75251e+06, 1.85958e+06, 0.958255, 0.997806, 0.100098, 0.957911, 0.0468295, 0.0427724, 0.0158824, 1, 1.31247, 0.973533, 8.29974e-12}}}
|
||||
{{'F', "ugemm", {"T", "T", "S"}, {"T", "N", "N"}}, {5, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "#AI"}, "at16+m16@24 aB16x2 rB wg 2x2 cb4x2 ks16 xaf vav di sm sn dm grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {8192, 8192, 1024}, {8192, 8192, 1024}, {16, 16, 16}, {2, 2, 1}, 1, (WGType) 1, 257, 8192, 0, {4, 4, 4}, {true, true, false}}, {'E', 17, {1.65746e+06, -77290.3, -69015.6, 169032, 2.75251e+06, 1.85958e+06, 0.958255, 0.997806, 0.100098, 0.957911, 0.0468295, 0.0427724, 0.0158824, 1, 1.31247, 0.973533, 8.29974e-12}}},
|
||||
{{'G', "ugemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 17, -1}, {-1, 24, -1}, {-1, 17, -1}, {-1, 24, -1}, {16, 16, 1}, "ABI"}, "at64x2 am32x2+m64@32 rB wg 4x2 af rr vav sb64 grf256 sys np np", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {4, 2, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'W', 1, {1024}}},
|
||||
{{'G', "ugemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {1024, 128, 1024}, {4096, 512, 4096}, {1024, 128, 1024}, {4096, 512, 4096}, {16, 16, 1}, "ABIhr"}, "at32+m64@40 am16+m64@40 rB wg 4x4 xaf rr vav sb64 sm sn sys np", {16, (LoopType) 255, 128, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 32}, {4, 4, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {548120, 53799.9, 0, 0, 0, 0, 0.610964, 1.95, 1.59225, 4.49998, 0.00862291, 0.043525, 0, 1, 1.44207, 0.952908, 1.47695e-11}}},
|
||||
{{'G', "ugemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {1025, 129, 1}, {-1, 768, 3583}, {1025, 129, 1}, {-1, 768, 3583}, {16, 16, 1}, "ABIh"}, "at64x2+m64@8 am32x2+m32@8 rB wg 4x2 xaf st vav sb64 sm sn grf256 sys np", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {4, 2, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {256058, 33684.4, 0, 0, 0, 0, 0.504291, 1.89712, 3.71959, 8.91844, 0.00785564, 0.00768242, 0.00317825, 1, 1.39291, 1.00693, 2.48102e-12}}},
|
||||
{{'G', "ugemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {3072, 448, 1}, {-1, -1, 8192}, {3072, 448, 1}, {-1, -1, 8192}, {16, 16, 1}, "ABIh"}, "at64x2+m64@8 am32x2+m32@8 rB wg 4x2 xaf st vav sb64 sm sn grf256 sys np", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {4, 2, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {256058, 33684.4, 0, 0, 0, 0, 0.504291, 1.89712, 3.71959, 8.91844, 0.00785564, 0.00768242, 0.00317825, 1, 1.39291, 1.00693, 2.48102e-12}}},
|
||||
});
|
||||
|
Reference in New Issue
Block a user