diff --git a/kutacc/build.sh b/kutacc/build.sh
new file mode 100644
index 0000000000000000000000000000000000000000..8bb7efdc8cb058171176dbce9baf6d854741cadf
--- /dev/null
+++ b/kutacc/build.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+# Configure, build and install the kutacc test binary under <project>/bin.
+# Usage: bash build.sh bisheng
+# The build only runs on CPUs whose /proc/cpuinfo "CPU part" is 0xd22 and only
+# when the bisheng (clang) toolchain is selected; otherwise the script is a no-op.
+set -e
+
+export KUTACC_PROJ_PATH=$(cd "$(dirname "${0}")" && pwd -P)
+export KUTACC_CLEANUP="on"
+export KUTACC_COMPILER="gcc"
+
+# Configure the test project with the selected compilers, then build and install it.
+function build_test()
+{
+    echo ""
+    echo "build test"
+    local source_path="${KUTACC_PROJ_PATH}/test"
+    local build_path="${KUTACC_PROJ_PATH}/build"
+    mkdir -p "${build_path}"
+    cmake -S "${source_path}" -B "${build_path}" \
+        -DCMAKE_C_COMPILER="${KUTACC_C_COMPILER}" \
+        -DCMAKE_CXX_COMPILER="${KUTACC_CXX_COMPILER}" \
+        -DCMAKE_INSTALL_PREFIX="${KUTACC_PROJ_PATH}"
+    cmake --build "${build_path}" -j --target install
+}
+
+# Remove the build directory left over from a previous run, if any.
+function cleanup()
+{
+    echo ""
+    echo "do cleanup"
+    local build_path="${KUTACC_PROJ_PATH}/build"
+    if [ -d "${build_path}" ]; then
+        rm -rf "${build_path}"
+        echo "cleanup ${build_path}"
+    fi
+}
+
+# Resolve the C/C++ compilers for the toolchain named by $1; only bisheng is supported.
+function set_compiler()
+{
+    case "$1" in
+        bisheng)
+            # Resolve before exporting so a missing compiler is reported instead of
+            # being masked by the exit status of `export`.
+            KUTACC_C_COMPILER=$(command -v clang) || { echo "clang not found in PATH."; exit 1; }
+            KUTACC_CXX_COMPILER=$(command -v clang++) || { echo "clang++ not found in PATH."; exit 1; }
+            export CC="${KUTACC_C_COMPILER}"
+            export CXX="${KUTACC_CXX_COMPILER}"
+            ;;
+        *)
+            echo "Unsupported compiler $1."
+            exit 1
+            ;;
+    esac
+    KUTACC_COMPILER="$1"
+}
+
+# Read the toolchain name from the command line; the last recognised name wins.
+function parse_args()
+{
+    for i in "$@"; do
+        case "$i" in
+            bisheng|gcc)
+                KUTACC_COMPILER="$i"
+                ;;
+            *)
+                echo "Unknown option: $i"
+                echo ""
+                exit 1
+                ;;
+        esac
+    done
+}
+
+# Optionally clean, select the compiler, then build.
+function main()
+{
+    echo "PROJ_PATH: " "${KUTACC_PROJ_PATH}"
+    if [[ "${KUTACC_CLEANUP,,}" == "on" ]]; then
+        cleanup
+    fi
+    set_compiler "${KUTACC_COMPILER}"
+    build_test
+}
+
+# `|| true` keeps `set -e` from aborting when /proc/cpuinfo is unreadable.
+cputype=$(awk '/CPU part/ {print $4; exit}' /proc/cpuinfo 2>/dev/null || true)
+if [[ "$cputype" == "0xd22" ]]; then
+    parse_args "$@"
+    if [[ "${KUTACC_COMPILER}" == "bisheng" ]]; then
+        main
+    fi
+fi
diff --git a/kutacc/readme.md b/kutacc/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..1771a84641849d0112e78aee505f421a9899d4a8
--- /dev/null
+++ b/kutacc/readme.md
@@ -0,0 +1,27 @@
+# kutacc-test
+
+#### 介绍
+kutacc_test 提供了一组使用 kutacc 库的用例
+
+#### 软件架构
+test目录下包含1个用例
+
+test_bgemm_ex: kutacc_core_bgemm_ex功能
+
+#### 编译方法
+
+执行以下命令
+
+bash build.sh bisheng
+
+#### 运行方法
+
+执行以下命令
+
+bash run.sh
+
+#### 运行结果
+
+运行输出结果:
+
+[PASSED] 1 tests
\ No newline at end of file
diff --git a/kutacc/run.sh b/kutacc/run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b515c8ccda9a0a282c68b4abe8b11b6febc405e4
--- /dev/null
+++ b/kutacc/run.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Run the kutacc test binary located at <script dir>/bin/test_kutacc.
+# The test runs only on CPUs whose /proc/cpuinfo "CPU part" is 0xd22 and only if
+# the binary exists; otherwise the script does nothing.
+proj_path=$(cd "$(dirname "${0}")" && pwd -P)
+test_bin="${proj_path}/bin/test_kutacc"
+cputype=$(awk '/CPU part/ {print $4; exit}' /proc/cpuinfo 2>/dev/null)
+if [[ "$cputype" == "0xd22" ]]; then
+    if [ -e "${test_bin}" ]; then
+        "${test_bin}"
+    fi
+fi
diff --git a/kutacc/test/CMakeLists.txt b/kutacc/test/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fe16486ac550c56c0ef5bae7abf661d33c3ebb89
--- /dev/null
+++ b/kutacc/test/CMakeLists.txt
@@ -0,0 +1,50 @@
+cmake_minimum_required(VERSION 3.14)
+
+project(kutacc_test VERSION 1.0.0)
+
+# Pick the LIBRARY_PATH substring and the coverage library that match the compiler.
+if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+    set(COMPILER_FILTER "gcc")
+    set(COV_LIB "gcov")
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    set(COMPILER_FILTER "clang")
+    set(COV_LIB "")
+else()
+    message(FATAL_ERROR "Unsupported compiler: ${CMAKE_CXX_COMPILER_ID}")
+endif()
+
+# Use only the LIBRARY_PATH entries that mention the active compiler as GTest
+# search hints. $ENV{} is read at configure time only.
+string(REPLACE ":" ";" FILTERED_PATHS "$ENV{LIBRARY_PATH}")
+list(FILTER FILTERED_PATHS INCLUDE REGEX "${COMPILER_FILTER}")
+
+# GTest is optional here: test_kutacc is not linked against it.
+find_package(GTest
+    HINTS ${FILTERED_PATHS}
+)
+
+# CONFIGURE_DEPENDS re-checks the glob at build time so new test sources are picked up.
+file(GLOB_RECURSE TEST_KUTACC_FILES CONFIGURE_DEPENDS
+    "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
+)
+
+add_executable(test_kutacc ${TEST_KUTACC_FILES})
+
+# Unoptimised debug build with coverage instrumentation, targeting armv8.6-a with
+# the sme-f64f64, fp16 and bf16 extensions and 512-bit SVE vectors.
+target_compile_options(test_kutacc PRIVATE
+    -O0 -g -gdwarf-4
+    -fprofile-arcs -ftest-coverage
+    -march=armv8.6-a+sme-f64f64+fp16+bf16
+    -msve-vector-bits=512
+)
+# Target-scoped so that user-supplied CMAKE_EXE_LINKER_FLAGS are not overwritten.
+target_link_options(test_kutacc PRIVATE -fprofile-arcs -ftest-coverage)
+
+target_link_libraries(test_kutacc PRIVATE
+    -Wl,--start-group
+    kutacc_core ${COV_LIB}
+    -Wl,--end-group
+)
+
+install(TARGETS test_kutacc RUNTIME DESTINATION bin)
diff --git a/kutacc/test/test_bgemm_ex.cpp b/kutacc/test/test_bgemm_ex.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7f7bc67b8a32bbb6c9086851ff3e48eba521b332
--- /dev/null
+++ b/kutacc/test/test_bgemm_ex.cpp
@@ -0,0 +1,135 @@
+#undef NEEDBUNDERSCORE
+
+// NOTE(review): the header names on the five system #include lines were lost in the
+// extracted patch; they are restored from the symbols this file uses - confirm.
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <arm_neon.h>
+#include "kutacc_core.h"
+
+#define MATRIX_ROW 31
+#define MATRIX_COL 15
+#define BF16_EPS 5.0
+
+// Choose the leading dimensions: lda is m for transA == 'N' and k otherwise; ldb is
+// k for transB == 'N' and n otherwise (m = n = MATRIX_ROW, k = MATRIX_COL).
+static void cal_leading_dimension(char transA, char transB, BLASINT *lda, BLASINT *ldb)
+{
+    BLASINT m = MATRIX_ROW;
+    BLASINT n = MATRIX_ROW;
+    BLASINT k = MATRIX_COL;
+
+    *lda = (transA == 'N') ? m : k;
+    *ldb = (transB == 'N') ? k : n;
+}
+
+// Reference result accumulated in float: expect_c = alpha * op(A) * op(B) + beta * expect_c,
+// with every element converted back to bf16. Indexing uses the fixed
+// MATRIX_ROW / MATRIX_COL strides rather than caller-supplied leading dimensions.
+static void init_expect_c_matrix(char transA, char transB, BLASINT m, BLASINT n, BLASINT k, __bf16 alpha, __bf16 beta,
+    __bf16 bgemm_a[], __bf16 bgemm_b[], __bf16 expect_c[])
+{
+    const float alpha_f = vcvtah_f32_bf16(alpha);
+    const float beta_f = vcvtah_f32_bf16(beta);
+    for (int j = 0; j < n; j++) {
+        for (int i = 0; i < m; i++) {
+            float cij = 0;
+            for (int t = 0; t < k; t++) {
+                float aik = (transA == 'N') ? vcvtah_f32_bf16(bgemm_a[i + t * MATRIX_ROW]) :
+                                              vcvtah_f32_bf16(bgemm_a[t + i * MATRIX_COL]);
+                float bkj = (transB == 'N') ? vcvtah_f32_bf16(bgemm_b[t + j * MATRIX_COL]) :
+                                              vcvtah_f32_bf16(bgemm_b[j + t * MATRIX_ROW]);
+                cij += aik * bkj;
+            }
+            expect_c[i + j * MATRIX_ROW] =
+                vcvth_bf16_f32(alpha_f * cij + beta_f * vcvtah_f32_bf16(expect_c[i + j * MATRIX_ROW]));
+        }
+    }
+}
+
+// Run one prepacked bgemm_ex case and compare it with the float reference.
+// Prints exactly one "[PASSED]" / "[FAILED]" verdict (or the bad-mask message)
+// and returns true only when the case passed.
+static bool test_bgemm_ex(char transA, char transB, uint32_t prepack_mask)
+{
+    __bf16 bgemm_a[MATRIX_ROW * MATRIX_COL];
+    __bf16 bgemm_b[MATRIX_COL * MATRIX_ROW];
+    __bf16 bgemm_c[MATRIX_ROW * MATRIX_ROW];
+    __bf16 expect_c[MATRIX_ROW * MATRIX_ROW];
+
+    for (int i = 0; i < MATRIX_ROW * MATRIX_COL; i++) {
+        bgemm_a[i] = vcvth_bf16_f32(1 + 0.01 * i);
+        bgemm_b[i] = vcvth_bf16_f32(1 + 0.01 * i);
+    }
+    for (int i = 0; i < MATRIX_ROW * MATRIX_ROW; i++) {
+        bgemm_c[i] = vcvth_bf16_f32(1 + 0.01 * i);
+        expect_c[i] = vcvth_bf16_f32(1 + 0.01 * i);
+    }
+
+    BLASINT m = MATRIX_ROW;
+    BLASINT n = MATRIX_ROW;
+    BLASINT k = MATRIX_COL;
+    BLASINT lda = m;
+    BLASINT ldb = n;
+    BLASINT ldc = m;
+    __bf16 alpha = vcvth_bf16_f32(1.0);
+    __bf16 beta = vcvth_bf16_f32(3.0);
+
+    init_expect_c_matrix(transA, transB, m, n, k, alpha, beta, bgemm_a, bgemm_b, expect_c);
+    cal_leading_dimension(transA, transB, &lda, &ldb);
+
+    BlasExtendParam extend_param = {.type = BLAS_EXTEND_TYPE_PREPACK, .extra = prepack_mask, .next = NULL};
+
+    // The A bit takes precedence when both prepack bits are set.
+    const bool pack_a = (prepack_mask & BLAS_EXTEND_PREPACK_A_MASK) != 0;
+    if (!pack_a && !(prepack_mask & BLAS_EXTEND_PREPACK_B_MASK)) {
+        printf("input param prepack_mask error!\n");
+        return false;
+    }
+
+    size_t pack_size = kutacc_core_bgemm_pack_get_size(pack_a ? 'A' : 'B', m, n, k);
+    if (pack_size != (size_t)(MATRIX_ROW * MATRIX_COL)) {
+        printf("[FAILED] 1 tests\n");
+        return false;
+    }
+    __bf16 *packed = (__bf16 *)malloc(pack_size * sizeof(__bf16));
+    if (packed == NULL) {
+        printf("[FAILED] 1 tests\n");
+        return false;
+    }
+
+    if (pack_a) {
+        kutacc_core_bgemm_pack('A', transA, transB, m, n, k, lda, ldb, bgemm_a, packed);
+        kutacc_core_bgemm_ex(transA, transB, m, n, k, alpha, packed, lda, bgemm_b, ldb, beta, bgemm_c, ldc,
+            &extend_param);
+    } else {
+        kutacc_core_bgemm_pack('B', transA, transB, m, n, k, lda, ldb, bgemm_b, packed);
+        kutacc_core_bgemm_ex(transA, transB, m, n, k, alpha, bgemm_a, lda, packed, ldb, beta, bgemm_c, ldc,
+            &extend_param);
+    }
+
+    // compare bgemm_ex result
+    bool passed = true;
+    for (int i = 0; i < MATRIX_ROW * MATRIX_ROW; i++) {
+        if (fabs(vcvtah_f32_bf16(bgemm_c[i]) - vcvtah_f32_bf16(expect_c[i])) > BF16_EPS) {
+            passed = false;
+            break;
+        }
+    }
+    free(packed);
+
+    if (passed) {
+        printf("[PASSED] 1 tests\n");
+    } else {
+        printf("[FAILED] 1 tests\n");
+    }
+    return passed;
+}
+
+int main()
+{
+    // A non-zero exit status lets callers detect a failed case.
+    return test_bgemm_ex('N', 'N', BLAS_EXTEND_PREPACK_A_MASK) ? 0 : 1;
+}