Patch summary (free text ahead of the first file header):
  * kurtm/run.sh: drops the echo status lines and the else branch, leaving only the ./benchmark call under the 0xd22 CPU-part check.
  * kurtm/test/CMakeLists.txt: takes headers and libraries from ${HPCKIT_PATH}/latest/kurtm/gcc instead of ${CMAKE_INSTALL_PREFIX}.
  * kurtm/test/benchmark.cpp: replaces the configuration, progress and performance printf calls with a start line and a PASSED summary.
NOTE(review): HPCKIT_PATH is not set anywhere in these hunks; confirm it is defined before the test CMakeLists.txt is configured.
NOTE(review): elapsed_time is still computed but no longer printed in the visible hunk; confirm nothing else needs it.
diff --git a/kurtm/run.sh b/kurtm/run.sh index 5a11fe60cade8b2821fec8ad316a85cbd0119354..7b2f9bd1f7b0ab076f9c582354c11e2d58c94621 100644 --- a/kurtm/run.sh +++ b/kurtm/run.sh @@ -5,12 +5,8 @@ set -e cputype=$(cat /proc/cpuinfo | grep 'CPU part' | awk 'NR==1{print $4}') + if [[ "$cputype" == "0xd22" ]]; then - echo "run KuRTM testsuite.." ./benchmark 128 128 128 - echo "run KuRTM testsuite done" - echo "[ PASSED ] 1 tests" -else - echo "Not suppot on this platform" fi diff --git a/kurtm/test/CMakeLists.txt b/kurtm/test/CMakeLists.txt index bb3c046a5bfb9dfa51d95f6f98824788f9c70e66..7f7322c07f1d86ac2ff528bdeca030d2216c63f6 100644 --- a/kurtm/test/CMakeLists.txt +++ b/kurtm/test/CMakeLists.txt @@ -13,8 +13,8 @@ endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -pipe -fopenmp") -include_directories("${CMAKE_INSTALL_PREFIX}/include") -link_directories(${CMAKE_INSTALL_PREFIX}/lib) +include_directories("${HPCKIT_PATH}/latest/kurtm/gcc/include") +link_directories(${HPCKIT_PATH}/latest/kurtm/gcc/lib) add_executable(benchmark benchmark.cpp) diff --git a/kurtm/test/benchmark.cpp b/kurtm/test/benchmark.cpp index 518d9bea0da96d97811788186d9fcfadb2e14985..34cba26e33ba4c24e8b30f0b05af4ff7a229da6e 100644 --- a/kurtm/test/benchmark.cpp +++ b/kurtm/test/benchmark.cpp @@ -79,32 +79,18 @@ int main(int argc, char** argv) { int grid_dim_y = ny + 2*GZ_y; int grid_dim_z = nz + 2*GZ_z; int n_threads = omp_get_max_threads(); - printf("KuRTM test configuration:\n"); - printf(" datatype: double\n"); - printf(" dimension: %d\n", dimension); - printf(" radius: %d\n", radius); - printf(" input grid layout: grid\n"); - printf(" output grid layout: grid\n"); - printf(" grid size: %d, %d, %d\n", nx, ny, nz); - printf(" tile size: [512, 8, 128]\n"); - printf(" number of threads: %d\n", n_threads); - printf(" brick size: [8, 8, 4]\n"); - printf(" use alignment: True\n"); - printf("\nbegin tests:\n"); - printf("malloc grid...\n"); + printf("start test kurtm 2d kernel.\n"); double* alpha = 
(double*)malloc(sizeof(double)*729); double* p0 = (double*)aligned_alloc(64, sizeof(double)*grid_dim_x*grid_dim_y*grid_dim_z); double* p1 = (double*)aligned_alloc(64, sizeof(double)*grid_dim_x*grid_dim_y*grid_dim_z); - printf("initalizing input grid...\n"); for(int i = 0; i < 729; i += 1) { alpha[i] = uniform(); } for(int i = 0; i < grid_dim_x*grid_dim_y*grid_dim_z; i += 1) { p0[i] = uniform(); } - printf("zeroing output grid...\n"); memset(p1, 0, sizeof(double)*grid_dim_x*grid_dim_y*grid_dim_z); kurtm_stencil_factor_t kernel_factor[3] = {KURTM_STENCIL_FACTOR_IMMU, KURTM_STENCIL_FACTOR_IMMU, KURTM_STENCIL_FACTOR_IMMU}; @@ -124,16 +110,13 @@ int main(int argc, char** argv) { &p1[index_g(GZ_x, GZ_y, GZ_z, grid_dim_x, grid_dim_y)], dimSize, dimStep, KURTM_TENSOR_DOUBLE); - printf("running benchmark...\n"); double elapsed_time = time_func([&](){ kurtm_stencil_run(in, ou, kernel); }); - printf("run complete\n"); - printf(" elapsed time: %lfs\n", elapsed_time); - printf(" bandwidth: %lfGB/s\n", sizeof(double)*nx*ny*nz*2 / (elapsed_time * 1024*1024*1024)); - printf(" FLOPS: %lfGFLOPS\n", 161.0*nx*ny*nz / (elapsed_time * 1024*1024*1024)); + printf("end test kurtm 2d kernel. PASSED\n"); + printf("1 tests ran, 1 tests PASSED, 0 tests FAILED.\n"); return 0; }