Skip to content
Snippets Groups Projects
main.cpp 1.97 KiB
Newer Older
  • Learn to ignore specific revisions
  • vel0109's avatar
    vel0109 committed
    #include "hip/hip_runtime.h"
    #include "hip/hip_runtime_api.h"
    
    #include <stdio.h>
    #include <stdlib.h>
    
    // Element-wise float addition written with hand-coded AMD GPU inline assembly:
    // each thread computes c[idx] = a[idx] + b[idx] for its own flat index.
    //
    // Parameters:
    //   a, b - input arrays, read at element idx
    //   c    - output array, written at element idx
    //
    // Launch contract: the index is derived from the x dimension only
    // (blockIdx.x, blockDim.x, threadIdx.x), and there is NO bounds check, so the
    // launch must not create more threads than there are valid elements in
    // a, b and c.
    __global__ void simple_add(const float *a, const float *b, float *c){
        // Flat per-thread element index across the 1D grid.
        int idx = blockIdx.x * blockDim.x + threadIdx.x;
    
        // The three asm inputs are the per-thread element addresses a+idx, b+idx
        // and c+idx (%0, %1, %2). The "v" constraint presumably places each address
        // in vector registers - confirm against the LLVM AMDGPU documentation.
        //
        // v10, v11 and v12 are hard-coded scratch registers; they are listed as
        // clobbers so the compiler does not keep live values in them, and "memory"
        // is listed because the asm reads and writes memory behind the compiler's
        // back. `volatile` keeps the statement from being removed even though it
        // declares no outputs.
        asm volatile("global_load_dword v10, %0, off;\n\t" //load first operand (a[idx]) into v10
                     "global_load_dword v11, %1, off;\n\t" //load second operand (b[idx]) into v11
                     "s_waitcnt vmcnt(0);\n\t"             //wait for both loads to finish before using v10/v11
                     "v_add_f32 v12, v11, v10;\n\t"        //perform the addition: v12 = v11 + v10
                     "global_store_dword %2, v12, off;\n\t" :: "v"(a+idx), "v"(b+idx), "v"(c+idx) : "v10", "v11", "v12", "memory"); //save the result to c[idx]
        
    }
    
    
    int main(){
        int block_num = 4;
        int block_size = 16;
    
        float *a, *b, *c;
    
        hipHostMalloc((void **)&a, block_num*block_size*sizeof(float));
        hipHostMalloc((void **)&b, block_num*block_size*sizeof(float));
        hipHostMalloc((void **)&c, block_num*block_size*sizeof(float));
    
        float x_0 = 1.0f;
        for(int i = 0; i < block_num * block_size; i++)
            a[i] = x_0 + i * 0.1f;
    
        float y_0 = 2.5f;
        for(int i = 0; i < block_num * block_size; i++)
            b[i] = y_0 - i * 0.2f;
    
    
        hipLaunchKernelGGL(simple_add, dim3(block_num), dim3(block_size), 0, 0, a,b,c);
        hipDeviceSynchronize();
    
        printf("Operand matrix 1:\n");
        for(int i = 0; i < block_num; i++){
            for(int j = 0; j < block_size; j++){
                printf("%.3f ", a[j+i*block_size]);    
            }
            printf("\n");
        }
        
        printf("Operand matrix 2:\n");
        for(int i = 0; i < block_num; i++){
            for(int j = 0; j < block_size; j++){
                printf("%.3f ", b[j+i*block_size]);    
            }
            printf("\n");
        }   
    
        printf("Result matrix:\n");
        for(int i = 0; i < block_num; i++){
            for(int j = 0; j < block_size; j++){
                printf("%.3f ", c[j+i*block_size]);    
            }
            printf("\n");
        }   
    
        hipHostFree(a);
        hipHostFree(b);
        hipHostFree(c);
    
    }