您尚未登录。

楼主 # 2024-08-08 18:16:19

memory
会员
注册时间: 2021-08-11
已发帖子: 416
积分: 392

A133 使用GPU做OpenCL 并行运算

cl_vec3_add.c

/*
 * This confidential and proprietary software should be used
 * under the licensing agreement from Allwinner Technology.
 *
 * Copyright (C) 2020 Allwinner Technology Limited
 * All rights reserved.
 *
 * Author: Albert Yu <yuxyun@allwinnertech.com>
 *
 * The entire notice above must be reproduced on all authorised
 * copies and copies may only be made to the extent permitted
 * by a licensing agreement from Allwinner Technology Limited.
 */

#include <stdio.h>
#include <stdlib.h>
#include <CL/cl.h>

#define DATA_SIZE 3
#define KERNEL_SRC_FILE "cl_vec3_add.cl"

/*
 * Host program for the "vec3_add" OpenCL kernel.
 *
 * Reads the kernel source from KERNEL_SRC_FILE, builds it for the first GPU
 * device of the first platform, uploads the input vectors a and b, runs the
 * kernel over DATA_SIZE work-items and prints the values read back next to
 * the host-computed a[i] + b[i].
 *
 * Returns 0 when every step succeeded, -1 otherwise.
 */
int main(void)
{
	int i;
	int ret = -1;
	long file_len;
	FILE *fp;
	char *source_str = NULL;
	const char *source_ptr;
	int a[DATA_SIZE] = { 1, 2, 3};
	int b[DATA_SIZE] = { 4, 5, 6};
	int c[DATA_SIZE] = { 0 };
	size_t source_size;
	cl_int cl_ret;
	cl_platform_id platform_id = NULL;
	cl_device_id device_id = NULL;
	/*
	 * Every handle starts out NULL: any failure jumps to the shared cleanup
	 * code at "exit", which tests these handles to decide what to release.
	 */
	cl_context context = NULL;
	cl_command_queue command_queue = NULL;
	cl_mem mem_a = NULL, mem_b = NULL, mem_c = NULL;
	cl_program program = NULL;
	cl_kernel kernel = NULL;
	size_t global_item_size = DATA_SIZE;
	size_t local_item_size = DATA_SIZE;

	fp = fopen(KERNEL_SRC_FILE, "r");
	if (!fp) {
		printf("Failed to open %s\n", KERNEL_SRC_FILE);
		return -1;
	}

	if (fseek(fp, 0, SEEK_END)) {
		printf("Failed to seek to the end of %s\n", KERNEL_SRC_FILE);
		goto exit;
	}

	/* ftell() reports failure as -1; check it before converting to size_t */
	file_len = ftell(fp);
	if (file_len <= 0) {
		printf("Failed to determine the size of %s\n", KERNEL_SRC_FILE);
		goto exit;
	}
	source_size = (size_t)file_len;

	rewind(fp);

	source_str = malloc(source_size);
	if (!source_str) {
		printf("Failed to allocate memory for source_str\n");
		goto exit;
	}

	/* Step 1: read kernel source code from kernel file */
	source_size = fread(source_str, 1, source_size, fp);
	if (source_size == 0) {
		printf("Failed to read kernel source code from kernel file\n");
		goto exit;
	}

	/* Step 2: get the platform id */
	cl_ret = clGetPlatformIDs(1, &platform_id, NULL);
	if (cl_ret != CL_SUCCESS) {
		printf("clGetPlatformIDs failed, cl_ret=%d\n", cl_ret);
		goto exit;
	}

	/* Step 3: get the device id */
	cl_ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
	if (cl_ret != CL_SUCCESS) {
		printf("clGetDeviceIDs failed, cl_ret=%d\n", cl_ret);
		goto exit;
	}

	/* Step 4: create an OpenCL context */
	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &cl_ret);
	if (!context || cl_ret != CL_SUCCESS) {
		printf("clCreateContext failed, cl_ret=%d\n", cl_ret);
		goto exit;
	}

	/* Step 5: create a command-queue on the specific device */
	command_queue = clCreateCommandQueue(context, device_id, 0, &cl_ret);
	if (!command_queue || cl_ret != CL_SUCCESS) {
		printf("clCreateCommandQueue failed, cl_ret=%d\n", cl_ret);
		goto exit;
	}

	/* Step 6: allocate buffers on the device */
	mem_a = clCreateBuffer(context, CL_MEM_READ_ONLY, DATA_SIZE * sizeof(int), NULL, &cl_ret);
	if (!mem_a || cl_ret != CL_SUCCESS) {
		printf("Failed to allocate buffer for a, cl_ret=%d\n", cl_ret);
		goto exit;
	}
	mem_b = clCreateBuffer(context, CL_MEM_READ_ONLY, DATA_SIZE * sizeof(int), NULL, &cl_ret);
	if (!mem_b || cl_ret != CL_SUCCESS) {
		printf("Failed to allocate buffer for b, cl_ret=%d\n", cl_ret);
		goto exit;
	}
	mem_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, DATA_SIZE * sizeof(int), NULL, &cl_ret);
	if (!mem_c || cl_ret != CL_SUCCESS) {
		printf("Failed to allocate buffer for c, cl_ret=%d\n", cl_ret);
		goto exit;
	}

	/* Step 7: copy the data to the device (blocking writes) */
	cl_ret = clEnqueueWriteBuffer(command_queue, mem_a, CL_TRUE, 0, DATA_SIZE * sizeof(int), a, 0, NULL, NULL);
	if (cl_ret != CL_SUCCESS) {
		printf("clEnqueueWriteBuffer for a failed, cl_ret=%d\n", cl_ret);
		goto exit;
	}
	cl_ret = clEnqueueWriteBuffer(command_queue, mem_b, CL_TRUE, 0, DATA_SIZE * sizeof(int), b, 0, NULL, NULL);
	if (cl_ret != CL_SUCCESS) {
		printf("clEnqueueWriteBuffer for b failed, cl_ret=%d\n", cl_ret);
		goto exit;
	}

	/*
	 * Step 8: create a program object from the kernel source.
	 * The buffer is not NUL-terminated, so its length is passed explicitly.
	 */
	source_ptr = source_str;
	program = clCreateProgramWithSource(context, 1, &source_ptr, &source_size, &cl_ret);
	if (!program || cl_ret != CL_SUCCESS) {
		printf("clCreateProgramWithSource failed, cl_ret=%d\n", cl_ret);
		goto exit;
	}

	/* Step 9: build the program */
	cl_ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
	if (cl_ret != CL_SUCCESS) {
		printf("clBuildProgram failed, cl_ret=%d\n", cl_ret);
		goto exit;
	}

	/* Step 10: create a kernel object */
	kernel = clCreateKernel(program, "vec3_add", &cl_ret);
	if (!kernel || cl_ret != CL_SUCCESS) {
		printf("clCreateKernel failed, cl_ret=%d\n", cl_ret);
		goto exit;
	}

	/* Step 11: set the arguments of the kernel */
	cl_ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&mem_a);
	if (cl_ret != CL_SUCCESS) {
		printf("clSetKernelArg for mem_a failed, cl_ret=%d\n", cl_ret);
		goto exit;
	}
	cl_ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&mem_b);
	if (cl_ret != CL_SUCCESS) {
		printf("clSetKernelArg for mem_b failed, cl_ret=%d\n", cl_ret);
		goto exit;
	}
	cl_ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&mem_c);
	if (cl_ret != CL_SUCCESS) {
		printf("clSetKernelArg for mem_c failed, cl_ret=%d\n", cl_ret);
		goto exit;
	}

	/* Step 12: enqueues a command to execute a kernel on the device */
	cl_ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);
	if (cl_ret != CL_SUCCESS) {
		printf("clEnqueueNDRangeKernel failed, cl_ret=%d\n", cl_ret);
		goto exit;
	}

	/* Step 13: read back the result from the device (blocking read) */
	cl_ret = clEnqueueReadBuffer(command_queue, mem_c, CL_TRUE, 0, DATA_SIZE * sizeof(int), c, 0, NULL, NULL);
	if (cl_ret != CL_SUCCESS) {
		printf("clEnqueueReadBuffer failed, cl_ret=%d\n", cl_ret);
		goto exit;
	}

	/* Show the result */
	for (i = 0; i < DATA_SIZE; i++)
		printf("%d + %d -> expected result: %d, opencl result: %d\n", a[i], b[i], a[i] + b[i], c[i]);

	ret = 0;

exit:
	/* Shared cleanup: only handles that were actually created are non-NULL */
	fclose(fp);

	free(source_str);

	if (command_queue) {
		clFlush(command_queue);
		clFinish(command_queue);
		clReleaseCommandQueue(command_queue);
	}

	if (kernel)
		clReleaseKernel(kernel);

	if (program)
		clReleaseProgram(program);

	if (mem_a)
		clReleaseMemObject(mem_a);
	if (mem_b)
		clReleaseMemObject(mem_b);
	if (mem_c)
		clReleaseMemObject(mem_c);

	if (context)
		clReleaseContext(context);

	return ret;
}

计算脚本:cl_vec3_add.cl

/*
 * Element-wise vector addition.
 * Each work-item takes its global id in dimension 0 as an index and stores
 * a[index] + b[index] into c[index].
 */
__kernel void vec3_add(__global const int *a,
                       __global const int *b,
                       __global int *c)
{
    const int gid = get_global_id(0);
    const int sum = a[gid] + b[gid];

    c[gid] = sum;
}

编译:

/opt/A133/buildroot/buildroot-20200212-Qt-sunxi-C/output/host/bin/aarch64-linux-gnu-gcc -o /mnt/hgfs/D/opencl_test cl_vec3_add.c -I/opt/A133/tina4/package/libs/libgpu-opencl/ge8300/3rdparty/include/khronos/ -lOpenCL

执行结果:

# /usr/bin/opencl
1 + 4 -> expected result: 5, opencl result: 5
2 + 5 -> expected result: 7, opencl result: 7
3 + 6 -> expected result: 9, opencl result: 9
#

计算脚本改成乘法:cl_vec3_add.cl

/*
 * Element-wise vector multiplication.
 * Each work-item takes its global id in dimension 0 as an index and stores
 * a[index] * b[index] into c[index].
 * The kernel keeps the name "vec3_add" because the host program looks it up
 * by that name.
 */
__kernel void vec3_add(__global const int *a, __global const int *b, __global int *c)
{
    int i = get_global_id(0);

    c[i] = a[i] * b[i];
}

执行结果:

# /usr/bin/opencl
1 + 4 -> expected result: 5, opencl result: 4
2 + 5 -> expected result: 7, opencl result: 10
3 + 6 -> expected result: 9, opencl result: 18
#

离线

楼主 #2 2024-08-26 15:21:36

memory
会员
注册时间: 2021-08-11
已发帖子: 416
积分: 392

Re: A133 使用GPU做OpenCL 并行运算

OpenCL 我目前只会跑 demo,还不会用 onnxruntime。

我这边 Qt跑OpenGL DEMO正常,没有花屏。

离线

楼主 #4 2024-08-26 15:49:20

memory
会员
注册时间: 2021-08-11
已发帖子: 416
积分: 392

Re: A133 使用GPU做OpenCL 并行运算

@微凉VeiLiang
https://www.bilibili.com/video/BV1fU4y1z7Rp/

https://www.bilibili.com/video/BV1LK411f7x9

跑这个 cellphone 等Qt demo都是正常的

离线

楼主 #6 2024-08-26 16:08:14

memory
会员
注册时间: 2021-08-11
已发帖子: 416
积分: 392

Re: A133 使用GPU做OpenCL 并行运算

离线

楼主 #10 2024-08-26 17:04:49

memory
会员
注册时间: 2021-08-11
已发帖子: 416
积分: 392

Re: A133 使用GPU做OpenCL 并行运算

微凉VeiLiang 说:

tplayer播放mp4文件是正常的。QT如果不使用到gpu也是正常的

我用的是这个开发板: https://item.taobao.com/item.htm?id=747735085973

cellphone测试固件:
tina_a133-dock2_uart0_buildroot-20200212-Qt-sunxi-C_202408252225.img.7z

离线

楼主 #12 2024-08-26 17:28:08

memory
会员
注册时间: 2021-08-11
已发帖子: 416
积分: 392

Re: A133 使用GPU做OpenCL 并行运算

离线

楼主 #14 2024-08-27 11:02:26

memory
会员
注册时间: 2021-08-11
已发帖子: 416
积分: 392

Re: A133 使用GPU做OpenCL 并行运算

@微凉VeiLiang
我问了一下店主,说可能是tina4只适配了1G,2G内存的问题,

4G版本可能需要微调一下 sys_config.fex ddr 参数。

离线

楼主 #16 2024-08-27 11:29:42

memory
会员
注册时间: 2021-08-11
已发帖子: 416
积分: 392

Re: A133 使用GPU做OpenCL 并行运算

离线

页脚

工信部备案:粤ICP备20025096号 Powered by FluxBB

感谢为中文互联网持续输出优质内容的各位老铁们。 QQ: 516333132, 微信(wechat): whycan_cn (哇酷网/挖坑网/填坑网) service@whycan.cn