2007年6月25日月曜日

複数のSPEを利用したCellプログラムの雛形

このブログ記事をはてなブックマークに追加

入力データ in を複数のSPEに割り当てて処理を行う。結果は出力データ out に格納される。

program_ppe.c

/* ppu-gcc -lspe2 -lpthread program_ppe.c -o program_ppe */ #include <stdio.h> #include <libspe2.h> #include <pthread.h> #define SPE_PROGRAM "program_spe" #define NUM_SPE 4 #define SIZE (10000) #define DMA_BY_TYPE (16) /* (16) for char, (4) for int */ typedef struct { unsigned long long ea_in; unsigned long long ea_out; unsigned int size; int pad[3]; } params_t; params_t params[NUM_SPE] __attribute__((aligned(16))); typedef struct { spe_context_ptr_t spe; params_t *params; } thread_arg_t; void *run_spe(void *thread_arg) { int ret; thread_arg_t *arg = (thread_arg_t *)thread_arg; unsigned int entry; spe_stop_info_t stop_info; entry = SPE_DEFAULT_ENTRY; spe_context_run(arg->spe, &entry, 0, arg->params, NULL, &stop_info); return NULL; } void process_spe(unsigned char *in, unsigned char *out) { int i; int size, spe_size[NUM_SPE]; spe_program_handle_t *prog; spe_context_ptr_t spe[NUM_SPE]; pthread_t thread[NUM_SPE]; thread_arg_t arg[NUM_SPE]; prog = spe_image_open(SPE_PROGRAM); for (i = 0; i < NUM_SPE; i++) { spe[i] = spe_context_create(0, NULL); spe_program_load(spe[i], prog); } size = SIZE / DMA_BY_TYPE / NUM_SPE; for (i = 0; i < NUM_SPE - 1; i++) spe_size[i] = size * DMA_BY_TYPE; spe_size[NUM_SPE - 1] = (SIZE - size * DMA_BY_TYPE * (NUM_SPE - 1)); size = 0; for (i = 0; i < NUM_SPE; i++) { params[i].ea_in = (unsigned long)&in[size]; params[i].ea_out = (unsigned long)&out[size]; params[i].size = spe_size[i]; size += spe_size[i]; arg[i].spe = spe[i]; arg[i].params = &params[i]; pthread_create(&thread[i], NULL, run_spe, &arg[i]); } for (i = 0; i < NUM_SPE; i++) { pthread_join(thread[i], NULL); spe_context_destroy(spe[i]); } } int main() { unsigned char in[SIZE] __attribute__((aligned(16))) = { 0 }; unsigned char out[SIZE] __attribute__((aligned(16))); process_spe(in, out); return 0; }

program_spe.c

/* spu-gcc program_spe.c -o program_spe */ #include <stdio.h> #include <spu_intrinsics.h> #include <spu_mfcio.h> #define DMA_BUFSIZE (16 << 10) #define DMA_BY_TYPE (16) /* (16) for char, (4) for int */ unsigned char in[DMA_BUFSIZE] __attribute((aligned(16))); unsigned char out[DMA_BUFSIZE] __attribute((aligned(16))); typedef struct { unsigned long long ea_in; unsigned long long ea_out; unsigned int size; int pad[3]; } params_t; params_t params __attribute__((aligned(16))); int main(unsigned long long spe, unsigned long long argp, unsigned long long envp) { int i; int tag = 1; unsigned int bufsize; unsigned int padding = 0; spu_mfcdma64(&params, mfc_ea2h(argp), mfc_ea2l(argp), sizeof(params_t), tag, MFC_GET_CMD); spu_writech(MFC_WrTagMask, 1 << tag); spu_mfcstat(MFC_TAG_UPDATE_ALL); for (i = 0; i < params.size; i += DMA_BUFSIZE) { if (i + DMA_BUFSIZE > params.size) { bufsize = params.size - i; padding = bufsize % DMA_BY_TYPE; if (padding != 0) bufsize += (DMA_BY_TYPE - padding); } else bufsize = DMA_BUFSIZE; spu_mfcdma64(in, mfc_ea2h(params.ea_in+i), mfc_ea2l(params.ea_in+i), bufsize * sizeof(char), tag, MFC_GET_CMD); spu_writech(MFC_WrTagMask, 1 << tag); spu_mfcstat(MFC_TAG_UPDATE_ALL); /* processing */ spu_mfcdma64(out, mfc_ea2h(params.ea_out+i), mfc_ea2l(params.ea_out+i), bufsize * sizeof(char), tag, MFC_PUT_CMD); spu_writech(MFC_WrTagMask, 1 << tag); spu_mfcstat(MFC_TAG_UPDATE_ALL); } return 0; }

0 コメント: