ToArray( c, A, sizeof (A)/sizeof (int)); tgtVector->Delete(); } OR like this : Target *tgtVector = CreateMulticoreTarget(); Target *tgtVector = CreateDX9Target();"> ToArray( c, A, sizeof (A)/sizeof (int)); tgtVector->Delete(); } OR like this : Target *tgtVector = CreateMulticoreTarget(); Target *tgtVector = CreateDX9Target();">
Download presentation
Presentation is loading. Please wait.
Published by Norah Anderson. Modified over 8 years ago.
1
Zhiduo Liu, Aaron Severance, Satnam Singh, Guy Lemieux: Accelerator Compiler for the VENICE Vector Processor
2
This is the VENICE Vector Processor : Complicated
3
#include "vector.h" int main() { int A[] = {1,2,3,4,5,6,7,8}; const int data_len = sizeof ( A ); int *va = ( int *) vector_malloc ( data_len ); vector_dma_to_vector ( va, A, data_len ); vector_wait_for_dma (); vector_set_vl ( data_len / sizeof (int) ); vector ( SVW, VADD, va, 42, va ); vector_instr_sync (); // wait for operation to complete vector_dma_to_host ( A, va, data_len ); vector_wait_for_dma (); vector_free (); // deallocate scratchpad malloc } You can program it like this : #include "Accelerator.h" #include "VectorTarget.h" using namespace ParallelArrays; using namespace MicrosoftTargets; int main() { Target *tgtVector = CreateVectorTarget(); int A[] = {1,2,3,4,5,6,7,8}; IPA b = IPA( A, sizeof (A)/sizeof (int)); IPA c = b + 42; tgtVector->ToArray( c, A, sizeof (A)/sizeof (int)); tgtVector->Delete(); } OR like this : Target *tgtVector = CreateMulticoreTarget(); Target *tgtVector = CreateDX9Target();
4
Assembly Programming : Write Assembly Download to board Compile with Gcc Get Result Doesn’t compile? Result Incorrect? Accelerator Programming : Write in Accelerator Download to board Compile with Microsoft Visual Studio Get Result Compile with Gcc Doesn’t compile? Or result incorrect?
5
Assembly Programming: 1. Hard to program; 2. Long debug cycle; 3. Not portable; 4. Manual – not always optimal or correct (WYSIWYG). Accelerator Programming: 1. Easy to program; 2. Easy to debug; 3. Can also target other devices; 4. Automated compiler optimizations.
6
LIR Convert To LIR IR Add Intermediates Combine Operations Evaluation Ordering & Reference Counting Evaluation Ordering & Reference Counting Buffer Counting Calculate Buffer Size Allocate & Initialize Memory Transfer Data To Scratchpad Set VL Write Vector Instructions Transfer Result To Host Assign Buffers to Inputs Need Double buffering? LIR Compiler Flow
7
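Benchmark Runtime (seconds)

| CPU | fir | 2Dfir | life | imgblend | median | motest |
|---|---|---|---|---|---|---|
| Xeon W3690 (3.47GHz) | 0.07 | 0.44 | 0.53 | 0.12 | 9.97 | 0.24 |
| VENICE (V64, 100MHz) | 0.07 | 0.29 | 0.23 | 0.33 | 3.11 | 0.22 |
| Speedup | 1.0x | 1.5x | 2.3x | 0.4x | 3.2x | 1.1x |

369x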
8
Thank you !
Similar presentations
© 2024 SlidePlayer.com Inc.
All rights reserved.