Download presentation
Presentation is loading. Please wait.
1
Selected MaxCompiler Examples
Sasa Stojanovic
2
Content E#1: Hello world E#2: Vector addition E#3: Type mixing
E#4: Addition of a constant and a vector E#5: Input/output control E#6: Conditional execution E#7: Moving average 1D E#8: Moving average 2D E#9: Array summation E#10: Optimization of E#9
3
Example No.1: Hello World!
Write a program that sends the “Hello World!” string to the MAX2 card, for the MAX2 card kernel to return it back to the host. To be learned through this example: How to make the configuration of the accelerator (MAX2 card) using Java: How to make a simple kernel (ops description) using Java (the only language), How to write the standard manager (config description based on kernel(s)) using Java, How to test the kernel using a test (code+data) written in Java, How to compile the Java code for MAX2, How to write a simple C code that runs on the host and triggers the kernel, How to write the C code that streams data to the kernel, How to write the C code that accepts data from the kernel, How to simulate and execute an application program in C that runs on the host and periodically calls the accelerator.
4
Standard Files in a MAX Project
Example No. 1 Standard Files in a MAX Project One or more kernel files, to define operations of the application: <app_name>Kernel[<additional_name>].java One (or more) Java file, for simulation of the kernel(s): <app_name>SimRunner.java One manager file for transforming the kernel(s) into the configuration of the MAX card (instantiation and connection of kernels): <app_name>Manager.java Simulator builder: <app_name>HostSimBuilder.java Hardware builder: <app_name>HWBuilder.java Application code that uses the MAX card accelerator: <app_name>HostCode.c Makefile A script file that defines the compilation related commands
5
example1Kernel.java Example No. 1
package ind.z1; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class helloKernel extends Kernel { public helloKernel(KernelParameters parameters) { super(parameters); // Input: HWVar x = io.input("x", hwInt(8)); HWVar result = x; // Output: io.output("z", result, hwInt(8)); } It is possible to substitute the last three lines with: io.output("z", result, hwInt(8));
6
example1SimRunner.java Example No. 1
package ind.z1; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class helloSimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager(“helloSim"); helloKernel k = new helloKernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3, 4, 5, 6, 7, 8); m.setKernelCycles(8); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); }
7
example1HostSimBuilder.java Example No. 1
package ind.z1; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.managers.standard.Manager; import com.maxeler.maxcompiler.v1.managers.standard.Manager.IOType; public class helloHostSimBuilder { public static void main(String[] args) { Manager m = new Manager(true,”helloHostSim", BOARDMODEL); Kernel k = new helloKernel(m.makeKernelParameters(“helloKernel")); m.setKernel(k); m.setIO(IOType.ALL_PCIE); m.build(); }
8
example1HwBuilder.java Example No. 1
package ind.z1; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.managers.standard.Manager; import com.maxeler.maxcompiler.v1.managers.standard.Manager.IOType; public class helloHWBuilder { public static void main(String[] args) { Manager m = new Manager(“hello", BOARDMODEL); Kernel k = new helloKernel( m.makeKernelParameters() ); m.setKernel(k); m.setIO(IOType.ALL_PCIE); m.build(); }
9
example1HostCode.c 1/2 Example No. 1
#include <stdio.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; char data_in1[16] = "Hello world!"; char data_out[16]; printf("Opening and configuring FPGA.\n"); maxfile = max_maxfile_init_hello(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device);
10
Example No. 1 example1HostCode.c /2 printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, 16 * sizeof(char)), max_output("z", data_out, 16 * sizeof(char)), max_runfor(“helloKernel", 16), max_end()); printf("Checking data read from FPGA.\n"); max_close_device(device); max_destroy(maxfile); return 0; }
11
# Example No. 1 - Makefile

# Root of the project directory tree
BASEDIR=../../..

# Java package name
PACKAGE=ind/z1

# Application name
# NOTE(review): the Java classes and the maxfile init function of this example
# use the prefix "hello", not "example1" - confirm which name is intended.
APP=example1

# Names of your maxfiles
HWMAXFILE=$(APP).max
HOSTSIMMAXFILE=$(APP)HostSim.max

# Java application builders
HWBUILDER=$(APP)HWBuilder.java
HOSTSIMBUILDER=$(APP)HostSimBuilder.java
SIMRUNNER=$(APP)SimRunner.java

# C host code
HOSTCODE=$(APP)HostCode.c

# Target board
# NOTE(review): BoardModel.java selects MAX2336B - confirm that 23312 is the matching value.
BOARD_MODEL=23312

# Include the master makefile.include
# The next two lines define a variable holding a single space; the trailing
# "# comment" is required so that the space before it is kept.
nullstring :=
space := $(nullstring) # comment
# Escape spaces in the MaxCompiler installation path
MAXCOMPILERDIR_QUOTE:=$(subst $(space),\ ,$(MAXCOMPILERDIR))
include $(MAXCOMPILERDIR_QUOTE)/examples/common/Makefile.include
12
Example No. 1 BoardModel.java
package config;

import com.maxeler.maxcompiler.v1.managers.MAX2BoardModel;

/** Single place that names the target board; the builders import BOARDMODEL statically. */
public class BoardModel {
    /** Board model handed to the Manager constructors in the builder classes. */
    public static final MAX2BoardModel BOARDMODEL = MAX2BoardModel.MAX2336B;
}
13
Types Hardware Types
14
Hardware Primitive Types
Floating point numbers - HWFloat: hwFloat(exponent_bits, mantissa_bits); float ~ hwFloat(8,24) double ~ hwFloat(11,53) Fixed point numbers - HWFix: hwFix(integer_bits, fractional_bits, sign_mode) SignMode.UNSIGNED SignMode.TWOSCOMPLEMENT Integers - HWFix: hwInt(bits) ~ hwFix(bits, 0, SignMode.TWOSCOMPLEMENT) Unsigned integers - HWFix: hwUInt(bits) ~ hwFix(bits, 0, SignMode.UNSIGNED) Boolean – HWFix: hwBool() ~ hwFix(1, 0, SignMode.UNSIGNED) 1 ~ true 0 ~ false Raw bits – HWRawBits: hwRawBits(width)
15
Example No. 2: Vector Addition
Write a program that adds two arrays of floating point numbers. Program reads the size of arrays, makes two arrays with an arbitrary content (test inputs), and adds them using a MAX card.
16
example2Kernel.java Example No. 2
package ind.z2; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example2Kernel extends Kernel { public example2Kernel(KernelParameters parameters) { super(parameters); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar y = io.input("y", hwFloat(8,24)); HWVar result = x + y; // Output io.output("z", result, hwFloat(8,24)); }
17
example2SimRunner.java Example No. 2
package ind.z2; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example2SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example2Sim"); example2Kernel k = new example2Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3, 4, 5, 6, 7, 8); m.setInputData("y", 2, 3, 4, 5, 6, 7, 8, 9); m.setKernelCycles(8); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 3, 5, 7, 9, 11, 13, 15, 17 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); }
18
example2HostSimBuilder.java Example No. 2
package ind.z2; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.managers.standard.Manager; import com.maxeler.maxcompiler.v1.managers.standard.Manager.IOType; public class example2HostSimBuilder { public static void main(String[] args) { Manager m = new Manager(true,"example2HostSim", BOARDMODEL); Kernel k = new example2Kernel( m.makeKernelParameters("example2Kernel") ); m.setKernel(k); m.setIO(IOType.ALL_PCIE); m.build(); }
19
example2HWBuilder.java Example No. 2
package ind.z2; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.managers.standard.Manager; import com.maxeler.maxcompiler.v1.managers.standard.Manager.IOType; public class example2HWBuilder { public static void main(String[] args) { Manager m = new Manager("example2", BOARDMODEL); Kernel k = new example2Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setIO(IOType.ALL_PCIE); m.build(); }
20
example2HostCode.c 1/2 Example No. 2
#include <stdio.h> #include <stdlib.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_in2, *data_out; unsigned long N, i; printf("Enter size of array: "); scanf("%lu",&N); data_in1 = malloc(N * sizeof(float)); data_in2 = malloc(N * sizeof(float)); data_out = malloc(N * sizeof(float)); for(i = 0; i < N; i++){ data_in1[i] = i%10; data_in2[i] = i%3; } printf("Opening and configuring FPGA.\n");
21
example2HostCode.c 2/2 Example No. 2
maxfile = max_maxfile_init_example2(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * sizeof(float)), max_input("y", data_in2, N * sizeof(float)), max_output("z", data_out, N * sizeof(float)), max_runfor("example2Kernel", N), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < N; i++) if (data_out[i] != i%10 + i%3){ printf("Error on element %d. Expected %f, but found %f.", i, (float)(i%10+i%3), data_out[i]); break; } max_close_device(device); max_destroy(maxfile); return 0;
22
Example No. 3: Type Mixing
Do the same as in the example no 2, with the following modification: one input array contains floating point numbers, and the other one contains integers.
23
Type Conversion Example No. 3
Casting here means moving data from one form to another, without changing their essence. Type is: specified for inputs and outputs, propagated from inputs, down the dataflow graph to outputs, used to check that output stream has correct type. If conversion is needed, explicit conversion (cast) is required How to do it? use the method cast in class HWVar, Additional hardware required (especially for conversion to or from floating point numbers), introduces additional latency. Cast between a floating point number and an integer number is done by rounding to the nearest integer!
24
example3Kernel.java Example No. 3
package ind.z3; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example3Kernel extends Kernel { public example3Kernel(KernelParameters parameters) { super(parameters); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar y = io.input("y", hwInt(32)); HWVar result = x + y.cast(hwFloat(8,24)); // Output io.output("z", result, hwFloat(8,24)); }
25
example3SimRunner.java Example No. 3
package ind.z3; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example3SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example3Sim"); example3Kernel k = new example3Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3, 4, 5, 6, 7, 8); m.setInputData("y", 2, 3, 4, 5, 6, 7, 8, 9); m.setKernelCycles(8); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 3, 5, 7, 9, 11, 13, 15, 17 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); }
26
example3HostCode.c 1/2 Example No. 3
#include <stdio.h> #include <stdlib.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_out; int *data_in2; unsigned long N, i; printf("Enter size of array: "); scanf("%lu",&N); data_in1 = malloc(N * sizeof(float)); data_in2 = malloc(N * sizeof(int)); data_out = malloc(N * sizeof(float)); for(i = 0; i < N; i++){ data_in1[i] = i%10; data_in2[i] = i%3; } printf("Opening and configuring FPGA.\n");
27
example3HostCode.c 2/2 Example No. 3
maxfile = max_maxfile_init_example3(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * sizeof(float)), max_input("y", data_in2, N * sizeof(int)), max_output("z", data_out, N * sizeof(float)), max_runfor("example3Kernel", N), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < N; i++){ if (data_out[i] != i%10 + i%3){ printf("Error on element %d. Expected %f, but found %f.", i, (float)(i%10+i%3), data_out[i]); break; } max_close_device(device); max_destroy(maxfile); return 0;
28
Generating Graph Command:
maxRenderGraphs <build_dir> <build_dir> - directory where the design is compiled In the virtual machine, directory “Desktop/MaxCompiler-Builds” contains the build directories. Example for application “example2”: maxRenderGraphs example2HostSim Renders graphs for the resulting max file
29
Final Kernel Graph for Example No 2
Generating Graph Final Kernel Graph for Example No 2
30
Final Kernel Graph for Example No 3
Generating Graph Final Kernel Graph for Example No 3
31
Example No. 4: Addition of a Constant and a Vector
Write a program that adds a constant to an array that contains floating point numbers. Program: reads the size of the array and the constant that will be added to the elements of the array, makes one array in an arbitrary way, and adds the constant to the array using the MAX card.
32
example4Kernel.java Example No. 4
package ind.z4; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example4Kernel extends Kernel { public example4Kernel(KernelParameters parameters) { super(parameters); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar y = io.scalarInput("y", hwFloat(8,24)); HWVar result = x + y; // Output io.output("z", result, hwFloat(8,24)); }
33
Other Modifications in Example 4
Example No. 4 Other Modifications in Example 4 example4SimRunner.java: Before the kernel run, invoke: setScalarInput("y", 2); example4HostCode.c: Read the constant from standard input. After the device is opened, but before the run, set the scalar inputs: max_set_scalar_input_f(device, "example4Kernel.y", const_add, FPGA_A); max_upload_runtime_params(device, FPGA_A);
34
Example No. 5: Input/Output Control
Do the same as in example no 4, with the following modification: use controlled inputs and counters.
35
example5Kernel.java Example No. 5
package ind.z5; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example5Kernel extends Kernel { public example5Kernel(KernelParameters parameters) { super(parameters); HWVar ie = control.count.simpleCounter(32); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar y = io.input("y", hwFloat(8,24), ie.eq(0)); HWVar result = x + y; // Output io.output("z", result, hwFloat(8,24)); }
36
example5SimRunner.java Example No. 5
package ind.z5; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example5SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example5Sim"); example5Kernel k = new example5Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3, 4, 5, 6, 7, 8); m.setInputData("y", 2); m.setKernelCycles(8); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 3, 4, 5, 6, 7, 8, 9, 10 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); }
37
example5HostCode.c 1/2 Example No. 5
#include <stdio.h> #include <stdlib.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, data_in2[2], *data_out; unsigned long N, i; printf("Enter size of array: "); scanf("%lu%f",&N, data_in2); data_in1 = malloc(N * sizeof(float)); data_out = malloc(N * sizeof(float)); for(i = 0; i < N; i++) data_in1[i] = i%10; printf("Opening and configuring FPGA.\n"); maxfile = max_maxfile_init_example5(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device);
38
example5HostCode.c 2/2 Example No. 5
printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * sizeof(float)), max_input("y", data_in2, 2 * sizeof(float)), max_output("z", data_out, N * sizeof(float)), max_runfor("example5Kernel", N), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < N; i++){ if (data_out[i] != i%10 + data_in2[0]){ printf("Error on element %d. Expected %f, but found %f.", i, (float)(i%10+data_in2[0]), data_out[i]); break; } max_close_device(device); max_destroy(maxfile); return 0;
39
Example No. 6: Conditional Execution
Translate the following part of code for the Maxeler MAX2 card: for(int i=0; i<N; i++) if(a[i] != b[i]){ c[i] = b[i]-a[i]; d[i] = a[i]*b[i]/c[i]; }else { c[i] = a[i]; d[i] = a[i]+b[i]; }
40
example6Kernel.java Example No. 6
package ind.z6; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example6Kernel extends Kernel { public example6Kernel(KernelParameters parameters) { super(parameters); // Input HWVar a = io.input("a", hwFloat(8,24)); HWVar b = io.input("b", hwFloat(8,24)); HWVar c = ~a.eq(b)?b-a:a; HWVar d = ~a.eq(b)?a*b/c:a+b; // Output io.output("c", c, hwFloat(8,24)); io.output("d", d, hwFloat(8,24)); }
41
example6SimRunner.java Example No. 6
package ind.z6; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example6SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example6Sim"); example6Kernel k = new example6Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("a", 1, 3); m.setInputData("b", 2, 3); m.setKernelCycles(2); m.runTest(); m.dumpOutput(); double expectedOutputc[] = { 1, 3 }; double expectedOutputd[] = { 2, 6 }; m.checkOutputData("c", expectedOutputc); m.checkOutputData("d", expectedOutputd); m.logMsg("Test passed OK!"); }
42
Example No. 7: Moving Average 1D
Write a program that calculates the moving average over an array, i.e. the average of each group of three successive elements of the input array: avg[i] = (a[0]+a[1])/2, for i = 0; avg[i] = (a[i-1]+a[i]+a[i+1])/3, for 0 < i < n-1; avg[i] = (a[n-2]+a[n-1])/2, for i = n-1.
43
example7Kernel.java Example No. 7
package ind.z7; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example7Kernel extends Kernel { public example7Kernel(KernelParameters parameters) { super(parameters); HWVar N = io.scalarInput("N", hwUInt(64)); HWVar count = control.count.simpleCounter(64); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar result = ( (count>0?stream.offset(x,-1):0) + x + (count<N-1?stream.offset(x,1):0) )/ (count>0&count<N-1? constant.var(hwFloat(8,24),3):2); // Output io.output("z", result, hwFloat(8,24)); }
44
Example No. 8: Moving Average 2D
Write a program that calculates moving average along a 2D matrix of the size MxN. Transfer the matrix to the MAX2 card through one stream, row by row.
45
example8Kernel.java 1/2 Example No. 8
package ind.z8;

import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel;
import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters;
import com.maxeler.maxcompiler.v1.kernelcompiler.stdlib.core.CounterChain;
import com.maxeler.maxcompiler.v1.kernelcompiler.stdlib.core.Stream.OffsetExpr;
import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar;

/** 2D moving average kernel, part 1 of 2: scalar inputs, offset parameter and position counters. */
public class example8Kernel extends Kernel {
    public example8Kernel(KernelParameters parameters) {
        super(parameters);
        // Scalar input M: bound of the outer counter j
        HWVar M = io.scalarInput("M", hwUInt(32));
        // Stream offset parameter "Nof", declared with the bounds 3 and 128;
        // part 2 uses it as the stride between neighbouring rows
        OffsetExpr Nof = stream.makeOffsetParam("Nof", 3, 128);
        // Scalar input N: bound of the inner counter i
        HWVar N = io.scalarInput("N", hwUInt(32));
        // Chained counters: j is added first (outer), i second (inner)
        CounterChain cc = control.count.makeCounterChain();
        HWVar j = cc.addCounter(M,1);
        HWVar i = cc.addCounter(N,1);
46
example8Kernel.java 2/2 Example No. 8
// Input HWVar mat = io.input("mat", hwFloat(8,24)); // Extract 8 point window around current point HWVar window[] = new HWVar[9]; int ii = 0; for ( int x=-1; x<=1; x++) for ( int y= -1; y<=1; y++) window[ii++] = (i.cast(hwInt(33))+x>=0 & i.cast(hwInt(33))+x<= N.cast(hwInt(33))-1 & j.cast(hwInt(33))+y >= 0 & j.cast(hwInt(33))+y<=M.cast(hwInt(33))-1)?stream.offset(mat, y*Nof+x):0; // Sum points in window and divide by 9 to average HWVar sum = constant.var(hwFloat(8, 24), 0); for ( HWVar hwVar : window) { sum = sum + hwVar; } HWVar divider = i.eq(0)|i.eq(N-1)|j.eq(0)|j.eq(M-1)?((i.eq(0)|i.eq(N-1))&(j.eq(0)|j.eq(M- 1))?constant.var(hwFloat(8,24),4):6):9; HWVar result = sum / divider; // Output io.output("z", result, hwFloat(8,24));
47
example8SimRunner.java Example No. 8
package ind.z8; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example8SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example8Sim"); example8Kernel k = new example8Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("mat", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,11,12, 13,14,15,16); m.setScalarInput("M", 4); m.setScalarInput("N", 4); m.setStreamOffsetParam("Nof",4); m.setKernelCycles(16); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 3.5, 4, 5, 5.5, 5.5, 6, 7, 7.5, 9.5, 10, 11, 11.5, 11.5, 12, 13, 13.5 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); }
48
example8HostCode.c 1/2
Example No. 8 example8HostCode.java 1/2 #include <stdio.h> #include <stdlib.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_in2, *data_out; unsigned long M, N, i; printf("Enter size of matrix (MxN, max 1024x1024): "); scanf("%lu%lu",&M,&N); data_in1 = malloc(M*N * sizeof(float)); data_out = malloc(M*N * sizeof(float)); for(i = 0; i < M*N; i++){ data_in1[i] = i%10; } printf("Opening and configuring FPGA.\n"); maxfile = max_maxfile_init_example8(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device);
49
example8HostCode.c 2/2
Example No. 8 example8HostCode.java /2 max_set_scalar_input_f(device, "example8Kernel.M", M, FPGA_A); max_set_scalar_input_f(device, "example8Kernel.N", N, FPGA_A); max_set_runtime_param(device, "example8Kernel.Nof", N); max_upload_runtime_params(device, FPGA_A); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("mat", data_in1, M*N * sizeof(float)), max_output("z", data_out, M*N * sizeof(float)), max_runfor("example8Kernel", M*N), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < M*N; i++){ float expected=0, divider = 9; for (int ii = -1; ii<2; ii++) for(int jj = -1; jj<2; jj++) expected += i/N+ii>=0 && i/N+ii<M && i%N+jj>=0 && i%N+jj<N ?data_in1[i+ii*N+jj]:0; if (i/N==0 || i/N==M-1) divider = 6; if (i%N==0 || i%N==N-1) divider = divider == 6? 4:6; expected /= divider; if (data_out[i] != expected){ printf("Error on element %d. Expected %f, but found %f.", i, expected, data_out[i]); break; } } max_close_device(device); max_destroy(maxfile); return 0;
50
Example No. 9: Array summation
Write a program that calculates the sum of n floating point numbers.
51
example9Kernel.java, try #1
Example No. 9 example9Kernel.java, try #1 package ind.z9; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWType; public class example9Kernel extends Kernel { public example9Kernel(KernelParameters parameters) { super(parameters); final HWType scalarType = hwFloat(8,24); HWVar cnt = control.count.simpleCounter(64); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar sum = scalarType.newInstance(this); HWVar result = x + (cnt>0?sum:0.0); sum <== stream.offset(result, -1); // Output io.output("z", result, hwFloat(8,24)); } Problem?
52
Graph of Dataflow for Summation
Example No. 9 Graph of Dataflow for Summation
53
example9Kernel.java #2 Example No. 9 Solution: New offset = Depth of pipeline loop
package ind.z9; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWType; public class example9Kernel extends Kernel { public example9Kernel(KernelParameters parameters) { super(parameters); final HWType scalarType = hwFloat(8,24); HWVar cnt = control.count.simpleCounter(64); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar sum = scalarType.newInstance(this); HWVar result = x + (cnt>12?sum:0.0); sum <== stream.offset(result, -13); // Output io.output("z", result, hwFloat(8,24)); } Solution: New offset = Depth of pipeline loop
54
example9SimRunner.java #2
Example No. 9 example9SimRunner.java #2 Still, we need to send 13 times mor data then needed package ind.z9; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example9SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example9Sim"); example9Kernel k = new example9Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 0, 0, 0, 3 , 0, 0, 0, 9 , 0, 0, 0, 0, 2 , 0, 0, 0, 3 , 0, 0, 0, 3 , 0, 0, 0, 0, 3); m.setKernelCycles(27); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 1, 3, 6 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } 12 unnecessarily data 12 unnecessarily data
55
example9HostCode.c 1/2 #2 Example No. 9
#include <stdio.h> #include <stdlib.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_out, expected = 0; unsigned long N, i; printf("Enter size of array: "); scanf("%lu",&N); data_in1 = malloc(N * 13 * sizeof(float)); data_out = malloc(N * 13 * sizeof(float)); for(i = 0; i < N; i++) for( int j=0; j<13; j++) data_in1[13*i+j] = i%10; printf("Opening and configuring FPGA.\n");
56
example9HostCode.c 2/2 #2 Example No. 9
maxfile = max_maxfile_init_example9(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * 13 * sizeof(float)), max_output("z", data_out, N * 13* sizeof(float)), max_runfor("example9Kernel", N * 13), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < N; i++){ expected += !(i%13) ? i%10 : 0; if (data_out[i] != expected){ printf("Error on element %d. Expected %f, but found %f.", i, expected, data_out[i]); break; } max_close_device(device); max_destroy(maxfile); return 0;
57
example9Kernel.java #3 Example No. 9
package ind.z9; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWType; import com.maxeler.maxcompiler.v1.kernelcompiler.stdlib.core.CounterChain; public class example9Kernel extends Kernel { public example9Kernel(KernelParameters parameters) { super(parameters); final HWType scalarType = hwFloat(8,24); CounterChain cc = control.count.makeCounterChain(); HWVar cnt = cc.addCounter( ,1); HWVar depth = cc.addCounter(13,1); // Input HWVar x = io.input("x", hwFloat(8,24), depth.eq(0) ); HWVar sum = scalarType.newInstance(this); HWVar result = x + (cnt>0?sum:0.0); sum <== stream.offset(result, -13); // Output io.output("z", result, hwFloat(8,24), depth.eq(0)); }
58
example9SimRunner.java #3
Example No. 9 example9SimRunner.java #3 package ind.z9; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example9SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example9Sim"); example9Kernel k = new example9Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2 , 3); m.setKernelCycles(27); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 1, 3, 6 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } We still need at least 27 cycles.
59
example9HostCode.c 1/2 #3 Example No. 9
#include <stdio.h> #include <stdlib.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_out, expected = 0; unsigned long N, i; printf("Enter size of array: "); scanf("%lu",&N); data_in1 = malloc(N * sizeof(float)); data_out = malloc(N * sizeof(float)); for(i = 0; i < N; i++) data_in1[i] = i%10; printf("Opening and configuring FPGA.\n");
60
example9HostCode.c 2/2 #3 Example No. 9
maxfile = max_maxfile_init_example9(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * sizeof(float)), max_output("z", data_out, N * sizeof(float)), max_runfor("example9Kernel", N * ), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < N; i++){ expected += i%10; if (data_out[i] != expected){ printf("Error on element %d. Expected %f, but found %f.", i, expected, data_out[i]); break; } max_close_device(device); max_destroy(maxfile); return 0;
61
Example No. 10: Optimized Array Summation
Write an optimized program that calculates the sum of numbers in an input array. First, calculate several parallel/partial sums; then, add them at the end.
62
example10Kernel1.java Example No. 10
package ind.z10; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWType; public class example10Kernel1 extends Kernel { public example10Kernel1(KernelParameters parameters) { super(parameters); final HWType scalarType = hwFloat(8,24); HWVar cnt = control.count.simpleCounter(64); // Input HWVar N = io.scalarInput("N", hwUInt(64)); HWVar x = io.input("x", hwFloat(8,24) ); HWVar sum = scalarType.newInstance(this); HWVar result = x + (cnt>0?sum:0.0); sum <== stream.offset(result, -13); // Output io.output("z", result, hwFloat(8,24), cnt > N-14); }
63
example10Kernel2.java Example No. 10
package ind.z10; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWType; import com.maxeler.maxcompiler.v1.kernelcompiler.stdlib.core.CounterChain; public class example10Kernel2 extends Kernel { public example10Kernel2(KernelParameters parameters) { super(parameters); final HWType scalarType = hwFloat(8,24); CounterChain cc = control.count.makeCounterChain(); HWVar cnt = cc.addCounter(14,1); HWVar depth = cc.addCounter(13,1); // Input HWVar x = io.input("x", hwFloat(8,24), depth.eq(0) ); HWVar sum = scalarType.newInstance(this); HWVar result = x + (cnt>0?sum:0.0); sum <== stream.offset(result, -13); // Output io.output("z", result, hwFloat(8,24), cnt.eq(12)); }
64
example10SimRunner.java Example No. 10
package ind.z10; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example10SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example10Sim"); example10Kernel1 k = new example10Kernel1( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26); m.setKernelCycles(26); m.runTest(); m.dumpOutput(); double exOutput[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39 }; m.checkOutputData("z", exOutput); m.logMsg("Test passed OK!"); }
65
example10Manager.java Example No. 10
package ind.z10;

import com.maxeler.maxcompiler.v1.managers.custom.blocks.KernelBlock;
import com.maxeler.maxcompiler.v1.managers.custom.CustomManager;
import com.maxeler.maxcompiler.v1.managers.MAXBoardModel;

/**
 * Custom manager for Example 10: instantiates both kernels and chains them.
 *
 * Data flow: host stream "x" -> example10Kernel1 -> example10Kernel2 -> host
 * stream "z".
 */
class example10Manager extends CustomManager {

	/**
	 * @param is_simulation passed through to CustomManager
	 * @param name          build name passed through to CustomManager
	 * @param board_model   target board model passed through to CustomManager
	 */
	public example10Manager(boolean is_simulation, String name, MAXBoardModel board_model) {
		super(is_simulation, board_model, name);

		KernelBlock kb1 = addKernel(
			new example10Kernel1(makeKernelParameters("example10Kernel1")));
		KernelBlock kb2 = addKernel(
			new example10Kernel2(makeKernelParameters("example10Kernel2")));

		// Host -> kernel 1
		kb1.getInput("x") <== addStreamFromHost("x");
		// Kernel 1 -> kernel 2
		kb2.getInput("x") <== kb1.getOutput("z");
		// Kernel 2 -> host
		addStreamToHost("z") <== kb2.getOutput("z");
	}
}
66
example10HostSimBuilder.java Example No. 10
package ind.z10; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.managers.BuildConfig; import com.maxeler.maxcompiler.v1.managers.BuildConfig.Level; public class example10HostSimBuilder { public static void main(String[] args) { example10Manager m = new example10Manager(true,"example10HostSim", BOARDMODEL); m.setBuildConfig(new BuildConfig(Level.FULL_BUILD)); m.build(); }
67
example10HWBuilder.java Example No. 10
package ind.z10; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.managers.standard.Manager; import com.maxeler.maxcompiler.v1.managers.standard.Manager.IOType; public class example10HWBuilder { public static void main(String[] args) { example10Manager m = new example10Manager(false,"example10HostSim", BOARDMODEL); m.setBuildConfig(new BuildConfig(Level.FULL_BUILD)); m.build(); }
68
example10HostCode.c 1/2 Example No. 10
#include <stdio.h> #include <stdlib.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_out, expected = 0; unsigned long N, i; printf("Enter size of array (it will be truncated to the firs lower number dividable with 13): "); scanf("%lu",&N); N /= 13; N *= 13; data_in1 = malloc(N * sizeof(float)); data_out = malloc(1 * sizeof(float)); for(i = 0; i < N; i++){ data_in1[i] = i%10; expected += data_in1[i]; }
69
example10HostCode.c 2/2 Example No. 10
printf("Opening and configuring FPGA.\n"); maxfile = max_maxfile_init_example10(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); max_set_scalar_input_f(device, "example10Kernel1.N", N, FPGA_A); max_upload_runtime_params(device, FPGA_A); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * sizeof(float)), max_output("z", data_out, 2 * sizeof(float)), max_runfor("example10Kernel1", N), max_runfor("example10Kernel2", 13*12+2), max_end()); printf("Checking data read from FPGA.\n"); printf("Expected: %f, returned: %f\n", expected, *data_out); max_close_device(device); max_destroy(maxfile); return 0; }
Similar presentations
© 2024 SlidePlayer.com. Inc.
All rights reserved.