Download presentation
Presentation is loading. Please wait.
1
Faculty of Computer Science © 2006 CMPUT 229 Memory Hierarchy Part 2 Refreshing Memory
2
© 2006 Department of Computing Science CMPUT 229 Writing Cache-Conscious Programs Problem: Write C code for a function that computes the sum of the elements of a two-dimensional array, a[M][N], of integers. int SumArray(int a[][], int M, int N) 1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 9 return sum; 10 } 1 int SumArrayCols(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (j=0 ; j<N ; j++) 7 for (i=0 ; i<M ; i++) 8 sum += a[i][j]; 9 return sum; 10 } Bryant/O’Hallaron, p. 508
3
© 2006 Department of Computing Science CMPUT 229 SumArrayRows Data Access Order a[1][2] a[1][3] a[1][4] a[1][5] a[2][0] a[2][1] a[2][2] a[2][3] a[2][4] a[2][5] a[3][0] a[3][1] a[3][2] a[3][3] a[3][4] a[0][0] a[0][1] a[0][2] a[0][3] a[0][4] a[0][5] a[1][0] a[1][1] 0x8000 4000 0x8000 4004 0x8000 4010 0x8000 4024 0x8000 4008 0x8000 4014 0x8000 4028 0x8000 403C 0x8000 400C 0x8000 4018 0x8000 402C 0x8000 4040 0x8000 401C 0x8000 4030 0x8000 4044 0x8000 4050 0x8000 4020 0x8000 4034 0x8000 4048 0x8000 4054 0x8000 4038 0x8000 404C 0x8000 4058 1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 9 return sum; 10 } Bryant/O’Hallaron, p. 508 Cache Memory
4
© 2006 Department of Computing Science CMPUT 229 SumArrayRows Data Access Order a[1][2] a[1][3] a[1][4] a[1][5] a[2][0] a[2][1] a[2][2] a[2][3] a[2][4] a[2][5] a[3][0] a[3][1] a[3][2] a[3][3] a[3][4] a[0][0] a[0][1] a[0][2] a[0][3] a[0][4] a[0][5] a[1][0] a[1][1] 0x8000 4000 0x8000 4004 0x8000 4010 0x8000 4024 0x8000 4008 0x8000 4014 0x8000 4028 0x8000 403C 0x8000 400C 0x8000 4018 0x8000 402C 0x8000 4040 0x8000 401C 0x8000 4030 0x8000 4044 0x8000 4050 0x8000 4020 0x8000 4034 0x8000 4048 0x8000 4054 0x8000 4038 0x8000 404C 0x8000 4058 1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 9 return sum; 10 } Bryant/O’Hallaron, p. 508 a[0][0]a[0][1]a[0][2]a[0][3] Cache Memory
5
© 2006 Department of Computing Science CMPUT 229 SumArrayRows Data Access Order a[1][2] a[1][3] a[1][4] a[1][5] a[2][0] a[2][1] a[2][2] a[2][3] a[2][4] a[2][5] a[3][0] a[3][1] a[3][2] a[3][3] a[3][4] a[0][0] a[0][2] a[0][3] a[0][4] a[0][5] a[1][0] a[1][1] 0x8000 4000 0x8000 4004 0x8000 4010 0x8000 4024 0x8000 4008 0x8000 4014 0x8000 4028 0x8000 403C 0x8000 400C 0x8000 4018 0x8000 402C 0x8000 4040 0x8000 401C 0x8000 4030 0x8000 4044 0x8000 4050 0x8000 4020 0x8000 4034 0x8000 4048 0x8000 4054 0x8000 4038 0x8000 404C 0x8000 4058 1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 9 return sum; 10 } a[0][0]a[0][1]a[0][2]a[0][3] a[0][1] Cache Memory Bryant/O’Hallaron, p. 508
6
© 2006 Department of Computing Science CMPUT 229 SumArrayRows Data Access Order a[1][2] a[1][3] a[1][4] a[1][5] a[2][0] a[2][1] a[2][2] a[2][3] a[2][4] a[2][5] a[3][0] a[3][1] a[3][2] a[3][3] a[3][4] a[0][0] a[0][2] a[0][3] a[0][4] a[0][5] a[1][0] a[1][1] 0x8000 4000 0x8000 4004 0x8000 4010 0x8000 4024 0x8000 4008 0x8000 4014 0x8000 4028 0x8000 403C 0x8000 400C 0x8000 4018 0x8000 402C 0x8000 4040 0x8000 401C 0x8000 4030 0x8000 4044 0x8000 4050 0x8000 4020 0x8000 4034 0x8000 4048 0x8000 4054 0x8000 4038 0x8000 404C 0x8000 4058 1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 9 return sum; 10 } a[0][0]a[0][1]a[0][2]a[0][3] a[0][1] Cache Memory Bryant/O’Hallaron, p. 508
7
© 2006 Department of Computing Science CMPUT 229 SumArrayRows Data Access Order a[1][2] a[1][3] a[1][4] a[1][5] a[2][0] a[2][1] a[2][2] a[2][3] a[2][4] a[2][5] a[3][0] a[3][1] a[3][2] a[3][3] a[3][4] a[0][0] a[0][2] a[0][3] a[0][4] a[0][5] a[1][0] a[1][1] 0x8000 4000 0x8000 4004 0x8000 4010 0x8000 4024 0x8000 4008 0x8000 4014 0x8000 4028 0x8000 403C 0x8000 400C 0x8000 4018 0x8000 402C 0x8000 4040 0x8000 401C 0x8000 4030 0x8000 4044 0x8000 4050 0x8000 4020 0x8000 4034 0x8000 4048 0x8000 4054 0x8000 4038 0x8000 404C 0x8000 4058 1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 9 return sum; 10 } a[0][0]a[0][1]a[0][2]a[0][3] a[0][1] Cache Memory Bryant/O’Hallaron, p. 508
8
© 2006 Department of Computing Science CMPUT 229 SumArrayRows Data Access Order a[1][2] a[1][3] a[1][4] a[1][5] a[2][0] a[2][1] a[2][2] a[2][3] a[2][4] a[2][5] a[3][0] a[3][1] a[3][2] a[3][3] a[3][4] a[0][0] a[0][2] a[0][3] a[0][4] a[0][5] a[1][0] a[1][1] 0x8000 4000 0x8000 4004 0x8000 4010 0x8000 4024 0x8000 4008 0x8000 4014 0x8000 4028 0x8000 403C 0x8000 400C 0x8000 4018 0x8000 402C 0x8000 4040 0x8000 401C 0x8000 4030 0x8000 4044 0x8000 4050 0x8000 4020 0x8000 4034 0x8000 4048 0x8000 4054 0x8000 4038 0x8000 404C 0x8000 4058 1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 9 return sum; 10 } a[0][0]a[0][1]a[0][2]a[0][3] a[0][5]a[1][0]a[1][1]a[0][4] a[0][1] Cache Memory Bryant/O’Hallaron, p. 508
9
© 2006 Department of Computing Science CMPUT 229 SumArrayRows Data Access Order a[1][2] a[1][3] a[1][4] a[1][5] a[2][0] a[2][1] a[2][2] a[2][3] a[2][4] a[2][5] a[3][0] a[3][1] a[3][2] a[3][3] a[3][4] a[0][0] a[0][2] a[0][3] a[0][4] a[0][5] a[1][0] a[1][1] 0x8000 4000 0x8000 4004 0x8000 4010 0x8000 4024 0x8000 4008 0x8000 4014 0x8000 4028 0x8000 403C 0x8000 400C 0x8000 4018 0x8000 402C 0x8000 4040 0x8000 401C 0x8000 4030 0x8000 4044 0x8000 4050 0x8000 4020 0x8000 4034 0x8000 4048 0x8000 4054 0x8000 4038 0x8000 404C 0x8000 4058 1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 9 return sum; 10 } a[0][0]a[0][1]a[0][2]a[0][3] a[0][5]a[1][0]a[1][1]a[0][4] a[0][1] Cache Memory Bryant/O’Hallaron, p. 508
10
© 2006 Department of Computing Science CMPUT 229 SumArrayRows Data Access Order a[1][2] a[1][3] a[1][4] a[1][5] a[2][0] a[2][1] a[2][2] a[2][3] a[2][4] a[2][5] a[3][0] a[3][1] a[3][2] a[3][3] a[3][4] a[0][0] a[0][2] a[0][3] a[0][4] a[0][5] a[1][0] a[1][1] 0x8000 4000 0x8000 4004 0x8000 4010 0x8000 4024 0x8000 4008 0x8000 4014 0x8000 4028 0x8000 403C 0x8000 400C 0x8000 4018 0x8000 402C 0x8000 4040 0x8000 401C 0x8000 4030 0x8000 4044 0x8000 4050 0x8000 4020 0x8000 4034 0x8000 4048 0x8000 4054 0x8000 4038 0x8000 404C 0x8000 4058 1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 9 return sum; 10 } a[0][0]a[0][1]a[0][2]a[0][3] a[0][5]a[1][0]a[1][1]a[0][4] a[0][1] Cache Memory Bryant/O’Hallaron, p. 508
11
© 2006 Department of Computing Science CMPUT 229 SumArrayCols Data Access Order a[1][2] a[1][3] a[1][4] a[1][5] a[2][0] a[2][1] a[2][2] a[2][3] a[2][4] a[2][5] a[3][0] a[3][1] a[3][2] a[3][3] a[3][4] a[0][0] a[0][1] a[0][2] a[0][3] a[0][4] a[0][5] a[1][0] a[1][1] 0x8000 4000 0x8000 4004 0x8000 4010 0x8000 4024 0x8000 4008 0x8000 4014 0x8000 4028 0x8000 403C 0x8000 400C 0x8000 4018 0x8000 402C 0x8000 4040 0x8000 401C 0x8000 4030 0x8000 4044 0x8000 4050 0x8000 4020 0x8000 4034 0x8000 4048 0x8000 4054 0x8000 4038 0x8000 404C 0x8000 4058 1 int SumArrayCols(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (j=0 ; j<N ; j++) 7 for (i=0 ; i<M ; i++) 8 sum += a[i][j]; 9 return sum; 10 } a[0][0]a[0][1]a[0][2]a[0][3] Cache Memory Bryant/O’Hallaron, p. 508
12
© 2006 Department of Computing Science CMPUT 229 SumArrayCols Data Access Order a[1][2] a[1][3] a[1][4] a[1][5] a[2][0] a[2][1] a[2][2] a[2][3] a[2][4] a[2][5] a[3][0] a[3][1] a[3][2] a[3][3] a[3][4] a[0][0] a[0][1] a[0][2] a[0][3] a[0][4] a[0][5] a[1][0] a[1][1] 0x8000 4000 0x8000 4004 0x8000 4010 0x8000 4024 0x8000 4008 0x8000 4014 0x8000 4028 0x8000 403C 0x8000 400C 0x8000 4018 0x8000 402C 0x8000 4040 0x8000 401C 0x8000 4030 0x8000 4044 0x8000 4050 0x8000 4020 0x8000 4034 0x8000 4048 0x8000 4054 0x8000 4038 0x8000 404C 0x8000 4058 1 int SumArrayCols(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (j=0 ; j<N ; j++) 7 for (i=0 ; i<M ; i++) 8 sum += a[i][j]; 9 return sum; 10 } a[0][0]a[0][1]a[0][2]a[0][3] a[0][5]a[1][0]a[1][1]a[0][4] Cache Bryant/O’Hallaron, p. 508
13
© 2006 Department of Computing Science CMPUT 229 SumArrayCols Data Access Order a[1][2] a[1][3] a[1][4] a[1][5] a[2][0] a[2][1] a[2][2] a[2][3] a[2][4] a[2][5] a[3][0] a[3][1] a[3][2] a[3][3] a[3][4] a[0][0] a[0][1] a[0][2] a[0][3] a[0][4] a[0][5] a[1][0] a[1][1] 0x8000 4000 0x8000 4004 0x8000 4010 0x8000 4024 0x8000 4008 0x8000 4014 0x8000 4028 0x8000 403C 0x8000 400C 0x8000 4018 0x8000 402C 0x8000 4040 0x8000 401C 0x8000 4030 0x8000 4044 0x8000 4050 0x8000 4020 0x8000 4034 0x8000 4048 0x8000 4054 0x8000 4038 0x8000 404C 0x8000 4058 1 int SumArrayCols(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (j=0 ; j<N ; j++) 7 for (i=0 ; i<M ; i++) 8 sum += a[i][j]; 9 return sum; 10 } a[0][0]a[0][1]a[0][2]a[0][3] a[2][1]a[2][2]a[2][3] a[2][0] a[0][5]a[1][0]a[1][1]a[0][4] Cache Bryant/O’Hallaron, p. 508
14
© 2006 Department of Computing Science CMPUT 229 The Cost of Programming Productivity Easy-to-read and easy-to-maintain code often results in lower runtime performance. Student Class University
15
© 2006 Department of Computing Science CMPUT 229 The Cost of Programming Productivity Abstraction Inheritance Student Professor Support Staff Person
16
© 2006 Department of Computing Science CMPUT 229 The Cost of Programming Productivity Data Encapsulation Person Date of Birth Gender Address Citizenship Name Driver Lic. Student Faculty Date of Adm Department Program Univ. ID Classes Enr. Grades
17
© 2006 Department of Computing Science CMPUT 229 Data Locality Primer AMD Athlon 64 X2
18
© 2006 Department of Computing Science CMPUT 229 Data Locality Primer: Cache Organization POWER5 Cache Organization –L1 Data Cache: 32 Kbytes, 128-byte cache lines –L2 Cache: 1.44 Mbytes, 128-byte cache lines –L3 Cache: 32 Mbytes, 512-byte cache lines
19
© 2006 Department of Computing Science CMPUT 229 Data Locality Primer: Cache Organization Bytes Faculty Date of Adm Department Program Univ. ID Classes Enr. Grades Student: 1 byte 4 bytes 1 byte 2 bytes 4 bytes Date of Birth Gender Address Citizenship Name Driver Lic. Person: 4 bytes 1 byte 32 bytes 16 bytes 32 bytes 4 bytes 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 … 127 0 2 255 Cache Lines
20
© 2006 Department of Computing Science CMPUT 229 Data Locality Primer: Data in Memory Memory Address Bytes 012345678910111213141516171819127 0 128 256 384 Univ. IDDate of Adm. Fa. De Progr. Classes Enr.Grades Fa. De Progr.Classes Enr.GradesUniv. ID Date of Adm. Fa. De Progr. Classes Enr.Grades Fa. De Progr.Classes Enr.GradesUniv. ID Faculty Date of Adm Department Program Univ. ID Classes Enr. Grades Student: 1 byte 4 bytes 1 byte 2 bytes 4 bytes
21
© 2006 Department of Computing Science CMPUT 229 03031323336374748515269848589 768 1024 1152 1280 Memory Address Data Locality Primer: Data in Memory NameDofB Ge Citizens. Address Dr. Lic.Namedress Ge Citizens. Dr. Lic.DofBNameDofB Ge Citizens. Address Dr. Lic.Namedress Ge Citizens. Dr. Lic.DofB Date of Birth Gender Address Citizenship Name Driver Lic. Person: 4 bytes 1 byte 32 bytes 16 bytes 32 bytes 4 bytes
22
© 2006 Department of Computing Science CMPUT 229 03031323336374748515269848589 768 1024 1152 1280 Memory Address Data Locality Primer: Data in Memory Memory Address Bytes NameDofB Ge Citizens. Address Dr. Lic.Namedress Ge Citizens. Dr. Lic.DofBNameDofB Ge Citizens. Address Dr. Lic.Namedress Ge Citizens. Dr. Lic.DofB 012345678910111213141516171819127 0 128 256 384 Univ. IDDate of Adm. Fa. De Progr. Classes Enr.Grades Fa. De Progr.Classes Enr.GradesUniv. ID Date of Adm. Fa. De Progr. Classes Enr.Grades Fa. De Progr.Classes Enr.GradesUniv. ID
23
© 2006 Department of Computing Science CMPUT 229 Example: A search through the data structures How many Computing Science students are younger than 23 years old? Bytes 012345678910111213141516171819127 0 2 255 Univ. IDDate of Adm. Fa. De Progr. Classes Enr.Grades Cache Lines
24
© 2006 Department of Computing Science CMPUT 229 Example: A search through the data structures How many Computing Science students are younger than 23 years old? Loads 128 bytes and uses 5 bytes! Bytes 012345678910111213141516171819127 0 2 255 Univ. IDDate of Adm. Fa. De Progr. Classes Enr.Grades Cache Lines
25
© 2006 Department of Computing Science CMPUT 229 Example: A search through the data structures How many Computing Science students are younger than 23 years old? Loads 128 bytes and uses 5 bytes! Bytes 012345678910111213141516171819127 0 2 255 Univ. IDDate of Adm. Fa. De Progr. Classes Enr.Grades NameDofB Ge Citizens. Address Dr. Lic. Cache Lines
26
© 2006 Department of Computing Science CMPUT 229 Example: A search through the data structures How many Computing Science students are younger than 23 years old? Loads 128 bytes and uses 5.3 bytes! Loads 128 bytes and uses 5.8 bytes! Bytes 012345678910111213141516171819127 0 2 255 Univ. IDDate of Adm. Fa. De Progr. Classes Enr.Grades NameDofB Ge Citizens. Address Dr. Lic. Cache Lines
27
© 2006 Department of Computing Science CMPUT 229 Data Reshaping for Arrays of Structures Student*ListOfStudents; …. ListOfStudents = (Student*)malloc(….); Univ. IDDate of Adm. Fa. De Progr. Classes Enr.GradesUniv. IDDate of Adm. Fa. De Progr. Classes Enr.GradesUniv. IDDate of Adm. Fa. De Progr. Classes Enr.Grades Univ. ID Date of Adm. Fa. De Progr. Univ. ID Date of Adm. Fa. De Progr. Univ. ID Date of Adm. Fa. De Progr.
28
© 2006 Department of Computing Science CMPUT 229 Reshaping Linked Data Structures E.g. A linked list of students struct student { int age; int studentNumber; int studentProgram; float averageGrade; struct student *next; }; agenumgpaprogagenumgpaprog …
29
© 2006 Department of Computing Science CMPUT 229 Maximal Structure Splitting age 1 num 1 gpa 1 prog 1 age 2 num 2 gpa 2 prog 2 … age 3 num 3 gpa 3 prog 3 age 1 age 2 age 3 num 1 num 2 num 3 prog 1 prog 2 prog 3 gpa 1 gpa 2 gpa 3 next 1 next 2 next 3
30
© 2006 Department of Computing Science CMPUT 229 Is it safe to transform a given data structure? Build alias set – If a pointer P points to the structure, then all the objects in the points-to set of P must have the same layout. The layout of two structures is the same if each field has the same offset and the same length.
31
© 2006 Department of Computing Science CMPUT 229 Pool Allocation Intercept mallocs and replace by pool allocation: each structure layout gets its own pool. If pool is full another pool can be allocated
32
© 2006 Department of Computing Science CMPUT 229 Pool Allocation age 1 num 1 prog 1 gpa 1 next 1 Intercept mallocs and replace by pool allocation: each structure layout gets its own pool.
33
© 2006 Department of Computing Science CMPUT 229 Pool Allocation age 1 age 2 num 1 num 2 prog 1 prog 2 gpa 1 gpa 2 next 1 next 2 Intercept mallocs and replace by pool allocation: each structure layout gets its own pool.
34
© 2006 Department of Computing Science CMPUT 229 Pool Allocation age 1 age 2 age 3 num 1 num 2 num 3 prog 1 prog 2 prog 3 gpa 1 gpa 2 gpa 3 next 1 next 2 next 3 Intercept mallocs and replace by pool allocation: each structure layout gets its own pool.
35
© 2006 Department of Computing Science CMPUT 229 Pool Allocation age 1 age 2 age 3 num 1 num 2 num 3 prog 1 prog 2 prog 3 gpa 1 gpa 2 gpa 3 next 1 next 2 next 3 age 4 num 4 prog 4 gpa 4 next 4 Intercept mallocs and replace by pool allocation: each structure layout gets its own pool.
36
© 2006 Department of Computing Science CMPUT 229 Pool Allocation age 1 age 2 age 3 num 1 num 2 num 3 prog 1 prog 2 prog 3 gpa 1 gpa 2 gpa 3 next 1 next 2 next 3 age 4 num 4 prog 4 gpa 4 next 4 age 5 num 5 prog 5 gpa 5 next 6 Intercept mallocs and replace by pool allocation: each structure layout gets its own pool.
37
© 2006 Department of Computing Science CMPUT 229 Pool Allocation age 1 age 2 age 3 num 1 num 2 num 3 prog 1 prog 2 prog 3 gpa 1 gpa 2 gpa 3 next 1 next 2 next 3 age 4 num 4 prog 4 gpa 4 next 4 age 5 num 5 prog 5 gpa 5 next 6 Intercept mallocs and replace by pool allocation: each structure layout gets its own pool. If pool is full another pool can be allocated
38
© 2006 Department of Computing Science CMPUT 229 Pool Allocation age 1 age 2 age 3 num 1 num 2 num 3 prog 1 prog 2 prog 3 gpa 1 gpa 2 gpa 3 next 1 next 2 next 3 age 4 num 4 prog 4 gpa 4 next 4 age 5 num 5 prog 5 gpa 5 next 6 age 7 num 7 prog 7 gpa 7 next 7 Intercept mallocs and replace by pool allocation: each structure layout gets its own pool. If pool is full another pool can be allocated
39
© 2006 Department of Computing Science CMPUT 229 Pointer Dereferencing - Before struct student { int age; int studentNumber; int studentProgram; float averageGrade; struct student *next; }; struct student *s = malloc (sizeof (struct student)); s->age = 21; s->averageGrade = 3.8; s->age == *(s + 0) s->averageGrade == *(s + 12) agenumgpaprog 0481216 agenumgpaprog … 0481216 s
40
© 2006 Department of Computing Science CMPUT 229 Uniform Structure Splitting Requires that all fields in the structure have the same number of bytes – Advantage: Simpler address computation – Disadvantage: Either restricts the application of the technique or wastes memory with padding to create same-length fields
41
© 2006 Department of Computing Science CMPUT 229 Uniform Splitting Pointer Transformation age 1 age 2 age 3 num 1 num 2 num 3 prog 1 prog 2 prog 3 gpa 1 gpa 2 gpa 3 next 1 next 2 next 3 s 1 ->age == *(s 1 + 0) s 1 ->gpa == *(s 1 + (3 * pool_field_len)) s1s1 Pool_field_len is the same for each field 3 * pool_field_len pool_field_len
42
© 2006 Department of Computing Science CMPUT 229 Non-Uniform Structure Splitting Requires pools to be aligned by the size of the pool. E.g. If the pools are 4k then they must be aligned on 4k boundaries. More general Address calculation is more involved
43
© 2006 Department of Computing Science CMPUT 229 Non-Uniform Example struct example { type_2 a; /* 2 bytes */ type_8 b; /* 8 bytes */ type_4 c; /* 4 bytes */ }; s How can the compiler find the address to access: s->c?
44
© 2006 Department of Computing Science CMPUT 229 Non-Uniform Example struct example { type_2 a; /* 2 bytes */ type_8 b; /* 8 bytes */ type_4 c; /* 4 bytes */ }; s How can the compiler find the address to access: s->c? pool_base = s & 0xF…F000 index = (s – pool_base) / 2 field_base = (2+8)*num_structs_per_pool s->c = *(s + field_base + 4*index - index*2) s->c = *(s + field_base + 4*index - s + pool_base) s->c = *(field_base + 4*index + pool_base)
45
© 2006 Department of Computing Science CMPUT 229 Experiments - Micro Benchmarks (Speedup) Power 4 Power 5
46
© 2006 Department of Computing Science CMPUT 229 Experiments - Micro Benchmarks (Instruction Count) Power 4 Power 5
47
© 2006 Department of Computing Science CMPUT 229 Experiments - Micro Benchmarks (CPI) Power 4 Power 5
48
© 2006 Department of Computing Science CMPUT 229 Experiments - Micro Benchmarks (DTLB Misses) Power 4 Power 5
49
© 2006 Department of Computing Science CMPUT 229 Experiments - Micro Benchmarks (L1D Misses) Power 4 Power 5
50
© 2006 Department of Computing Science CMPUT 229 Experiments - Micro Benchmarks (L2 Misses) Power 4 Power 5
51
© 2006 Department of Computing Science CMPUT 229 Experiments - Micro Benchmarks (L3 Misses) Power 4 Power 5
52
© 2006 Department of Computing Science CMPUT 229 Experiments Evaluated SPEC 2000, Olden and LLU Many opportunities in SPEC missed –Pointer analysis didn’t have enough precision to identify opportunities in the SPEC 2000 benchmarks –Could only identify small opportunities –No impact on performance
53
© 2006 Department of Computing Science CMPUT 229 Experiments - Olden & LLU (Speedup) Power 4 Power 5 bh em3d health power tsp llu bh em3d health power tsp llu
Similar presentations
© 2025 SlidePlayer.com. Inc.
All rights reserved.