Machine Level Representation of Programs (IV)
Outline X86-64 representation Floating point Suggested readings Chap 3.12(2ed), 3.11(3ed)
X86-64 Representation Procedures and Stack Alignment Byte Ordering
Example: swap() IA32 swap: swap(int *xp, int *yp) pushl %ebp movl %esp,%ebp pushl %ebx movl 12(%ebp),%ecx movl 8(%ebp),%edx movl (%ecx),%eax movl (%edx),%ebx movl %eax,(%edx) movl %ebx,(%ecx) movl -4(%ebp),%ebx movl %ebp,%esp popl %ebp ret IA32 void swap(int *xp, int *yp) { int t0 = *xp; int t1 = *yp; *xp = t1; *yp = t0; } Setup Body Finish
Example: swap() x86-64 swap: swap(int *xp, int *yp) movl (%rdi), %edx movl (%rsi), %eax movl %eax, (%rdi) movl %edx, (%rsi) retq x86-64 void swap(int *xp, int *yp) { int t0 = *xp; int t1 = *yp; *xp = t1; *yp = t0; }
Operands passed in registers Example: swap() swap: movl (%rdi), %edx movl (%rsi), %eax movl %eax, (%rdi) movl %edx, (%rsi) retq Operands passed in registers First (xp) in %rdi Second (yp) in %rsi 64-bit pointers No stack operation required 32-bit data Data held in registers %eax and %edx movl operation
Example: swap() Swap long int in 64-bit swap: swap(long int *xp, movq (%rdi), %rdx movq (%rsi), %rax movq %rax, (%rdi) movq %rdx, (%rsi) retq Swap long int in 64-bit void swap(long int *xp, long int *yp) { long int t0 = *xp; long int t1 = *yp; *xp = t1; *yp = t0; } 64-bit data Data held in registers %rax and %rdx movq operation “q” stands for quad-word
Procedures - Stack IA32/Linux Stack Frame Caller Stack Frame Arguments for this call Return Address (pushed by “call”) Callee Stack Frame Old %ebp (saved by “push %ebp”) Saved registers Local variables Arguments for next call Arguments Ret Addr Old %ebp %ebp frame pointer Saved registers Local variables Arguments %esp stack pointer
IA32/Linux Register Usage Procedures - Register IA32/Linux Register Usage %eax, %edx, %ecx Caller saves prior to the call if the values are used later %eax Return integer value %ebx, %esi, %edi Callee saves if it wants to use them %esp, %ebp special Caller-Save %eax %edx %ecx Callee-Save %ebx %esi %edi Special %esp %ebp
X86-64/Linux Register Usage Procedures - Register X86-64/Linux Register Usage Caller-Save %rax %rcx %rdx %rsi %rdi %r8 %r9 %r10 Callee-Save %rbx %rbp %r12 %r13 %r14 %r15 Special %rsp, %r11 %rax %rax %r8 %r8 %rbx %rbx %r9 %r9 %rcx %rcx %r10 %r10 %rdx %rdx %r11 %r11 %rsi %rsi %r12 %r12 %rdi %rdi %r13 %r13 %rsp %rsp %r14 %r14 %rbp %rbp %r15 %r15
X86-64/Linux Register Usage Procedures - Register X86-64/Linux Register Usage Arguments passed via regs %rcx %rdx %rsi %rdi %r8 %r9 If more than 6 integer parameters, then pass rest on stack Return value by %rax No frame pointer Special %rsp stack pointer %r11 used for linking %rax ret %rax %r8 %r8 arg#5 %rbx %r9 arg#6 %r9 %rcx arg#4 %rcx %r10 %rdx arg#3 %rdx %r11 %r11 link %rsi %rsi arg#2 %r12 %rdi %rdi arg#1 %r13 %rsp stack %rsp %r14 %rbp %r15
Procedures - Stack x86-64/Linux Stack Frame Caller Stack Frame Arguments passed via registers Return Address (pushed by “call”) Callee Stack Frame Saved registers Local variables Ret Addr Saved registers Local variables %rsp stack pointer
Operands passed in registers No stack operations required (except ret) X86-64 Swap void swap(long *xp, long *yp) { long t0 = *xp; long t1 = *yp; *xp = t1; *yp = t0; } swap: movq (%rdi), %rdx movq (%rsi), %rax movq %rax, (%rdi) movq %rdx, (%rsi) ret Operands passed in registers First (xp) in %rdi, second (yp) in %rsi No stack operations required (except ret) Avoid stack Can hold all local information in registers
Local Variables in Stack void swap_a(long *xp, long *yp) { volatile long loc[2]; loc[0] = *xp; loc[1] = *yp; *xp = loc[1]; *yp = loc[0]; } swap_a: movq (%rdi), %rax movq %rax, -24(%rsp) movq (%rsi), %rax movq %rax, -16(%rsp) movq -16(%rsp), %rax movq %rax, (%rdi) movq -24(%rsp), %rax movq %rax, (%rsi) ret ret ptr %rsp -8 unused Avoid Stack Pointer change Can hold all information within small windows beyond stack pointer -16 loc[1] -24 loc[0]
Without Stack Frame No value held while swap being invoked long scount = 0; void swap_b(long a[], int i) { swap(&a[i], &a[i+1]); scount++; } No value held while swap being invoked No callee save registers needed swap_b: movslq %esi,%rsi # sign extend leaq (%rdi,%rsi,8), %rdi # &a[i] leaq 8(%rdi), %rsi # &a[i+1] call swap # swap() incq scount(%rip) # scount++; ret . . . ret ptr1 ret ptr2 %rsp execute in swap
Call using Jump Directly return from swap long scount = 0; void swap_c(long a[], int i) { swap(&a[i], &a[i+1]); } Directly return from swap Possible since swap is a “tail call” swap_c: movslq %esi,%rsi # Sign extend leaq (%rdi,%rsi,8), %rdi # &a[i] leaq 8(%rdi), %rsi # &a[i+1] jmp swap # swap() . . . ret ptr1 %rsp execute in swap
Stack Frame Example swap_d: movq %rbx, -16(%rsp) movslq %esi,%rbx movq %r12, -8(%rsp) movq %rdi, %r12 leaq (%rdi,%rbx,8), %rdi subq $16, %rsp leaq 8(%rdi), %rsi call swap movq (%r12,%rbx,8), %rax addq %rax, sum(%rip) movq (%rsp), %rbx movq 8(%rsp), %r12 addq $16, %rsp ret long sum = 0; void swap_d(long a[], int i) { swap(&a[i], &a[i+1]); sum += a[i]; } Keep values of a and i in callee save registers Must set up stack frame to save these registers
Understanding x86-64 Stack Frame swap_d: movq %rbx, -16(%rsp) movslq %esi,%rbx movq %r12, -8(%rsp) movq %rdi, %r12 leaq (%rdi,%rbx,8), %rdi subq $16, %rsp leaq 8(%rdi), %rsi . . . addq %rax, sum(%rip) movq (%rsp), %rbx movq 8(%rsp), %r12 addq $16, %rsp ret ret ptr %rsp # save %rbx %r12 -8 # save %r12 %rbx -16
Understanding x86-64 Stack Frame swap_d: movq %rbx, -16(%rsp) movslq %esi,%rbx movq %r12, -8(%rsp) movq %rdi, %r12 leaq (%rdi,%rbx,8), %rdi subq $16, %rsp leaq 8(%rdi), %rsi . . . addq %rax, sum(%rip) movq (%rsp), %rbx movq 8(%rsp), %r12 addq $16, %rsp ret ret ptr # save %rbx %r12 +8 # save %r12 %rbx %rsp # move stack frame # restore %rbx # restore %r12
Understanding x86-64 Stack Frame swap_d: movq %rbx, -16(%rsp) movslq %esi,%rbx movq %r12, -8(%rsp) movq %rdi, %r12 leaq (%rdi,%rbx,8), %rdi subq $16, %rsp leaq 8(%rdi), %rsi . . . addq %rax, sum(%rip) movq (%rsp), %rbx movq 8(%rsp), %r12 addq $16, %rsp ret ret ptr %rsp # save %rbx %r12 -8 # save %r12 %rbx -16 # move stack frame # restore %rbx # restore %r12 # move stack frame
Features of Stack Frame Allocate entire frame at once All stack accesses can be relative to %rsp Do by decrementing stack pointer Can delay allocation Simple deallocation Increment stack pointer No base/frame pointer needed
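Alignment (required low-order bits of the address, in binary) IA32 x86-64 Bytes Type Alignment 1 byte: char (IA32 and x86-64): no restriction 2 bytes: short (IA32 and x86-64): 0₂ 4 bytes: int, float, pointer (IA32); int, float (x86-64): 00₂ 8 bytes: double (IA32): 000₂ (Win), 00₂ (Lin); double, pointer (x86-64): 000₂ 12/16 bytes: long double: 0000₂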
Example X86-64 or IA32 Windows: IA32 Linux struct s1 { char c; int i[2]; double d; } *p; X86-64 or IA32 Windows: K = 8; due to double element C 3bytes i[0] i[1] 4bytes d p+0 p+4 p+8 p+16 p+24 IA32 Linux K = 4; double treated like a 4-byte data type C 3bytes i[0] i[1] d p+0 p+4 p+8 p+12 p+20
Byte Ordering IA32 (Little Endian) Output on IA32 0xf0 0xf1 0xf2 0xf3 0xf4 0xf5 0xf6 0xf7 C[0] C[1] C[2] C[3] C[4] C[5] C[6] C[7] S[0] S[1] S[2] S[3] LSB MSB LSB MSB I[0] I[1] LSB MSB L[0] Output on IA32 Characters 0-7 = [0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7] Shorts 0-3 = [0xf1f0,0xf3f2,0xf5f4,0xf7f6] Ints 0-1 = [0xf3f2f1f0,0xf7f6f5f4] Long 0 = [0xf3f2f1f0]
Byte Ordering X86-64 (Little Endian) Output on x86-64 0xf0 0xf1 0xf2 0xf3 0xf4 0xf5 0xf6 0xf7 C[0] C[1] C[2] C[3] C[4] C[5] C[6] C[7] S[0] S[1] S[2] S[3] LSB MSB LSB MSB I[0] I[1] LSB MSB L[0] Output on x86-64 Characters 0-7 = [0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7] Shorts 0-3 = [0xf1f0,0xf3f2,0xf5f4,0xf7f6] Ints 0-1 = [0xf3f2f1f0,0xf7f6f5f4] Long 0 = [0xf7f6f5f4f3f2f1f0]
Floating Point Registers Operations
Background History x87 FP: Legacy, very ugly SSE FP: Supported by Shark machines; special case use of vector instructions AVX FP: Newest version; similar to SSE (but registers are 32 bytes instead of 16); documented in book
x86-64 Media Registers(Floating) %ymm0 %xmm0 %ymm1 %xmm1 %ymm2 %xmm2 %ymm3 %xmm3 %ymm4 %xmm4 %ymm5 %xmm5 … %ymm15 %xmm15 255 127
Programming with SSE3 XMM Registers 16 total, each 16 bytes 16 single-byte integers 8 16-bit integers 4 32-bit integers 4 single-precision floats 2 double-precision floats 1 single-precision float 1 double-precision float
Movement Instructions X: XMM register M32: 32 bit memory M64: 64 bit memory
Conversion Instruction In common usage, source2 and destination are identical
Arithmetic Operation Vector operation has 3 operands Scalar operation is the same as integer
Scalar & SIMD Operations Scalar Operations: Single Precision SIMD Operations: Single Precision Scalar Operations: Double Precision + %xmm0 %xmm1 addss %xmm0,%xmm1 addps %xmm0,%xmm1 addsd %xmm0,%xmm1
Arguments passed in %xmm0, %xmm1, ... Result returned in %xmm0 FP Basics Arguments passed in %xmm0, %xmm1, ... Result returned in %xmm0 All XMM registers caller-saved float fadd(float x, float y) { return x + y; } double dadd(double x, double y) { return x + y; } # x in %xmm0, y in %xmm1 addss %xmm1, %xmm0 ret # x in %xmm0, y in %xmm1 addsd %xmm1, %xmm0 ret
FP Memory Referencing Integer (and pointer) arguments passed in regular registers FP values passed in XMM registers Different mov instructions to move between XMM registers, and between memory and XMM registers double dincr(double *p, double v) { double x = *p; *p = x + v; return x; } # p in %rdi, v in %xmm0 movapd %xmm0, %xmm1 # Copy v movsd (%rdi), %xmm0 # x = *p addsd %xmm0, %xmm1 # t = x + v movsd %xmm1, (%rdi) # *p = t ret
Other Aspects Lots of instructions Floating-point comparisons Different operations, different formats, ... Floating-point comparisons Instructions ucomiss and ucomisd Set condition codes ZF, PF and CF Zeros OF and SF Using constant values Set XMM0 register to 0 with instruction xorpd %xmm0, %xmm0 Others loaded from memory UNORDERED: ZF,PF,CF←111 GREATER_THAN: ZF,PF,CF←000 LESS_THAN: ZF,PF,CF←001 EQUAL: ZF,PF,CF←100 Parity Flag