순천향대학교 전산학과 2002. 10. 8 홍종국(siro1hope@hanmail.net) SimpleScalar 예제 프로그램 순천향대학교 전산학과 2002. 10. 8 홍종국(siro1hope@hanmail.net)
C 예제 File factorial.c #include <stdio.h> int factorial(int val); main() { int inval=3, outval; outval = factorial(inval); printf("%d factorial = %d\n", inval, outval); } 컴퓨터구조특론 I
C 예제 File int factorial(int val) { if(val == 1) return 1; else return (val * factorial(val-1)); } 컴퓨터구조특론 I
컴파일 .file 1 "factorial.c" # GNU C 2.6.3 [AL 1.1, MM 40, tma 0.1] SimpleScalar running sstrix compiled by GNU C # Cc1 defaults: # -mgas -mgpOPT # Cc1 arguments (-G value = 8, Cpu = default, ISA = 1): # -quiet -dumpbase -o gcc2_compiled.: __gnu_compiled_c: .rdata .align 2 $LC0: .ascii "%d factorial = %d\n\000" .text .globl factorial .loc 1 5 컴퓨터구조특론 I
.ent main main: .frame $fp,32,$31 # vars= 8, regs= 2/0, args= 16, extra= 0 .mask 0xc0000000,-4 .fmask 0x00000000,0 subu $sp,$sp,32 sw $31,28($sp) sw $fp,24($sp) move $fp,$sp jal __main li $2,0x00000003 # 3 sw $2,16($fp) lw $4,16($fp) jal factorial sw $2,20($fp) la $4,$LC0 lw $5,16($fp) lw $6,20($fp) jal printf 컴퓨터구조특론 I
$L1: move $sp,$fp # sp not trusted here lw $31,28($sp) lw $fp,24($sp) addu $sp,$sp,32 j $31 .end main .loc 1 12 .ent factorial factorial: .frame $fp,24,$31 # vars= 0, regs= 2/0, args= 16, extra= 0 .mask 0xc0000000,-4 .fmask 0x00000000,0 subu $sp,$sp,24 sw $31,20($sp) sw $fp,16($sp) move $fp,$sp sw $4,24($fp) lw $2,24($fp) 컴퓨터구조특론 I
li $3,0x00000001 # 1 bne $2,$3,$L3 li $2,0x00000001 # 1 j $L2 j $L4 $L3: lw $3,24($fp) subu $2,$3,1 move $4,$2 jal factorial lw $4,24($fp) mult $2,$4 mflo $3 move $2,$3 $L4: $L2: move $sp,$fp # sp not trusted here lw $31,20($sp) lw $fp,16($sp) addu $sp,$sp,24 j $31 .end factorial 컴퓨터구조특론 I
Sim-safe 수행 ☞ /edu/simplesim-3.0/sim-safe factorial.ss sim-safe: SimpleScalar/PISA Tool Set version 3.0 of November, 2000. ….. # -config # load configuration from a file # -dumpconfig # dump configuration to a file # -h false # print help message # -v false # verbose operation # -d false # enable debug message # -i false # start in Dlite debugger -seed 1 # random number generator seed (0 for timer seed) # -q false # initialize and terminate immediately # -chkpt <null> # restore EIO trace execution from <fname> # -redir:sim <null> # redirect simulator output to file (non-interactive only) # -redir:prog <null> # redirect simulated program output to file -nice 0 # simulator scheduling priority -max:inst 0 # maximum number of inst's to execute 컴퓨터구조특론 I
sim: ** starting functional simulation ** 3 factorial = 6 sim: ** simulation statistics ** sim_num_insn 2755 # total number of instructions executed sim_num_refs 911 # total number of loads and stores executed sim_elapsed_time 1 # total simulation time in seconds sim_inst_rate 2755.0000 # simulation speed (in insts/sec) ld_text_base 0x00400000 # program text (code) segment base ld_text_size 70368 # program text (code) size in bytes ld_data_base 0x10000000 # program initialized data segment base ld_data_size 8192 # program init'ed `.data' and uninit'ed `.bss' size in bytes ld_stack_base 0x7fffc000 # program stack segment base (highest address in stack) ld_stack_size 16384 # program initial stack size ld_prog_entry 0x00400140 # program entry point (initial PC) ld_environ_base 0x7fff8000 # program environment base address address ld_target_big_endian 0 # target executable endian-ness, non-zero if big endian mem.page_count 22 # total number of pages allocated mem.page_mem 88k # total size of memory pages allocated mem.ptab_misses 22 # total first level page table misses mem.ptab_accesses 448440 # total page table accesses mem.ptab_miss_rate 0.0000 # first level page table miss rate 컴퓨터구조특론 I
sim-outorder 수행 sim-outorder: SimpleScalar/PISA Tool Set version 3.0 of November, 2000. … # -config # load configuration from a file # -dumpconfig # dump configuration to a file # -h false # print help message # -v false # verbose operation # -d false # enable debug message # -i false # start in Dlite debugger -seed 1 # random number generator seed (0 for timer seed) # -q false # initialize and terminate immediately # -chkpt <null> # restore EIO trace execution from <fname> # -redir:sim <null> # redirect simulator output to file (non-interactive only) # -redir:prog <null> # redirect simulated program output to file -nice 0 # simulator scheduling priority -max:inst 0 # maximum number of inst's to execute -fastfwd 0 # number of insts skipped before timing starts # -ptrace <null> # generate pipetrace, i.e., <fname|stdout|stderr> <range> -fetch:ifqsize 4 # instruction fetch queue size (in insts) -fetch:mplat 3 # extra branch mis-prediction latency -fetch:speed 1 # speed of front-end of machine relative to execution core 컴퓨터구조특론 I
sim: ** starting performance simulation ** 3 factorial = 6 sim: ** simulation statistics ** sim_num_insn 2755 # total number of instructions committed sim_num_refs 911 # total number of loads and stores committed sim_num_loads 538 # total number of loads committed sim_num_stores 373.0000 # total number of stores committed sim_num_branches 631 # total number of branches committed sim_elapsed_time 1 # total simulation time in seconds sim_inst_rate 2755.0000 # simulation speed (in insts/sec) sim_total_insn 3430 # total number of instructions executed sim_total_refs 1081 # total number of loads and stores executed sim_total_loads 653 # total number of loads executed sim_total_stores 428.0000 # total number of stores executed sim_total_branches 714 # total number of branches executed sim_cycle 10419 # total simulation time in cycles sim_IPC 0.2644 # instructions per cycle sim_CPI 3.7819 # cycles per instruction sim_exec_BW 0.3292 # total instructions (mis-spec + committed) per cycle sim_IPB 4.3661 # instruction per branch IFQ_count 4772 # cumulative IFQ occupancy IFQ_fcount 933 # cumulative IFQ full count ifq_occupancy 0.4580 # avg IFQ occupancy (insn's) 컴퓨터구조특론 I
ifq_rate 0.3292 # avg IFQ dispatch rate (insn/cycle) ifq_latency 1.3913 # avg IFQ occupant latency (cycle's) ifq_full 0.0895 # fraction of time (cycle's) IFQ was full RUU_count 18776 # cumulative RUU occupancy RUU_fcount 402 # cumulative RUU full count ruu_occupancy 1.8021 # avg RUU occupancy (insn's) ruu_rate 0.3292 # avg RUU dispatch rate (insn/cycle) ruu_latency 5.4741 # avg RUU occupant latency (cycle's) ruu_full 0.0386 # fraction of time (cycle's) RUU was full LSQ_count 5617 # cumulative LSQ occupancy LSQ_fcount 68 # cumulative LSQ full count lsq_occupancy 0.5391 # avg LSQ occupancy (insn's) lsq_rate 0.3292 # avg LSQ dispatch rate (insn/cycle) lsq_latency 1.6376 # avg LSQ occupant latency (cycle's) lsq_full 0.0065 # fraction of time (cycle's) LSQ was full sim_slip 25615 # total number of slip cycles avg_sim_slip 9.2976 # the average slip between issue and retirement bpred_bimod.lookups 740 # total number of bpred lookups bpred_bimod.updates 631 # total number of updates bpred_bimod.addr_hits 383 # total number of address-predicted hits bpred_bimod.dir_hits 496 # total number of direction-predicted hits (includes addr-hits) bpred_bimod.misses 135 # total number of misses bpred_bimod.jr_hits 54 # total number of address-predicted hits for JR's 컴퓨터구조특론 I
bpred_bimod.jr_seen 61 # total number of JR's seen bpred_bimod.jr_non_ras_hits.PP 0 # total number of address-predicted hits for non-RAS JR's bpred_bimod.jr_non_ras_seen.PP 2 # total number of non-RAS JR's seen bpred_bimod.bpred_addr_rate 0.6070 # branch address-prediction rate (i.e., addr-hits/updates) bpred_bimod.bpred_dir_rate 0.7861 # branch direction-prediction rate (i.e., all-hits/updates) bpred_bimod.bpred_jr_rate 0.8852 # JR address-prediction rate (i.e., JR addr-hits/JRs seen) bpred_bimod.bpred_jr_non_ras_rate.PP 0.0000 # non-RAS JR addr-pred rate (ie, non-RAS JR hits/JRs seen) bpred_bimod.retstack_pushes 75 # total number of address pushed onto ret-addr stack bpred_bimod.retstack_pops 63 # total number of address popped off of ret-addr stack bpred_bimod.used_ras.PP 59 # total number of RAS predictions used bpred_bimod.ras_hits.PP 54 # total number of RAS hits bpred_bimod.ras_rate.PP 0.9153 # RAS prediction rate (i.e., RAS hits/used RAS) il1.accesses 3812 # total number of accesses il1.hits 3364 # total number of hits il1.misses 448 # total number of misses il1.replacements 181 # total number of replacements il1.writebacks 0 # total number of writebacks il1.invalidations 0 # total number of invalidations il1.miss_rate 0.1175 # miss rate (i.e., misses/ref) il1.repl_rate 0.0475 # replacement rate (i.e., repls/ref) 컴퓨터구조특론 I
bpred_bimod.jr_seen 61 # total number of JR's seen bpred_bimod.jr_non_ras_hits.PP 0 # total number of address-predicted hits for non-RAS JR's bpred_bimod.jr_non_ras_seen.PP 2 # total number of non-RAS JR's seen bpred_bimod.bpred_addr_rate 0.6070 # branch address-prediction rate (i.e., addr-hits/updates) bpred_bimod.bpred_dir_rate 0.7861 # branch direction-prediction rate (i.e., all-hits/updates) bpred_bimod.bpred_jr_rate 0.8852 # JR address-prediction rate (i.e., JR addr-hits/JRs seen) bpred_bimod.bpred_jr_non_ras_rate.PP 0.0000 # non-RAS JR addr-pred rate (ie, non-RAS JR hits/JRs seen) bpred_bimod.retstack_pushes 75 # total number of address pushed onto ret-addr stack bpred_bimod.retstack_pops 63 # total number of address popped off of ret-addr stack bpred_bimod.used_ras.PP 59 # total number of RAS predictions used bpred_bimod.ras_hits.PP 54 # total number of RAS hits bpred_bimod.ras_rate.PP 0.9153 # RAS prediction rate (i.e., RAS hits/used RAS) il1.accesses 3812 # total number of accesses il1.hits 3364 # total number of hits il1.misses 448 # total number of misses il1.replacements 181 # total number of replacements il1.writebacks 0 # total number of writebacks il1.invalidations 0 # total number of invalidations il1.miss_rate 0.1175 # miss rate (i.e., misses/ref) il1.repl_rate 0.0475 # replacement rate (i.e., repls/ref) 컴퓨터구조특론 I
il1.wb_rate 0.0000 # writeback rate (i.e., wrbks/ref) il1.inv_rate 0.0000 # invalidation rate (i.e., invs/ref) dl1.accesses 938 # total number of accesses dl1.hits 898 # total number of hits dl1.misses 40 # total number of misses dl1.replacements 0 # total number of replacements dl1.writebacks 0 # total number of writebacks dl1.invalidations 0 # total number of invalidations dl1.miss_rate 0.0426 # miss rate (i.e., misses/ref) dl1.repl_rate 0.0000 # replacement rate (i.e., repls/ref) dl1.wb_rate 0.0000 # writeback rate (i.e., wrbks/ref) dl1.inv_rate 0.0000 # invalidation rate (i.e., invs/ref) ul2.accesses 488 # total number of accesses ul2.hits 226 # total number of hits ul2.misses 262 # total number of misses ul2.replacements 0 # total number of replacements ul2.writebacks 0 # total number of writebacks ul2.invalidations 0 # total number of invalidations ul2.miss_rate 0.5369 # miss rate (i.e., misses/ref) ul2.repl_rate 0.0000 # replacement rate (i.e., repls/ref) ul2.wb_rate 0.0000 # writeback rate (i.e., wrbks/ref) ul2.inv_rate 0.0000 # invalidation rate (i.e., invs/ref) itlb.accesses 3812 # total number of accesses itlb.hits 3800 # total number of hits itlb.misses 12 # total number of misses itlb.replacements 0 # total number of replacements 컴퓨터구조특론 I
itlb.writebacks 0 # total number of writebacks itlb.invalidations 0 # total number of invalidations itlb.miss_rate 0.0031 # miss rate (i.e., misses/ref) itlb.repl_rate 0.0000 # replacement rate (i.e., repls/ref) itlb.wb_rate 0.0000 # writeback rate (i.e., wrbks/ref) itlb.inv_rate 0.0000 # invalidation rate (i.e., invs/ref) dtlb.accesses 948 # total number of accesses dtlb.hits 944 # total number of hits dtlb.misses 4 # total number of misses dtlb.replacements 0 # total number of replacements dtlb.writebacks 0 # total number of writebacks dtlb.invalidations 0 # total number of invalidations dtlb.miss_rate 0.0042 # miss rate (i.e., misses/ref) dtlb.repl_rate 0.0000 # replacement rate (i.e., repls/ref) dtlb.wb_rate 0.0000 # writeback rate (i.e., wrbks/ref) dtlb.inv_rate 0.0000 # invalidation rate (i.e., invs/ref) sim_invalid_addrs 0 # total non-speculative bogus addresses seen (debug var) ld_text_base 0x00400000 # program text (code) segment base ld_text_size 70368 # program text (code) size in bytes ld_data_base 0x10000000 # program initialized data segment base ld_data_size 8192 # program init'ed `.data' and uninit'ed `.bss' size in bytes ld_stack_base 0x7fffc000 # program stack segment base (highest address in stack) 컴퓨터구조특론 I
ld_stack_size 16384 # program initial stack size ld_prog_entry 0x00400140 # program entry point (initial PC) ld_environ_base 0x7fff8000 # program environment base address address ld_target_big_endian 0 # target executable endian-ness, non-zero if big endian mem.page_count 22 # total number of pages allocated mem.page_mem 88k # total size of memory pages allocated mem.ptab_misses 22 # total first level page table misses mem.ptab_accesses 457684 # total page table accesses mem.ptab_miss_rate 0.0000 # first level page table miss rate 컴퓨터구조특론 I
Dlite! 디버거 사용 – dlitefactorial.txt 참조 sim: ** starting functional simulation ** [ 0] 0x00400140: lw r16,0(r29) DLite! > 0x004001f0: addiu r29,r29,-32 0x004001f8: sw r31,28(r29) 0x00400200: sw r30,24(r29) 0x00400208: addu r30,r0,r29 0x00400210: jal 0x400508 0x00400218: addiu r2,r0,3 0x00400220: sw r2,16(r30) 0x00400228: lw r4,16(r30) 0x00400230: jal 0x400290 0x00400238: sw r2,20(r30) 0x00400240: lui r4,0x1000 0x00400248: addiu r4,r4,0 0x00400250: lw r5,16(r30) 0x00400258: lw r6,20(r30) 0x00400260: jal 0x400730 0x00400268: addu r29,r0,r30 컴퓨터구조특론 I
DLite! > 0x00400290: addiu r29,r29,-24 0x00400298: sw r31,20(r29) 0x004002a0: sw r30,16(r29) 0x004002a8: addu r30,r0,r29 0x004002b0: sw r4,24(r30) 0x004002b8: lw r2,24(r30) 0x004002c0: addiu r3,r0,1 0x004002c8: bne r2,r3,0x4002e8 0x004002d0: addiu r2,r0,1 0x004002d8: j 0x400330 0x004002e0: j 0x400330 0x004002e8: lw r3,24(r30) 0x004002f0: addiu r2,r3,-1 0x004002f8: addu r4,r0,r2 0x00400300: jal 0x400290 0x00400308: lw r4,24(r30) DLite! > 3 factorial = 6 컴퓨터구조특론 I