diff options
author | Jiri Gaisler <jiri@gaisler.se> | 2019-05-22 21:56:59 +0200 |
---|---|---|
committer | Jiri Gaisler <jiri@gaisler.se> | 2019-05-27 10:35:06 +0200 |
commit | 6a742ad589f8a5967cb878e4065f70d93b90fb58 (patch) | |
tree | 7a71ab6385f0f177594ebe0be30f2c65f7e11497 | |
parent | Completed texi manual (diff) | |
download | sis-6a742ad589f8a5967cb878e4065f70d93b90fb58.tar.bz2 |
Add emulated L1 cache to SMP configurations
* Also improve timing accuracy for certain instructions
-rw-r--r-- | exec.c | 26 | ||||
-rw-r--r-- | func.c | 21 | ||||
-rw-r--r-- | riscv.c | 38 | ||||
-rw-r--r-- | riscv.h | 3 | ||||
-rw-r--r-- | sis.c | 3 | ||||
-rw-r--r-- | sis.h | 23 | ||||
-rw-r--r-- | sparc.c | 38 | ||||
-rw-r--r-- | sparc.h | 2 |
8 files changed, 117 insertions, 37 deletions
@@ -175,3 +175,29 @@ init_regs(sregs) sregs[i].bphit = 0; } } + +void +l1data_snoop(uint32 address, uint32 cpu) +{ + int i; + for (i=0; i<ncpu; i++) { + if (sregs[i].l1dtags[(address >> L1DLINEBITS) & L1DMASK] == (address >> L1DLINEBITS)) { + if (cpu != i) { + sregs[i].l1dtags[(address >> L1DLINEBITS) & L1DMASK] = 0; +// printf("l1 snoop hit : 0x%08X, %d %d\n", address, cpu, i); + } + } + } +} + +void +l1data_update(uint32 address, uint32 cpu) +{ + if (sregs[cpu].l1dtags[address >> L1DLINEBITS & L1DMASK] != (address >> L1DLINEBITS)) + { + sregs[cpu].l1dtags[(address >> L1DLINEBITS) & L1DMASK] = (address >> L1DLINEBITS); + sregs[cpu].hold += 17; + sregs[cpu].l1dmiss++; + } +} + @@ -505,6 +505,8 @@ reset_stat(sregs) sregs->nload = 0; sregs->nbranch = 0; ebase.simstart = ebase.simtime; + sregs->l1imiss = 0; + sregs->l1dmiss = 0; } @@ -542,7 +544,7 @@ show_stat(sregs) printf(" Simulator perf. : %.2f MIPS\n", (double)(ninst / ebase.tottime / 1E6)); printf(" Wall time : %.2f s\n\n", ebase.tottime); - printf (" Core MIPS MFLOPS CPI Util\n"); + printf (" Core MIPS MFLOPS CPI Util IHit DHit\n"); for (i=0; i<ncpu; i++) { #ifdef STAT iinst = sregs[i].ninst - sregs[i].finst - sregs[i].nload - sregs[i].nstore - @@ -550,12 +552,17 @@ show_stat(sregs) #endif stime = sregs[i].simtime - ebase.simstart + 1; /* Core simulated time */ - printf (" %d %5.2f %5.2f %5.2f %5.2f %%\n", i, + printf (" %d %5.2f %5.2f %5.2f %5.2f%% %5.2f%% %5.2f%%\n", i, ebase.freq * (double) (sregs[i].ninst - sregs[i].finst) / (double) (stime - sregs[i].pwdtime), ebase.freq * (double) sregs[i].finst / (double) (stime - sregs[i].pwdtime), (double) (stime - sregs[i].pwdtime) / (double) (sregs[i].ninst + 1), - 100.0 * (1.0 - ((double) sregs[i].pwdtime / (double) stime))); + 100.0 * (1.0 - ((double) sregs[i].pwdtime / (double) stime)), + (double) (sregs[i].ninst - sregs[i].l1imiss + 1) / + (double) (sregs[i].ninst + 1) * 100.0, + (double) (sregs[i].nload + sregs[i].nstore - sregs[i].l1dmiss + 1) / + (double) (sregs[i].nload + sregs[i].nstore + 1) * 100.0 + ) ; } #ifdef STAT @@ -1065,6 +1072,11 @@ run_sim_core(sregs, ntime, deb, dis) sregs->inst = *((uint32 *) &ramb[sregs->pc & RAM_MASK]); sregs->hold = 0; #endif + if (sregs->l1itags[(sregs->pc >> L1ILINEBITS) & L1IMASK] != (sregs->pc >> L1ILINEBITS)) { + sregs->hold = 17; + sregs->l1itags[(sregs->pc >> L1ILINEBITS) & L1IMASK] = (sregs->pc >> L1ILINEBITS); + sregs->l1imiss++; + } sregs->fhold = 0; if (!irq) { if (mexc) { @@ -1143,7 +1155,7 @@ run_sim_mp(icount, dis) int dis; { uint64 ntime, etime; - int deb, i; + int deb, i, j; int err_mode, bphit, wphit, oldcpu; err_mode = bphit = wphit = 0; @@ -1157,6 +1169,7 @@ run_sim_mp(icount, dis) while (icount > ebase.simtime) { ntime = ebase.simtime + delta; if (ntime > icount) ntime = icount; + if (ntime > ebase.evtime) ntime = ebase.evtime; for(i=0; i<ncpu; i++) { deb = dis || ebase.histlen || ebase.bptnum; etime = ntime; @@ -819,36 +819,44 @@ riscv_dispatch_instruction (sregs) sop2 = op2; sop2 = op1 * op2; sregs->g[rd] = sop2; + sregs->icnt = T_MUL; break; case 1: /* MULH */ sop64a = (int64) op1 *(int64) op2; sregs->g[rd] = (sop64a >> 32) & 0xffffffff; + sregs->icnt = T_MUL; break; case 2: /* MULHSU */ sop64a = (int64) op1 *(uint64) op2; sregs->g[rd] = (sop64a >> 32) & 0xffffffff; + sregs->icnt = T_MUL; break; case 3: /* MULHU */ op64a = (uint64) op1 *(uint64) op2; sregs->g[rd] = (op64a >> 32) & 0xffffffff; + sregs->icnt = T_MUL; break; case 4: /* DIV */ sop1 = op1; sop2 = op2; result = sop1 / sop2; sregs->g[rd] = result; + sregs->icnt = T_DIV; break; case 5: /* DIVU */ sregs->g[rd] = op1 / op2; + sregs->icnt = T_DIV; break; case 6: /* REM */ sop1 = op1; sop2 = op2; sop1 = sop1 % sop2; sregs->g[rd] = sop1; + sregs->icnt = T_DIV; break; case 7: /* REMU */ sregs->g[rd] = op1 % op2; + sregs->icnt = T_DIV; break; } break; @@ -858,9 +866,7 @@ riscv_dispatch_instruction (sregs) break; case OP_STORE: /* store instructions */ -#ifdef STAT sregs->nstore++; -#endif offset = EXTRACT_STYPE_IMM (sregs->inst); address = op1 + offset; wdata = &(sregs->g[rs2]); @@ -918,12 +924,15 @@ riscv_dispatch_instruction (sregs) default: sregs->trap = TRAP_ILLEG; } + if (ncpu > 1) + { + l1data_update(address, sregs->cpu); + l1data_snoop(address, sregs->cpu); + } break; case OP_FSW: /* F store instructions */ -#ifdef STAT sregs->nstore++; -#endif offset = EXTRACT_STYPE_IMM (sregs->inst); address = op1 + offset; wdata = &sregs->fsi[rs2 << 1]; @@ -972,11 +981,14 @@ riscv_dispatch_instruction (sregs) default: sregs->trap = TRAP_ILLEG; } + if (ncpu > 1) + { + l1data_update(address, sregs->cpu); + l1data_snoop(address, sregs->cpu); + } break; case OP_LOAD: /* load instructions */ -#ifdef STAT sregs->nload++; -#endif offset = EXTRACT_ITYPE_IMM (sregs->inst); address = op1 + offset; if (ebase.wprnum) @@ -1080,14 +1092,17 @@ riscv_dispatch_instruction (sregs) default: sregs->trap = TRAP_ILLEG; } + if (ncpu > 1) + { + l1data_update(address, sregs->cpu); + } break; case OP_AMO: /* atomic instructions */ address = op1; funct5 = (sregs->inst >> 27) & 0x1f; -#ifdef STAT sregs->nstore++; sregs->nload++; -#endif + sregs->icnt = T_AMO; switch (funct5) { case LRQ: @@ -1302,9 +1317,7 @@ riscv_dispatch_instruction (sregs) } break; case OP_FLOAD: /* float load instructions */ -#ifdef STAT sregs->nload++; -#endif offset = EXTRACT_ITYPE_IMM (sregs->inst); address = op1 + offset; if (ebase.wprnum) @@ -1367,6 +1380,10 @@ riscv_dispatch_instruction (sregs) default: sregs->trap = TRAP_ILLEG; } + if (ncpu > 1) + { + l1data_update(address, sregs->cpu); + } break; #ifdef FPU_ENABLED case OP_FPU: @@ -1841,6 +1858,7 @@ riscv_dispatch_instruction (sregs) break; #endif case OP_FENCE: + sregs->icnt = TRAP_C; break; default: sregs->trap = TRAP_ILLEG; @@ -26,6 +26,9 @@ #define FPU_D_ENABLED #define T_JALR 2 #define T_BMISS 2 +#define T_MUL 8 +#define T_DIV 35 +#define T_AMO 5 #define TRAP_IEXC 1 #define TRAP_ILLEG 2 @@ -184,6 +184,9 @@ main(argc, argv) if (!freq) freq = 14; } + if (ncpu > 1) + printf(" L1 cache: %dK/%dK, %d bytes/line \n", + (1 << (L1IBITS - 10)), (1 << (L1DBITS - 10)), (1 << L1ILINEBITS)); if (nfp) printf(" FPU disabled\n"); ebase.freq = freq; @@ -46,6 +46,19 @@ #define RAM_MASK (RAM_SIZE - 1) #define RAM_END (RAM_START + RAM_SIZE) +/* cache config */ + +#define L1IBITS 12 +#define L1ILINEBITS 5 +#define L1ITAGBITS (L1IBITS - L1ILINEBITS) +#define L1ITAGS (1 << (L1ITAGBITS)) +#define L1IMASK (L1ITAGS -1) +#define L1DBITS 12 +#define L1DLINEBITS 5 +#define L1DTAGBITS (L1DBITS - L1DLINEBITS) +#define L1DTAGS (1 << (L1DTAGBITS)) +#define L1DMASK (L1DTAGS -1) + /* type definitions */ typedef short int int16; /* 16-bit signed int */ @@ -113,8 +126,8 @@ struct pstate { uint64 finst; uint64 pwdtime; /* Cycles in power-down mode */ uint64 pwdstart; /* Start of power-down mode */ - uint64 nstore; /* Number of load instructions */ - uint64 nload; /* Number of store instructions */ + uint64 nstore; /* Number of store instructions */ + uint64 nload; /* Number of load instructions */ uint64 nannul; /* Number of annuled instructions */ uint64 nbranch; /* Number of branch instructions */ uint32 ildreg; /* Destination of last load instruction */ @@ -143,6 +156,10 @@ struct pstate { uint32 lrqa; uint32 bphit; + uint32 l1itags[L1ITAGS]; + uint64 l1imiss; + uint32 l1dtags[L1DTAGS]; + uint64 l1dmiss; }; struct evcell { @@ -291,6 +308,8 @@ extern int port; extern int sim_run; extern void int_handler(int sig); extern uint32 daddr; +extern void l1data_update(uint32 address, uint32 cpu); +extern void l1data_snoop(uint32 address, uint32 cpu); /* exec.c */ extern void init_regs (struct pstate *sregs); @@ -528,6 +528,7 @@ sparc_dispatch_instruction (sregs) case SMUL: { mul64 (rs1, operand2, &sregs->y, rdd, 1); + sregs->icnt = T_MUL; } break; case SMULCC: @@ -547,11 +548,13 @@ sparc_dispatch_instruction (sregs) sregs->psr &= ~PSR_Z; *rdd = result; + sregs->icnt = T_MUL; } break; case UMUL: { mul64 (rs1, operand2, &sregs->y, rdd, 0); + sregs->icnt = T_MUL; } break; case UMULCC: @@ -571,6 +574,7 @@ sparc_dispatch_instruction (sregs) sregs->psr &= ~PSR_Z; *rdd = result; + sregs->icnt = T_MUL; } break; case SDIV: @@ -582,6 +586,7 @@ sparc_dispatch_instruction (sregs) } div64 (sregs->y, rs1, operand2, rdd, 1); + sregs->icnt = T_DIV; } break; case SDIVCC: @@ -610,6 +615,7 @@ sparc_dispatch_instruction (sregs) sregs->psr &= ~(PSR_C | PSR_V); *rdd = result; + sregs->icnt = T_DIV; } break; case UDIV: @@ -621,6 +627,7 @@ sparc_dispatch_instruction (sregs) } div64 (sregs->y, rs1, operand2, rdd, 0); + sregs->icnt = T_DIV; } break; case UDIVCC: @@ -649,6 +656,7 @@ sparc_dispatch_instruction (sregs) sregs->psr &= ~(PSR_C | PSR_V); *rdd = result; + sregs->icnt = T_DIV; } break; case IXNOR: @@ -961,9 +969,7 @@ sparc_dispatch_instruction (sregs) break; } } -#ifdef STAT sregs->nstore++; -#endif } else { @@ -976,9 +982,7 @@ sparc_dispatch_instruction (sregs) break; } } -#ifdef STAT sregs->nload++; -#endif } /* Decode load/store instructions */ @@ -1015,9 +1019,7 @@ sparc_dispatch_instruction (sregs) { rdd[0] = ddata[0]; rdd[1] = ddata[1]; -#ifdef STAT sregs->nload++; /* Double load counts twice */ -#endif } break; @@ -1076,9 +1078,7 @@ sparc_dispatch_instruction (sregs) { sregs->trap = TRAP_DEXC; } -#ifdef STAT sregs->nload++; -#endif break; case LDSBA: case LDUBA: @@ -1192,9 +1192,7 @@ sparc_dispatch_instruction (sregs) rd ^= 1; #endif sregs->fsi[rd] = ddata[0]; -#ifdef STAT sregs->nload++; /* Double load counts twice */ -#endif rd ^= 1; sregs->fsi[rd] = ddata[1]; sregs->ltime = sregs->simtime + sregs->icnt + FLSTHOLD + @@ -1311,9 +1309,7 @@ sparc_dispatch_instruction (sregs) mexc = ms->memory_write (address, rdd, 3, &ws); sregs->hold += ws; sregs->icnt = T_STD; -#ifdef STAT sregs->nstore++; /* Double store counts twice */ -#endif if (mexc) { sregs->trap = TRAP_DEXC; @@ -1345,9 +1341,7 @@ sparc_dispatch_instruction (sregs) mexc = ms->memory_write (address, rdd, 3, &ws); sregs->hold += ws; sregs->icnt = T_STD; -#ifdef STAT sregs->nstore++; /* Double store counts twice */ -#endif if (mexc) { sregs->trap = TRAP_DEXC; @@ -1428,9 +1422,7 @@ sparc_dispatch_instruction (sregs) mexc = ms->memory_write (address, ddata, 3, &ws); sregs->hold += ws; sregs->icnt = T_STD; -#ifdef STAT sregs->nstore++; /* Double store counts twice */ -#endif if (mexc) { sregs->trap = TRAP_DEXC; @@ -1462,9 +1454,7 @@ sparc_dispatch_instruction (sregs) } else *rdd = data; -#ifdef STAT sregs->nload++; -#endif break; case CASA: asi = (sregs->inst >> 5) & 0x0ff; @@ -1497,9 +1487,7 @@ sparc_dispatch_instruction (sregs) } else *rdd = data; -#ifdef STAT sregs->nload++; -#endif break; default: @@ -1518,6 +1506,14 @@ sparc_dispatch_instruction (sregs) * last */ } #endif + if (ncpu > 1) + { + l1data_update(address, sregs->cpu); + if (op3 & 4) + { + l1data_snoop(address, sregs->cpu); + } + } break; default: @@ -1899,7 +1895,7 @@ sparc_execute_trap (sregs) sregs->npc = sregs->pc + 4; } - /* Increase simulator time */ + /* Increase simulator time and add some jitter */ sregs->icnt = TRAP_C; } @@ -14,6 +14,8 @@ #define T_LDST 4 #define T_JMPL 2 #define T_RETT 2 +#define T_MUL 5 +#define T_DIV 35 #define FSR_QNE 0x2000 #define FP_EXE_MODE 0 |