From 6a742ad589f8a5967cb878e4065f70d93b90fb58 Mon Sep 17 00:00:00 2001 From: Jiri Gaisler Date: Wed, 22 May 2019 21:56:59 +0200 Subject: Add emulated L1 cache to SMP configurations * Also improve timing accuracy for certain instructions --- exec.c | 26 ++++++++++++++++++++++++++ func.c | 21 +++++++++++++++++---- riscv.c | 38 ++++++++++++++++++++++++++++---------- riscv.h | 3 +++ sis.c | 3 +++ sis.h | 23 +++++++++++++++++++++-- sparc.c | 38 +++++++++++++++++--------------------- sparc.h | 2 ++ 8 files changed, 117 insertions(+), 37 deletions(-) diff --git a/exec.c b/exec.c index 67a16f5..433ab0d 100644 --- a/exec.c +++ b/exec.c @@ -175,3 +175,29 @@ init_regs(sregs) sregs[i].bphit = 0; } } + +void +l1data_snoop(uint32 address, uint32 cpu) +{ + int i; + for (i=0; i<ncpu; i++) { + if (sregs[i].l1dtags[(address >> L1DLINEBITS) & L1DMASK] == (address >> L1DLINEBITS)) { + if (cpu != i) { + sregs[i].l1dtags[(address >> L1DLINEBITS) & L1DMASK] = 0; +// printf("l1 snoop hit : 0x%08X, %d %d\n", address, cpu, i); + } + } + } +} + +void +l1data_update(uint32 address, uint32 cpu) +{ + if (sregs[cpu].l1dtags[address >> L1DLINEBITS & L1DMASK] != (address >> L1DLINEBITS)) + { + sregs[cpu].l1dtags[(address >> L1DLINEBITS) & L1DMASK] = (address >> L1DLINEBITS); + sregs[cpu].hold += 17; + sregs[cpu].l1dmiss++; + } +} + diff --git a/func.c b/func.c index 5269e0c..23c3481 100644 --- a/func.c +++ b/func.c @@ -505,6 +505,8 @@ reset_stat(sregs) sregs->nload = 0; sregs->nbranch = 0; ebase.simstart = ebase.simtime; + sregs->l1imiss = 0; + sregs->l1dmiss = 0; } @@ -542,7 +544,7 @@ show_stat(sregs) printf(" Simulator perf. 
: %.2f MIPS\n", (double)(ninst / ebase.tottime / 1E6)); printf(" Wall time : %.2f s\n\n", ebase.tottime); - printf (" Core MIPS MFLOPS CPI Util\n"); + printf (" Core MIPS MFLOPS CPI Util IHit DHit\n"); for (i=0; iinst = *((uint32 *) &ramb[sregs->pc & RAM_MASK]); sregs->hold = 0; #endif + if (sregs->l1itags[(sregs->pc >> L1ILINEBITS) & L1IMASK] != (sregs->pc >> L1ILINEBITS)) { + sregs->hold = 17; + sregs->l1itags[(sregs->pc >> L1ILINEBITS) & L1IMASK] = (sregs->pc >> L1ILINEBITS); + sregs->l1imiss++; + } sregs->fhold = 0; if (!irq) { if (mexc) { @@ -1143,7 +1155,7 @@ run_sim_mp(icount, dis) int dis; { uint64 ntime, etime; - int deb, i; + int deb, i, j; int err_mode, bphit, wphit, oldcpu; err_mode = bphit = wphit = 0; @@ -1157,6 +1169,7 @@ run_sim_mp(icount, dis) while (icount > ebase.simtime) { ntime = ebase.simtime + delta; if (ntime > icount) ntime = icount; + if (ntime > ebase.evtime) ntime = ebase.evtime; for(i=0; ig[rd] = sop2; + sregs->icnt = T_MUL; break; case 1: /* MULH */ sop64a = (int64) op1 *(int64) op2; sregs->g[rd] = (sop64a >> 32) & 0xffffffff; + sregs->icnt = T_MUL; break; case 2: /* MULHSU */ sop64a = (int64) op1 *(uint64) op2; sregs->g[rd] = (sop64a >> 32) & 0xffffffff; + sregs->icnt = T_MUL; break; case 3: /* MULHU */ op64a = (uint64) op1 *(uint64) op2; sregs->g[rd] = (op64a >> 32) & 0xffffffff; + sregs->icnt = T_MUL; break; case 4: /* DIV */ sop1 = op1; sop2 = op2; result = sop1 / sop2; sregs->g[rd] = result; + sregs->icnt = T_DIV; break; case 5: /* DIVU */ sregs->g[rd] = op1 / op2; + sregs->icnt = T_DIV; break; case 6: /* REM */ sop1 = op1; sop2 = op2; sop1 = sop1 % sop2; sregs->g[rd] = sop1; + sregs->icnt = T_DIV; break; case 7: /* REMU */ sregs->g[rd] = op1 % op2; + sregs->icnt = T_DIV; break; } break; @@ -858,9 +866,7 @@ riscv_dispatch_instruction (sregs) break; case OP_STORE: /* store instructions */ -#ifdef STAT sregs->nstore++; -#endif offset = EXTRACT_STYPE_IMM (sregs->inst); address = op1 + offset; wdata = &(sregs->g[rs2]); @@ -918,12 
+924,15 @@ riscv_dispatch_instruction (sregs) default: sregs->trap = TRAP_ILLEG; } + if (ncpu > 1) + { + l1data_update(address, sregs->cpu); + l1data_snoop(address, sregs->cpu); + } break; case OP_FSW: /* F store instructions */ -#ifdef STAT sregs->nstore++; -#endif offset = EXTRACT_STYPE_IMM (sregs->inst); address = op1 + offset; wdata = &sregs->fsi[rs2 << 1]; @@ -972,11 +981,14 @@ riscv_dispatch_instruction (sregs) default: sregs->trap = TRAP_ILLEG; } + if (ncpu > 1) + { + l1data_update(address, sregs->cpu); + l1data_snoop(address, sregs->cpu); + } break; case OP_LOAD: /* load instructions */ -#ifdef STAT sregs->nload++; -#endif offset = EXTRACT_ITYPE_IMM (sregs->inst); address = op1 + offset; if (ebase.wprnum) @@ -1080,14 +1092,17 @@ riscv_dispatch_instruction (sregs) default: sregs->trap = TRAP_ILLEG; } + if (ncpu > 1) + { + l1data_update(address, sregs->cpu); + } break; case OP_AMO: /* atomic instructions */ address = op1; funct5 = (sregs->inst >> 27) & 0x1f; -#ifdef STAT sregs->nstore++; sregs->nload++; -#endif + sregs->icnt = T_AMO; switch (funct5) { case LRQ: @@ -1302,9 +1317,7 @@ riscv_dispatch_instruction (sregs) } break; case OP_FLOAD: /* float load instructions */ -#ifdef STAT sregs->nload++; -#endif offset = EXTRACT_ITYPE_IMM (sregs->inst); address = op1 + offset; if (ebase.wprnum) @@ -1367,6 +1380,10 @@ riscv_dispatch_instruction (sregs) default: sregs->trap = TRAP_ILLEG; } + if (ncpu > 1) + { + l1data_update(address, sregs->cpu); + } break; #ifdef FPU_ENABLED case OP_FPU: @@ -1841,6 +1858,7 @@ riscv_dispatch_instruction (sregs) break; #endif case OP_FENCE: + sregs->icnt = TRAP_C; break; default: sregs->trap = TRAP_ILLEG; diff --git a/riscv.h b/riscv.h index e39cddb..ff4eb1c 100644 --- a/riscv.h +++ b/riscv.h @@ -26,6 +26,9 @@ #define FPU_D_ENABLED #define T_JALR 2 #define T_BMISS 2 +#define T_MUL 8 +#define T_DIV 35 +#define T_AMO 5 #define TRAP_IEXC 1 #define TRAP_ILLEG 2 diff --git a/sis.c b/sis.c index 410551b..a1411ae 100644 --- a/sis.c +++ 
b/sis.c @@ -184,6 +184,9 @@ main(argc, argv) if (!freq) freq = 14; } + if (ncpu > 1) + printf(" L1 cache: %dK/%dK, %d bytes/line \n", + (1 << (L1IBITS - 10)), (1 << (L1DBITS - 10)), (1 << L1ILINEBITS)); if (nfp) printf(" FPU disabled\n"); ebase.freq = freq; diff --git a/sis.h b/sis.h index 111857b..ba90440 100644 --- a/sis.h +++ b/sis.h @@ -46,6 +46,19 @@ #define RAM_MASK (RAM_SIZE - 1) #define RAM_END (RAM_START + RAM_SIZE) +/* cache config */ + +#define L1IBITS 12 +#define L1ILINEBITS 5 +#define L1ITAGBITS (L1IBITS - L1ILINEBITS) +#define L1ITAGS (1 << (L1ITAGBITS)) +#define L1IMASK (L1ITAGS -1) +#define L1DBITS 12 +#define L1DLINEBITS 5 +#define L1DTAGBITS (L1DBITS - L1DLINEBITS) +#define L1DTAGS (1 << (L1DTAGBITS)) +#define L1DMASK (L1DTAGS -1) + /* type definitions */ typedef short int int16; /* 16-bit signed int */ @@ -113,8 +126,8 @@ struct pstate { uint64 finst; uint64 pwdtime; /* Cycles in power-down mode */ uint64 pwdstart; /* Start of power-down mode */ - uint64 nstore; /* Number of load instructions */ - uint64 nload; /* Number of store instructions */ + uint64 nstore; /* Number of store instructions */ + uint64 nload; /* Number of load instructions */ uint64 nannul; /* Number of annuled instructions */ uint64 nbranch; /* Number of branch instructions */ uint32 ildreg; /* Destination of last load instruction */ @@ -143,6 +156,10 @@ struct pstate { uint32 lrqa; uint32 bphit; + uint32 l1itags[L1ITAGS]; + uint64 l1imiss; + uint32 l1dtags[L1DTAGS]; + uint64 l1dmiss; }; struct evcell { @@ -291,6 +308,8 @@ extern int port; extern int sim_run; extern void int_handler(int sig); extern uint32 daddr; +extern void l1data_update(uint32 address, uint32 cpu); +extern void l1data_snoop(uint32 address, uint32 cpu); /* exec.c */ extern void init_regs (struct pstate *sregs); diff --git a/sparc.c b/sparc.c index d068d5c..1adb709 100644 --- a/sparc.c +++ b/sparc.c @@ -528,6 +528,7 @@ sparc_dispatch_instruction (sregs) case SMUL: { mul64 (rs1, operand2, &sregs->y, rdd, 1); 
+ sregs->icnt = T_MUL; } break; case SMULCC: @@ -547,11 +548,13 @@ sparc_dispatch_instruction (sregs) sregs->psr &= ~PSR_Z; *rdd = result; + sregs->icnt = T_MUL; } break; case UMUL: { mul64 (rs1, operand2, &sregs->y, rdd, 0); + sregs->icnt = T_MUL; } break; case UMULCC: @@ -571,6 +574,7 @@ sparc_dispatch_instruction (sregs) sregs->psr &= ~PSR_Z; *rdd = result; + sregs->icnt = T_MUL; } break; case SDIV: @@ -582,6 +586,7 @@ sparc_dispatch_instruction (sregs) } div64 (sregs->y, rs1, operand2, rdd, 1); + sregs->icnt = T_DIV; } break; case SDIVCC: @@ -610,6 +615,7 @@ sparc_dispatch_instruction (sregs) sregs->psr &= ~(PSR_C | PSR_V); *rdd = result; + sregs->icnt = T_DIV; } break; case UDIV: @@ -621,6 +627,7 @@ sparc_dispatch_instruction (sregs) } div64 (sregs->y, rs1, operand2, rdd, 0); + sregs->icnt = T_DIV; } break; case UDIVCC: @@ -649,6 +656,7 @@ sparc_dispatch_instruction (sregs) sregs->psr &= ~(PSR_C | PSR_V); *rdd = result; + sregs->icnt = T_DIV; } break; case IXNOR: @@ -961,9 +969,7 @@ sparc_dispatch_instruction (sregs) break; } } -#ifdef STAT sregs->nstore++; -#endif } else { @@ -976,9 +982,7 @@ sparc_dispatch_instruction (sregs) break; } } -#ifdef STAT sregs->nload++; -#endif } /* Decode load/store instructions */ @@ -1015,9 +1019,7 @@ sparc_dispatch_instruction (sregs) { rdd[0] = ddata[0]; rdd[1] = ddata[1]; -#ifdef STAT sregs->nload++; /* Double load counts twice */ -#endif } break; @@ -1076,9 +1078,7 @@ sparc_dispatch_instruction (sregs) { sregs->trap = TRAP_DEXC; } -#ifdef STAT sregs->nload++; -#endif break; case LDSBA: case LDUBA: @@ -1192,9 +1192,7 @@ sparc_dispatch_instruction (sregs) rd ^= 1; #endif sregs->fsi[rd] = ddata[0]; -#ifdef STAT sregs->nload++; /* Double load counts twice */ -#endif rd ^= 1; sregs->fsi[rd] = ddata[1]; sregs->ltime = sregs->simtime + sregs->icnt + FLSTHOLD + @@ -1311,9 +1309,7 @@ sparc_dispatch_instruction (sregs) mexc = ms->memory_write (address, rdd, 3, &ws); sregs->hold += ws; sregs->icnt = T_STD; -#ifdef STAT 
sregs->nstore++; /* Double store counts twice */ -#endif if (mexc) { sregs->trap = TRAP_DEXC; @@ -1345,9 +1341,7 @@ sparc_dispatch_instruction (sregs) mexc = ms->memory_write (address, rdd, 3, &ws); sregs->hold += ws; sregs->icnt = T_STD; -#ifdef STAT sregs->nstore++; /* Double store counts twice */ -#endif if (mexc) { sregs->trap = TRAP_DEXC; @@ -1428,9 +1422,7 @@ sparc_dispatch_instruction (sregs) mexc = ms->memory_write (address, ddata, 3, &ws); sregs->hold += ws; sregs->icnt = T_STD; -#ifdef STAT sregs->nstore++; /* Double store counts twice */ -#endif if (mexc) { sregs->trap = TRAP_DEXC; @@ -1462,9 +1454,7 @@ sparc_dispatch_instruction (sregs) } else *rdd = data; -#ifdef STAT sregs->nload++; -#endif break; case CASA: asi = (sregs->inst >> 5) & 0x0ff; @@ -1497,9 +1487,7 @@ sparc_dispatch_instruction (sregs) } else *rdd = data; -#ifdef STAT sregs->nload++; -#endif break; default: @@ -1518,6 +1506,14 @@ sparc_dispatch_instruction (sregs) * last */ } #endif + if (ncpu > 1) + { + l1data_update(address, sregs->cpu); + if (op3 & 4) + { + l1data_snoop(address, sregs->cpu); + } + } break; default: @@ -1899,7 +1895,7 @@ sparc_execute_trap (sregs) sregs->npc = sregs->pc + 4; } - /* Increase simulator time */ + /* Increase simulator time and add some jitter */ sregs->icnt = TRAP_C; } diff --git a/sparc.h b/sparc.h index 440ecdd..19f3561 100644 --- a/sparc.h +++ b/sparc.h @@ -14,6 +14,8 @@ #define T_LDST 4 #define T_JMPL 2 #define T_RETT 2 +#define T_MUL 5 +#define T_DIV 35 #define FSR_QNE 0x2000 #define FP_EXE_MODE 0 -- cgit v1.2.3