diff --git a/SPIKE.hash b/SPIKE.hash index ccafc0db..3137a068 100644 --- a/SPIKE.hash +++ b/SPIKE.hash @@ -1 +1 @@ -9b0082a416a4f1967fda434c7129953fad77b2af +bc3222e351cdd645b6fd2605fd9611e3bc0d9cae diff --git a/software/gemmini-rocc-tests b/software/gemmini-rocc-tests index d68fe69c..bd9dbe0b 160000 --- a/software/gemmini-rocc-tests +++ b/software/gemmini-rocc-tests @@ -1 +1 @@ -Subproject commit d68fe69ce930dd18bf62ad28ab3015ef5087177d +Subproject commit bd9dbe0b0dcde33b5445711ed27c6840167c10bf diff --git a/src/main/scala/gemmini/Controller.scala b/src/main/scala/gemmini/Controller.scala index 2b7c9a82..d9a0f93a 100644 --- a/src/main/scala/gemmini/Controller.scala +++ b/src/main/scala/gemmini/Controller.scala @@ -52,8 +52,6 @@ class GemminiModule[T <: Data: Arithmetic, U <: Data, V <: Data] implicit val edge = outer.node.edges.out.head val tlb = Module(new FrontendTLB(2, tlb_size, dma_maxbytes)) (tlb.io.clients zip outer.spad.module.io.tlb).foreach(t => t._1 <> t._2) - tlb.io.exp.flush_skip := false.B - tlb.io.exp.flush_retry := false.B io.ptw.head <> tlb.io.ptw /*io.ptw.head.req <> tlb.io.ptw.req @@ -63,7 +61,7 @@ class GemminiModule[T <: Data: Arithmetic, U <: Data, V <: Data] tlb.io.ptw.pmp := io.ptw.head.pmp tlb.io.ptw.customCSRs := io.ptw.head.customCSRs*/ - spad.module.io.flush := tlb.io.exp.flush() + spad.module.io.flush := tlb.io.exp.flush /* //========================================================================= @@ -107,7 +105,30 @@ class GemminiModule[T <: Data: Arithmetic, U <: Data, V <: Data] // Incoming commands and ROB val rob = Module(new ROB(outer.config, new RoCCCommand)) - val raw_cmd = Queue(io.cmd) + val raw_cmd_q = Module(new Queue(new RoCCCommand, 2)) + val fence_stall = io.cmd.bits.inst.funct === FENCE_CMD && io.busy + raw_cmd_q.io.enq.valid := io.cmd.valid && io.resp.ready && !fence_stall + raw_cmd_q.io.enq.bits := io.cmd.bits + + io.resp.valid := io.cmd.valid && raw_cmd_q.io.enq.ready && !fence_stall + io.resp.bits.rd := io.cmd.bits.inst.rd + io.resp.bits.data := 0.U + + io.cmd.ready := io.resp.ready && raw_cmd_q.io.enq.ready && !fence_stall + + // When TLB is busy with exception, don't enqueue new instructions, instead use RD to pass back exception info + when (tlb.io.exp.interrupt) { + io.cmd.ready := true.B + raw_cmd_q.io.enq.valid := false.B + io.resp.valid := io.cmd.valid + io.resp.bits.data := tlb.io.exp.vaddr + } + + tlb.io.exp.flush := io.cmd.fire() && io.cmd.bits.inst.funct === FLUSH_CMD + + + + val raw_cmd = raw_cmd_q.io.deq // TODO replace 4,12,2 with parameters based on ROB size val (conv_cmd, loop_conv_unroller_busy) = LoopConv(raw_cmd, rob.io.ld_utilization, rob.io.st_utilization, rob.io.ex_utilization, @@ -300,8 +321,8 @@ class GemminiModule[T <: Data: Arithmetic, U <: Data, V <: Data] rob_completed_arb.io.out.ready := true.B // Wire up global RoCC signals - io.busy := raw_cmd.valid || loop_conv_unroller_busy || loop_matmul_unroller_busy || rob.io.busy || spad.module.io.busy || unrolled_cmd.valid || loop_cmd.valid || conv_cmd.valid - io.interrupt := tlb.io.exp.interrupt + io.busy := (raw_cmd.valid || loop_conv_unroller_busy || loop_matmul_unroller_busy || rob.io.busy || spad.module.io.busy || unrolled_cmd.valid || loop_cmd.valid || conv_cmd.valid) && !tlb.io.exp.interrupt + io.interrupt := false.B rob.io.solitary_preload := ex_controller.io.solitary_preload @@ -354,32 +375,15 @@ class GemminiModule[T <: Data: Arithmetic, U <: Data, V <: Data] val risc_funct = unrolled_cmd.bits.inst.funct val is_flush = risc_funct === FLUSH_CMD + val is_fence = risc_funct === FENCE_CMD /* val is_load = (funct === LOAD_CMD) || (funct === CONFIG_CMD && config_cmd_type === CONFIG_LOAD) val is_store = (funct === STORE_CMD) || (funct === CONFIG_CMD && config_cmd_type === CONFIG_STORE) val is_ex = (funct === COMPUTE_AND_FLIP_CMD || funct === COMPUTE_AND_STAY_CMD || funct === PRELOAD_CMD) || (funct === CONFIG_CMD && config_cmd_type === CONFIG_EX) */ - - when (is_flush) { - // val skip = compressed_cmd.bits.rs1(0) - val skip = unrolled_cmd.bits.rs1(0) - tlb.io.exp.flush_skip := skip - tlb.io.exp.flush_retry := !skip - - // compressed_cmd.ready := true.B // TODO should we wait for an acknowledgement from the TLB? - unrolled_cmd.ready := true.B // TODO should we wait for an acknowledgement from the TLB? - } - - .otherwise { - rob.io.alloc.valid := true.B - - when(rob.io.alloc.fire()) { - // compressed_cmd.ready := true.B - unrolled_cmd.ready := true.B - } - } - + unrolled_cmd.ready := is_fence || is_flush || rob.io.alloc.ready + rob.io.alloc.valid := !is_flush && !is_fence } /* diff --git a/src/main/scala/gemmini/FrontendTLB.scala b/src/main/scala/gemmini/FrontendTLB.scala index 73416816..819d9f57 100644 --- a/src/main/scala/gemmini/FrontendTLB.scala +++ b/src/main/scala/gemmini/FrontendTLB.scala @@ -17,11 +17,12 @@ class DecoupledTLBReq(val lgMaxSize: Int)(implicit p: Parameters) extends CoreBu } class TLBExceptionIO extends Bundle { + // interrupt means we are stalling loads and stores until a gemmini_flush command is received val interrupt = Output(Bool()) - val flush_retry = Input(Bool()) - val flush_skip = Input(Bool()) + // vaddr of faulting inst. LSB indicates is_Store + val vaddr = Output(UInt(64.W)) - def flush(dummy: Int = 0): Bool = flush_retry || flush_skip + val flush = Input(Bool()) } // TODO can we make TLB hits only take one cycle? @@ -30,7 +31,7 @@ class DecoupledTLB(entries: Int, maxSize: Int)(implicit edge: TLEdgeOut, p: Para val lgMaxSize = log2Ceil(maxSize) val io = new Bundle { - val req = Flipped(Valid(new DecoupledTLBReq(lgMaxSize))) + val req = Flipped(Decoupled(new DecoupledTLBReq(lgMaxSize))) val resp = new TLBResp val ptw = new TLBPTWIO @@ -38,7 +39,11 @@ class DecoupledTLB(entries: Int, maxSize: Int)(implicit edge: TLEdgeOut, p: Para } val interrupt = RegInit(false.B) + val interrupt_vaddr = Reg(UInt(64.W)) io.exp.interrupt := interrupt + io.exp.vaddr := interrupt_vaddr + + io.req.ready := !interrupt val tlb = Module(new TLB(false, lgMaxSize, TLBConfig(nSets=1, nWays=entries))) tlb.io.req.valid := io.req.valid @@ -46,7 +51,7 @@ class DecoupledTLB(entries: Int, maxSize: Int)(implicit edge: TLEdgeOut, p: Para io.resp := tlb.io.resp tlb.io.kill := false.B - tlb.io.sfence.valid := io.exp.flush() + tlb.io.sfence.valid := io.exp.flush tlb.io.sfence.bits.rs1 := false.B tlb.io.sfence.bits.rs2 := false.B tlb.io.sfence.bits.addr := DontCare @@ -54,13 +59,16 @@ class DecoupledTLB(entries: Int, maxSize: Int)(implicit edge: TLEdgeOut, p: Para io.ptw <> tlb.io.ptw tlb.io.ptw.status := io.req.bits.status - val exception = io.req.valid && Mux(io.req.bits.tlb_req.cmd === M_XRD, tlb.io.resp.pf.ld || tlb.io.resp.ae.ld, tlb.io.resp.pf.st || tlb.io.resp.ae.st) - when (exception) { interrupt := true.B } + val xcpt_ld = io.req.valid && (io.req.bits.tlb_req.cmd === M_XRD) && (tlb.io.resp.pf.ld || tlb.io.resp.ae.ld) + val xcpt_st = io.req.valid && (io.req.bits.tlb_req.cmd === M_XWR) && (tlb.io.resp.pf.st || tlb.io.resp.ae.st) + when (!interrupt && (xcpt_ld || xcpt_st)) { + interrupt := true.B + interrupt_vaddr := Cat(tlb.io.req.bits.vaddr >> 1, xcpt_st) + + } when (interrupt && tlb.io.sfence.fire()) { interrupt := false.B } - - assert(!io.exp.flush_retry || !io.exp.flush_skip, "TLB: flushing with both retry and skip at same time") } class FrontendTLBIO(implicit p: Parameters) extends CoreBundle { @@ -81,9 +89,7 @@ class FrontendTLB(nClients: Int, entries: Int, maxSize: Int) val lgMaxSize = log2Ceil(coreDataBytes) val tlbArb = Module(new RRArbiter(new DecoupledTLBReq(lgMaxSize), nClients)) val tlb = Module(new DecoupledTLB(entries, maxSize)) - tlb.io.req.valid := tlbArb.io.out.valid - tlb.io.req.bits := tlbArb.io.out.bits - tlbArb.io.out.ready := true.B + tlb.io.req <> tlbArb.io.out io.ptw <> tlb.io.ptw io.exp <> tlb.io.exp @@ -101,7 +107,7 @@ class FrontendTLB(nClients: Int, entries: Int, maxSize: Int) last_translated_vpn := req.bits.tlb_req.vaddr last_translated_ppn := tlb.io.resp.paddr } - when (io.exp.flush()) { + when (io.exp.flush) { last_translated_valid := false.B } diff --git a/src/main/scala/gemmini/GemminiISA.scala b/src/main/scala/gemmini/GemminiISA.scala index b49087c7..2bf48dad 100644 --- a/src/main/scala/gemmini/GemminiISA.scala +++ b/src/main/scala/gemmini/GemminiISA.scala @@ -31,6 +31,8 @@ object GemminiISA { val LOOP_CONV_WS_CONFIG_5 = 20.U // *weights | *output val LOOP_CONV_WS_CONFIG_6 = 21.U // *bias, *input + val FENCE_CMD = 127.U + // rs1[2:0] values val CONFIG_EX = 0.U val CONFIG_LOAD = 1.U