Skip to content

Commit

Permalink
Merge pull request #187 from ucb-bar/br-v0.6.2
Browse files Browse the repository at this point in the history
Release v0.6.3

* Bump to Chisel 3.5
* Fix bug with depthwise convolutions
  • Loading branch information
hngenc committed Jan 19, 2022
2 parents 7246123 + 2c07503 commit c47cb7f
Show file tree
Hide file tree
Showing 44 changed files with 285 additions and 353 deletions.
2 changes: 1 addition & 1 deletion CHIPYARD.hash
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ec1b075658fb92a624151536dd1de76bad94f51f
117624d8eea27bafd613eec09e9b9b3e31239e08
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,19 @@ Run these steps to install Chipyard and Spike (make sure to checkout the correct
```shell
git clone https://github.com/ucb-bar/chipyard.git
cd chipyard
git checkout ec1b075658fb92a624151536dd1de76bad94f51f
git checkout 117624d8eea27bafd613eec09e9b9b3e31239e08
./scripts/init-submodules-no-riscv-tools.sh
./scripts/build-toolchains.sh esp-tools

source env.sh

cd generators/gemmini
git fetch && git checkout v0.6.2
git fetch && git checkout v0.6.3
git submodule update

cd -
cd toolchains/esp-tools/riscv-isa-sim/build
git fetch && git checkout 79486d67f99fa739c8c1d5916c9b74d0417b53c4
git fetch && git checkout 090e82c473fd28b4eb2011ffcd771ead6076faab
make && make install
```

Expand Down
2 changes: 1 addition & 1 deletion SPIKE.hash
Original file line number Diff line number Diff line change
@@ -1 +1 @@
79486d67f99fa739c8c1d5916c9b74d0417b53c4
090e82c473fd28b4eb2011ffcd771ead6076faab
2 changes: 1 addition & 1 deletion software/gemmini-rocc-tests
24 changes: 9 additions & 15 deletions src/main/scala/gemmini/AccumulatorMem.scala
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ class AccumulatorReadReq[T <: Data](n: Int, shift_width: Int, scale_t: T) extend

val fromDMA = Bool()

override def cloneType: this.type = new AccumulatorReadReq(n, shift_width, scale_t.cloneType).asInstanceOf[this.type]
}

class AccumulatorReadResp[T <: Data: Arithmetic, U <: Data](fullDataType: Vec[Vec[T]], scale_t: U, shift_width: Int) extends Bundle {
Expand All @@ -24,14 +23,12 @@ class AccumulatorReadResp[T <: Data: Arithmetic, U <: Data](fullDataType: Vec[Ve
val relu6_shift = UInt(shift_width.W)
val act = UInt(2.W) // TODO magic number
val acc_bank_id = UInt(2.W) // TODO don't hardcode
override def cloneType: this.type = new AccumulatorReadResp(fullDataType.cloneType, scale_t, shift_width).asInstanceOf[this.type]
}

class AccumulatorReadIO[T <: Data: Arithmetic, U <: Data](n: Int, shift_width: Int, fullDataType: Vec[Vec[T]], scale_t: U) extends Bundle {
val req = Decoupled(new AccumulatorReadReq[U](n, shift_width, scale_t))
val resp = Flipped(Decoupled(new AccumulatorReadResp[T, U](fullDataType, scale_t, shift_width)))

override def cloneType: this.type = new AccumulatorReadIO(n, shift_width, fullDataType.cloneType, scale_t.cloneType).asInstanceOf[this.type]
}

class AccumulatorWriteReq[T <: Data: Arithmetic](n: Int, t: Vec[Vec[T]]) extends Bundle {
Expand All @@ -41,7 +38,6 @@ class AccumulatorWriteReq[T <: Data: Arithmetic](n: Int, t: Vec[Vec[T]]) extends
val mask = Vec(t.getWidth / 8, Bool()) // TODO Use aligned_to here
// val current_waddr = Flipped(Valid(UInt(log2Ceil(n).W))) // This is the raddr that is being fed into the SRAM right now

override def cloneType: this.type = new AccumulatorWriteReq(n, t).asInstanceOf[this.type]
}


Expand All @@ -60,7 +56,6 @@ class AccumulatorMemIO [T <: Data: Arithmetic, U <: Data](n: Int, t: Vec[Vec[T]]
val sum = Input(t.cloneType)
}

override def cloneType: this.type = new AccumulatorMemIO(n, t, scale_t, acc_sub_banks, use_shared_ext_mem).asInstanceOf[this.type]
}

class AccPipe[T <: Data : Arithmetic](latency: Int, t: T)(implicit ev: Arithmetic[T]) extends Module {
Expand Down Expand Up @@ -117,7 +112,7 @@ class AccumulatorMem[T <: Data, U <: Data](

val pipelined_writes = Reg(Vec(acc_latency, Valid(new AccumulatorWriteReq(n, t))))
val oldest_pipelined_write = pipelined_writes(acc_latency-1)
pipelined_writes(0).valid := io.write.fire()
pipelined_writes(0).valid := io.write.fire
pipelined_writes(0).bits := io.write.bits
for (i <- 1 until acc_latency) {
pipelined_writes(i) := pipelined_writes(i-1)
Expand Down Expand Up @@ -148,8 +143,8 @@ class AccumulatorMem[T <: Data, U <: Data](
mem.io.mask := oldest_pipelined_write.bits.mask
rdata_for_adder := mem.io.rdata
rdata_for_read_resp := mem.io.rdata
mem.io.raddr := Mux(io.write.fire() && io.write.bits.acc, io.write.bits.addr, io.read.req.bits.addr)
mem.io.ren := io.read.req.fire() || (io.write.fire() && io.write.bits.acc)
mem.io.raddr := Mux(io.write.fire && io.write.bits.acc, io.write.bits.addr, io.read.req.bits.addr)
mem.io.ren := io.read.req.fire || (io.write.fire && io.write.bits.acc)
} else {
val rmw_req = Wire(Decoupled(UInt()))
rmw_req.valid := io.write.valid && io.write.bits.acc
Expand Down Expand Up @@ -203,14 +198,13 @@ class AccumulatorMem[T <: Data, U <: Data](
val data = Vec(mask_len, mask_elem)
val mask = Vec(mask_len, Bool())
val addr = UInt(log2Ceil(n/acc_sub_banks).W)
override def cloneType: this.type = new W_Q_Entry(mask_len, mask_elem).asInstanceOf[this.type]
}

val w_q = Reg(Vec(nEntries, new W_Q_Entry(mask_len, mask_elem)))
for (e <- w_q) {
when (e.valid) {
assert(!(
io.write.fire() && io.write.bits.acc &&
io.write.fire && io.write.bits.acc &&
isThisBank(io.write.bits.addr) && getBankIdx(io.write.bits.addr) === e.addr &&
((io.write.bits.mask.asUInt & e.mask.asUInt) =/= 0.U)
), "you cannot accumulate to an AccumulatorMem address until previous writes to that address have completed")
Expand Down Expand Up @@ -276,7 +270,7 @@ class AccumulatorMem[T <: Data, U <: Data](
// 1. incoming reads for RMW
// 2. writes from RMW
// 3. incoming reads
when (rmw_req.fire() && isThisBank(rmw_req.bits)) {
when (rmw_req.fire && isThisBank(rmw_req.bits)) {
ren := true.B
when (isThisBank(only_read_req.bits)) {
only_read_req.ready := false.B
Expand All @@ -287,7 +281,7 @@ class AccumulatorMem[T <: Data, U <: Data](
only_read_req.ready := false.B
}
} .otherwise {
ren := isThisBank(only_read_req.bits) && only_read_req.fire()
ren := isThisBank(only_read_req.bits) && only_read_req.fire
raddr := getBankIdx(only_read_req.bits)
}

Expand All @@ -304,7 +298,7 @@ class AccumulatorMem[T <: Data, U <: Data](
q.io.enq.bits.act := RegNext(io.read.req.bits.act)
q.io.enq.bits.fromDMA := RegNext(io.read.req.bits.fromDMA)
q.io.enq.bits.acc_bank_id := DontCare
q.io.enq.valid := RegNext(io.read.req.fire())
q.io.enq.valid := RegNext(io.read.req.fire)

val p = q.io.deq

Expand All @@ -317,7 +311,7 @@ class AccumulatorMem[T <: Data, U <: Data](
io.read.resp.valid := p.valid
p.ready := io.read.resp.ready

val q_will_be_empty = (q.io.count +& q.io.enq.fire()) - q.io.deq.fire() === 0.U
val q_will_be_empty = (q.io.count +& q.io.enq.fire) - q.io.deq.fire === 0.U
io.read.req.ready := q_will_be_empty && (
// Make sure we aren't accumulating, which would take over both ports
!(io.write.valid && io.write.bits.acc) &&
Expand All @@ -333,5 +327,5 @@ class AccumulatorMem[T <: Data, U <: Data](
}

// assert(!(io.read.req.valid && io.write.en && io.write.acc), "reading and accumulating simultaneously is not supported")
assert(!(io.read.req.fire() && io.write.fire() && io.read.req.bits.addr === io.write.bits.addr), "reading from and writing to same address is not supported")
assert(!(io.read.req.fire && io.write.fire && io.read.req.bits.addr === io.write.bits.addr), "reading from and writing to same address is not supported")
}
15 changes: 5 additions & 10 deletions src/main/scala/gemmini/AccumulatorScale.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import Util._
class AccumulatorReadRespWithFullData[T <: Data: Arithmetic, U <: Data](fullDataType: Vec[Vec[T]], scale_t: U, shift_width: Int) extends Bundle {
val resp = new AccumulatorReadResp(fullDataType, scale_t, shift_width)
val full_data = fullDataType.cloneType
override def cloneType: this.type = new AccumulatorReadRespWithFullData(fullDataType.cloneType, scale_t, shift_width).asInstanceOf[this.type]
}


Expand All @@ -17,7 +16,6 @@ class AccumulatorScaleResp[T <: Data: Arithmetic](fullDataType: Vec[Vec[T]], rDa
val data = rDataType.cloneType
val acc_bank_id = UInt(2.W)
val fromDMA = Bool()
override def cloneType: this.type = new AccumulatorScaleResp(fullDataType, rDataType).asInstanceOf[this.type]
}

class AccumulatorScaleIO[T <: Data: Arithmetic, U <: Data](
Expand All @@ -26,8 +24,6 @@ class AccumulatorScaleIO[T <: Data: Arithmetic, U <: Data](
) extends Bundle {
val in = Flipped(Decoupled(new AccumulatorReadResp[T,U](fullDataType, scale_t, shift_width)))
val out = Decoupled(new AccumulatorScaleResp[T](fullDataType, rDataType))
override def cloneType: this.type = new AccumulatorScaleIO(fullDataType, scale_t,
shift_width, rDataType).asInstanceOf[this.type]
}

class AccScaleDataWithIndex[T <: Data: Arithmetic, U <: Data](t: T, u: U) extends Bundle {
Expand All @@ -40,7 +36,6 @@ class AccScaleDataWithIndex[T <: Data: Arithmetic, U <: Data](t: T, u: U) extend
val full_data = t.cloneType
val id = UInt(2.W) // TODO hardcoded
val index = UInt()
override def cloneType: this.type = new AccScaleDataWithIndex(t, u).asInstanceOf[this.type]
}

class AccScalePipe[T <: Data : Arithmetic, U <: Data](t: T, rDataType: Vec[Vec[T]], scale_func: (T, U) => T, scale_t: U, latency: Int, has_nonlinear_activations: Boolean)(implicit ev: Arithmetic[T]) extends Module {
Expand Down Expand Up @@ -123,7 +118,7 @@ class AccumulatorScale[T <: Data: Arithmetic, U <: Data](
val tail_oh = RegInit(1.U(nEntries.W))
out.valid := Mux1H(head_oh.asBools, (regs zip completed_masks).map({case (r, c) => r.valid && c.reduce(_&&_)}))
out.bits := Mux1H(head_oh.asBools, out_regs)
when (out.fire()) {
when (out.fire) {
for (i <- 0 until nEntries) {
when (head_oh(i)) {
regs(i).valid := false.B
Expand All @@ -132,8 +127,8 @@ class AccumulatorScale[T <: Data: Arithmetic, U <: Data](
head_oh := (head_oh << 1) | head_oh(nEntries-1)
}

io.in.ready := !Mux1H(tail_oh.asBools, regs.map(_.valid)) || (tail_oh === head_oh && out.fire())
when (io.in.fire()) {
io.in.ready := !Mux1H(tail_oh.asBools, regs.map(_.valid)) || (tail_oh === head_oh && out.fire)
when (io.in.fire) {
for (i <- 0 until nEntries) {
when (tail_oh(i)) {
regs(i).valid := true.B
Expand All @@ -160,7 +155,7 @@ class AccumulatorScale[T <: Data: Arithmetic, U <: Data](
input.bits.relu6_shift := regs(i).bits.relu6_shift
input.bits.id := i.U
input.bits.index := w.U
when (input.fire()) {
when (input.fire) {
fired_masks(i)(w) := true.B
}
}
Expand All @@ -185,7 +180,7 @@ class AccumulatorScale[T <: Data: Arithmetic, U <: Data](
if ((j*width+w) % num_scale_units == i) {
val id0 = w % io.in.bits.data(0).size
val id1 = w / io.in.bits.data(0).size
when (pipe_out.fire() && pipe_out.bits.id === j.U && pipe_out.bits.index === w.U) {
when (pipe_out.fire && pipe_out.bits.id === j.U && pipe_out.bits.index === w.U) {
out_regs(j).data (id1)(id0) := pipe_out.bits.data
out_regs(j).full_data(id1)(id0) := pipe_out.bits.full_data
completed_masks(j)(w) := true.B
Expand Down
20 changes: 10 additions & 10 deletions src/main/scala/gemmini/BeatMerger.scala
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class BeatMerger[U <: Data](beatBits: Int, maxShift: Int, spadWidth: Int, accWid

io.req.ready := !req.valid

io.in.ready := io.req.fire() || (req.valid && bytesRead =/= (1.U << req.bits.lg_len_req).asUInt())
io.in.ready := io.req.fire || (req.valid && bytesRead =/= (1.U << req.bits.lg_len_req).asUInt())

io.out.valid := req.valid && usefulBytesRead > bytesSent && (usefulBytesRead - bytesSent >= rowBytes ||
usefulBytesRead === req.bits.bytes_to_read)
Expand Down Expand Up @@ -92,7 +92,7 @@ class BeatMerger[U <: Data](beatBits: Int, maxShift: Int, spadWidth: Int, accWid
req.pop()
}

when (io.out.fire()) {
when (io.out.fire) {
bytesSent := bytesSent_next

when (last_sending && bytesRead === (1.U << req.bits.lg_len_req).asUInt()) {
Expand All @@ -101,18 +101,18 @@ class BeatMerger[U <: Data](beatBits: Int, maxShift: Int, spadWidth: Int, accWid
}
}

when (io.req.fire()) {
when (io.req.fire) {
req.push(io.req.bits)
bytesRead := 0.U
bytesSent := 0.U
}

when (io.in.fire()) {
val current_bytesRead = Mux(io.req.fire(), 0.U, bytesRead)
val current_bytesDiscarded = Mux(io.req.fire(), 0.U, bytesDiscarded)
val current_usefulBytesRead = Mux(io.req.fire(), 0.U, usefulBytesRead)
val current_shift = Mux(io.req.fire(), io.req.bits.shift, req.bits.shift)
val current_lg_len_req = Mux(io.req.fire(), io.req.bits.lg_len_req, req.bits.lg_len_req)
when (io.in.fire) {
val current_bytesRead = Mux(io.req.fire, 0.U, bytesRead)
val current_bytesDiscarded = Mux(io.req.fire, 0.U, bytesDiscarded)
val current_usefulBytesRead = Mux(io.req.fire, 0.U, usefulBytesRead)
val current_shift = Mux(io.req.fire, io.req.bits.shift, req.bits.shift)
val current_lg_len_req = Mux(io.req.fire, io.req.bits.lg_len_req, req.bits.lg_len_req)
val current_len_req = (1.U << current_lg_len_req).asUInt()

when (current_shift - current_bytesDiscarded <= beatBytes.U /* &&
Expand All @@ -127,7 +127,7 @@ class BeatMerger[U <: Data](beatBits: Int, maxShift: Int, spadWidth: Int, accWid

bytesRead := satAdd(current_bytesRead, beatBytes.U, current_len_req)

when (!io.req.fire() && bytesSent === req.bits.bytes_to_read && last_reading) {
when (!io.req.fire && bytesSent === req.bits.bytes_to_read && last_reading) {
req.pop()
}
}
Expand Down
12 changes: 6 additions & 6 deletions src/main/scala/gemmini/CmdFSM.scala
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class CmdFSM[T <: Data: Arithmetic, U <: Data, V <: Data]
status := DontCare

//==========================================================================
// Combinational Output Defaults
// Combinational Output Defaults
//==========================================================================
io.cmd.ready := false.B
io.tiler.valid := false.B
Expand All @@ -90,7 +90,7 @@ class CmdFSM[T <: Data: Arithmetic, U <: Data, V <: Data]
io.busy := (state === s_EX_PENDING)

//==========================================================================
// FSM
// FSM
//==========================================================================
def reset_and_listen(): Unit = {
// Reset all data-validity
Expand All @@ -109,13 +109,13 @@ class CmdFSM[T <: Data: Arithmetic, U <: Data, V <: Data]
// Wait for tiling/execution to complete,
// let any further commands queue up
io.tiler.valid := true.B
when (io.tiler.fire()) {
when (io.tiler.fire) {
state := s_LISTENING
}
}.elsewhen (state === s_ERROR) {
// In s_ERROR state - only update based on RESET commands
io.cmd.ready := true.B
when (io.cmd.fire()) {
when (io.cmd.fire) {
val cmd = io.cmd.bits
val funct = cmd.inst.funct
when (funct === RESET) {
Expand All @@ -124,7 +124,7 @@ class CmdFSM[T <: Data: Arithmetic, U <: Data, V <: Data]
}
}.otherwise { // s_LISTENING State
io.cmd.ready := true.B
when (io.cmd.fire()) {
when (io.cmd.fire) {
val cmd = io.cmd.bits
val funct = cmd.inst.funct
val rs1 = cmd.rs1
Expand All @@ -143,7 +143,7 @@ class CmdFSM[T <: Data: Arithmetic, U <: Data, V <: Data]
// Signal to the Tiler, and move to our EXEC state
// FIXME: check all valid
io.tiler.valid := true.B
when (io.tiler.fire()) {
when (io.tiler.fire) {
state := s_LISTENING
}.otherwise {
state := s_EX_PENDING
Expand Down
5 changes: 2 additions & 3 deletions src/main/scala/gemmini/Controller.scala
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ class GemminiCmd(rob_entries: Int)(implicit p: Parameters) extends Bundle {
val cmd = new RoCCCommand
val rob_id = UDValid(UInt(log2Up(rob_entries).W))

override def cloneType: this.type = new GemminiCmd(rob_entries).asInstanceOf[this.type]
}

class Gemmini[T <: Data : Arithmetic, U <: Data, V <: Data](val config: GemminiArrayConfig[T, U, V])
Expand Down Expand Up @@ -389,7 +388,7 @@ class GemminiModule[T <: Data: Arithmetic, U <: Data, V <: Data]
.otherwise {
reservation_station.io.alloc.valid := true.B

when(reservation_station.io.alloc.fire()) {
when(reservation_station.io.alloc.fire) {
// compressed_cmd.ready := true.B
unrolled_cmd.ready := true.B
}
Expand All @@ -414,5 +413,5 @@ class GemminiModule[T <: Data: Arithmetic, U <: Data, V <: Data]
//=========================================================================
// Performance Counters Access
//=========================================================================

}
Loading

0 comments on commit c47cb7f

Please sign in to comment.