Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Timing Fix for CHI critical path #183

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/main/scala/coupledL2/RequestArb.scala
Original file line number Diff line number Diff line change
Expand Up @@ -130,19 +130,19 @@ class RequestArb(implicit p: Parameters) extends L2Module {
(if (io.fromTXRSP.isDefined) io.fromTXRSP.get.blockSinkBReqEntrance else false.B)
val block_C = io.fromMSHRCtl.blockC_s1 || io.fromMainPipe.blockC_s1 || io.fromGrantBuffer.blockSinkReqEntrance.blockC_s1

val noFreeWay = Wire(Bool())
// val noFreeWay = Wire(Bool())

val sinkValids = VecInit(Seq(
io.sinkC.valid && !block_C,
io.sinkB.valid && !block_B,
io.sinkA.valid && !block_A && !noFreeWay
io.sinkA.valid && !block_A
)).asUInt

// TODO: A Hint is allowed to enter if !s2_ready for mcp2_stall

val sink_ready_basic = io.dirRead_s1.ready && resetFinish && !mshr_task_s1.valid && s2_ready

io.sinkA.ready := sink_ready_basic && !block_A && !sinkValids(1) && !sinkValids(0) && !noFreeWay // SinkC prior to SinkA & SinkB
io.sinkA.ready := sink_ready_basic && !block_A && !sinkValids(1) && !sinkValids(0) // SinkC prior to SinkA & SinkB
io.sinkB.ready := sink_ready_basic && !block_B && !sinkValids(0) // SinkB prior to SinkA
io.sinkC.ready := sink_ready_basic && !block_C

Expand Down Expand Up @@ -195,13 +195,13 @@ class RequestArb(implicit p: Parameters) extends L2Module {
task_s2.valid := s1_fire
when(s1_fire) { task_s2.bits := task_s1.bits }

val sameSet_s2 = task_s2.valid && task_s2.bits.fromA && !task_s2.bits.mshrTask && task_s2.bits.set === A_task.set
/* val sameSet_s2 = task_s2.valid && task_s2.bits.fromA && !task_s2.bits.mshrTask && task_s2.bits.set === A_task.set
val sameSet_s3 = RegNext(task_s2.valid && task_s2.bits.fromA && !task_s2.bits.mshrTask) &&
RegEnable(task_s2.bits.set, task_s2.valid) === A_task.set
val sameSetCnt = PopCount(VecInit(io.msInfo.map(s => s.valid && s.bits.set === A_task.set && s.bits.fromA) :+
sameSet_s2 :+ sameSet_s3).asUInt)
noFreeWay := sameSetCnt >= cacheParams.ways.U

*/
io.taskToPipe_s2 := task_s2

// MSHR task
Expand Down
20 changes: 17 additions & 3 deletions src/main/scala/coupledL2/RequestBuffer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete
val mshrInfo = Vec(mshrsAll, Flipped(ValidIO(new MSHRInfo)))
val aMergeTask = ValidIO(new AMergeTask)
val mainPipeBlock = Input(Vec(2, Bool()))
/* Snoop task from arbiter at stage 2 */
val taskFromArb_s2 = Flipped(ValidIO(new TaskBundle()))

val ATag = Output(UInt(tagBits.W))
val ASet = Output(UInt(setBits.W))
Expand Down Expand Up @@ -143,8 +145,20 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete
io.aMergeTask.bits.id := mergeAId
io.aMergeTask.bits.task := in

/*
noFreeWay check: s2 + s3 + mshrs >= ways(L2)
*/
val task_s2 = io.taskFromArb_s2
val sameSet_s2 = task_s2.valid && task_s2.bits.fromA && !task_s2.bits.mshrTask && task_s2.bits.set === io.ASet
val sameSet_s3 = RegNext(task_s2.valid && task_s2.bits.fromA && !task_s2.bits.mshrTask) &&
RegEnable(task_s2.bits.set, task_s2.valid) === io.ASet
val sameSetCnt = PopCount(VecInit(io.mshrInfo.map(s => s.valid && s.bits.set === io.ASet && s.bits.fromA) :+
sameSet_s2 :+ sameSet_s3).asUInt)
val noFreeWay = sameSetCnt >= cacheParams.ways.U


// flow not allowed when full, or entries might starve
val canFlow = flow.B && !full && !conflict(in) && !chosenQValid && !Cat(io.mainPipeBlock).orR
val canFlow = flow.B && !full && !conflict(in) && !chosenQValid && !Cat(io.mainPipeBlock).orR && !noFreeWay
val doFlow = canFlow && io.out.ready
io.hasLatePF := latePrefetch(in) && io.in.valid && !sameAddr(in, RegNext(in))
io.hasMergeA := mergeA && io.in.valid && !sameAddr(in, RegNext(in))
Expand Down Expand Up @@ -177,7 +191,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete

entry.valid := true.B
// when Addr-Conflict / Same-Addr-Dependent / MainPipe-Block / noFreeWay-in-Set, entry not ready
entry.rdy := !conflict(in) && !mpBlock && !s1Block // && !Cat(depMask).orR
entry.rdy := !conflict(in) && !mpBlock && !s1Block && !noFreeWay// && !Cat(depMask).orR
entry.task := io.in.bits
entry.waitMP := Cat(
s1Block,
Expand Down Expand Up @@ -247,7 +261,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete
// update info
e.waitMS := waitMSUpdate
// e.depMask := depMaskUpdate
e.rdy := !waitMSUpdate.orR && !e.waitMP && !s1_Block
e.rdy := !waitMSUpdate.orR && !e.waitMP && !s1_Block && !noFreeWay
}
}

Expand Down
71 changes: 44 additions & 27 deletions src/main/scala/coupledL2/tl2chi/MSHRCtl.scala
Original file line number Diff line number Diff line change
Expand Up @@ -120,46 +120,60 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module {
mshrSelector.io.idle := mshrs.map(m => !m.io.status.valid)
io.toMainPipe.mshr_alloc_ptr := OHToUInt(selectedMSHROH)

/*
The rxrsp path used for PCredit handling is timing-critical, so register it here to break the path
*/
val rxrspValid = RegNext(io.resps.rxrsp.valid)
val rxrspInfo = RegNext(io.resps.rxrsp.respInfo)
val rxrspMshrId = RegNext( io.resps.rxrsp.mshrId)

/*
when PCrdGrant, give credit to one entry that:
1. got RetryAck and not Reissued
2. match srcID and PCrdType
3. use Round-Robin arbiter if multi-entry match
3. use a fixed-priority mux if multiple entries match
4. use timeout protection once an entry's not-granted counter reaches 7
*/
val isPCrdGrant = io.resps.rxrsp.valid && (io.resps.rxrsp.respInfo.chiOpcode.get === PCrdGrant)
val isPCrdGrantReg = RegNext(isPCrdGrant)
val waitPCrdInfo = Wire(Vec(mshrsAll, new PCrdInfo))
// val pArb = Module(new RRArbiter(UInt(), mshrsAll))
val timeOutPri = VecInit(Seq.fill(16)(false.B))
val pCrdPri = VecInit(Seq.fill(16)(false.B))

val matchPCrdGrant = VecInit(waitPCrdInfo.map(p =>
isPCrdGrant && p.valid &&
p.srcID.get === io.resps.rxrsp.respInfo.srcID.get &&
p.pCrdType.get === io.resps.rxrsp.respInfo.pCrdType.get
))

/* pArb.io.in.zipWithIndex.foreach {
case (in, i) =>
in.valid := matchPCrdGrant(i)
in.bits := 0.U
val matchPCrdGrantReg = RegNext(matchPCrdGrant)
val pCrdFixPri = VecInit(PriorityEncoderOH(matchPCrdGrantReg)) //fix priority arbiter

// timeout protect
val counter = RegInit(VecInit(Seq.fill(mshrsAll)(0.U(log2Ceil(mshrsAll).W))))

for(i <- 0 until 16) {
when(matchPCrdGrantReg(i)) {
when(pCrdFixPri(i) || timeOutPri(i)) {
counter(i):=0.U
}.otherwise {
counter(i):= counter(i) + 1.U
}
}
}
pArb.io.out.ready := true.B
val pCrdRR = VecInit(UIntToOH(pArb.io.chosen))
val pCrdPri = VecInit((matchPCrdGrant.asUInt & pCrdRR.asUInt).asBools)
//val pCrdPri = VecInit(PriorityEncoderOH(matchPCrdGrant))
val pCrdIsWait = OHToUInt(pCrdPri)
*/
val timeOutOH = PriorityEncoderOH(counter.map(_>=7.U))
timeOutPri := VecInit(timeOutOH)

val timeOutHit = VecInit.tabulate(16)(i=>matchPCrdGrantReg(i) && timeOutPri(i))
val timeOutSel = timeOutHit.reduce(_|_)
pCrdPri := Mux(timeOutSel, timeOutPri, pCrdFixPri)

dontTouch (timeOutPri)
dontTouch (timeOutHit)
dontTouch (timeOutSel)
dontTouch (pCrdPri)

/*
Random arbiter if multi-entry match
*/
val lfsr = LFSR(16, true.B)
val idx = Random(16, lfsr)
val idxOH = VecInit(UIntToOH(idx))

val doubleReq = Fill(2, matchPCrdGrant.asUInt)
val doubleGnt = ~(doubleReq - idxOH.asUInt) & doubleReq
val gnt = doubleGnt(31,16) | doubleGnt(15,0)
val pCrdPri = VecInit(gnt.asBools)
val pCrdIsWait = OHToUInt(pCrdPri)

/* When a PCrdGrant comes before its RetryAck, a 16-entry CAM is used to:
1. save {srcID, PCrdType}
Expand All @@ -171,7 +185,8 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module {
val pCamValids = Cat(pCam.map(_.valid))
val enqIdx = PriorityEncoder(~pCamValids.asUInt)

when (isPCrdGrant && !pCrdIsWait.orR){
// when (isPCrdGrant && !pCrdIsWait.orR){
when (isPCrdGrant){
pCam(enqIdx).valid := true.B
pCam(enqIdx).srcID.get := io.resps.rxrsp.respInfo.srcID.get
pCam(enqIdx).pCrdType.get := io.resps.rxrsp.respInfo.pCrdType.get
Expand Down Expand Up @@ -214,8 +229,10 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module {
m.io.resps.rxdat.valid := m.io.status.valid && io.resps.rxdat.valid && io.resps.rxdat.mshrId === i.U
m.io.resps.rxdat.bits := io.resps.rxdat.respInfo

m.io.resps.rxrsp.valid := (m.io.status.valid && io.resps.rxrsp.valid && !isPCrdGrant && io.resps.rxrsp.mshrId === i.U) || (isPCrdGrant && pCrdPri(i))
m.io.resps.rxrsp.bits := io.resps.rxrsp.respInfo
// m.io.resps.rxrsp.valid := (m.io.status.valid && io.resps.rxrsp.valid && !isPCrdGrant && io.resps.rxrsp.mshrId === i.U) || (isPCrdGrant && pCrdPri(i))
// m.io.resps.rxrsp.bits := io.resps.rxrsp.respInfo
m.io.resps.rxrsp.valid := (m.io.status.valid && rxrspValid && !isPCrdGrantReg && rxrspMshrId === i.U) || (isPCrdGrantReg && pCrdPri(i))
m.io.resps.rxrsp.bits := rxrspInfo

m.io.replResp.valid := io.replResp.valid && io.replResp.bits.mshrId === i.U
m.io.replResp.bits := io.replResp.bits
Expand All @@ -226,7 +243,7 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module {
m.io.aMergeTask.bits := io.aMergeTask.bits.task

waitPCrdInfo(i) := m.io.waitPCrdInfo
m.io.pCamPri := (pCamPri === i.U) && waitPCrdInfo(i).valid
m.io.pCamPri := 0.U /*(pCamPri === i.U) && waitPCrdInfo(i).valid*/
}
/* Reserve 1 entry for SinkB */
io.waitPCrdInfo <> waitPCrdInfo
Expand Down
1 change: 1 addition & 0 deletions src/main/scala/coupledL2/tl2chi/Slice.scala
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle]
reqBuf.io.mshrInfo := mshrCtl.io.msInfo
reqBuf.io.mainPipeBlock := mainPipe.io.toReqBuf
reqBuf.io.s1Entrance := reqArb.io.s1Entrance
reqBuf.io.taskFromArb_s2 := reqArb.io.taskToPipe_s2

mainPipe.io.taskFromArb_s2 := reqArb.io.taskToPipe_s2
mainPipe.io.taskInfo_s1 := reqArb.io.taskInfo_s1
Expand Down
11 changes: 7 additions & 4 deletions src/main/scala/coupledL2/tl2chi/TXREQ.scala
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,22 @@ class TXREQ(implicit p: Parameters) extends TL2CHIL2Module {
require(chiOpt.isDefined)

// TODO: an mshrsAll-entry queue is too much, evaluate for a proper size later
val queue = Module(new Queue(new CHIREQ, entries = mshrsAll, flow = true))
val queue = Module(new Queue(new CHIREQ, entries = mshrsAll, flow = false))

// Back pressure logic from TXREQ
val queueCnt = queue.io.count
// TODO: this may be imprecise, review this later
val pipeStatus_s1_s5 = io.pipeStatusVec
val pipeStatus_s2_s5 = pipeStatus_s1_s5.tail
val pipeStatus_s1 = pipeStatus_s1_s5.head
// inflightCnt equals the number of reqs on s2~s5 that may flow into TXREQ soon, plus queueCnt.
val pipeStatus_s2 = pipeStatus_s1_s5(1)
val s2ReturnCredit = pipeStatus_s2.valid && !(pipeStatus_s2.bits.mshrTask && pipeStatus_s2.bits.toTXREQ)
// inflightCnt equals the number of reqs on s2~s5 that may flow into TXREQ soon, plus queueCnt.
// The calculation of inflightCnt might be imprecise and lead to false-positive back pressure.
val inflightCnt = PopCount(Cat(pipeStatus_s2_s5.map(s => s.valid && s.bits.mshrTask && s.bits.toTXREQ))) +
pipeStatus_s1.valid.asUInt +
// pipeStatus_s1.valid.asUInt +
queueCnt
s2ReturnCredit.asUInt + 1.U //Fix Timing: s1 always take credit and s2 return if not take
val noSpace = inflightCnt >= mshrsAll.U

io.toReqArb.blockMSHRReqEntrance := noSpace
Expand All @@ -73,4 +76,4 @@ class TXREQ(implicit p: Parameters) extends TL2CHIL2Module {
io.out.bits.tgtID := SAM(sam).lookup(io.out.bits.addr)
io.out.bits.size := log2Ceil(blockBytes).U(SIZE_WIDTH.W) // TODO
io.out.bits.addr := restoreAddressUInt(queue.io.deq.bits.addr, io.sliceId)
}
}
4 changes: 2 additions & 2 deletions src/main/scala/coupledL2/tl2chi/TXRSP.scala
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class TXRSP(implicit p: Parameters) extends TL2CHIL2Module {
require(chiOpt.isDefined)

// TODO: an mshrsAll-entry queue is too much, evaluate for a proper size later
val queue = Module(new Queue(new CHIRSP, entries = mshrsAll, flow = true))
val queue = Module(new Queue(new CHIRSP, entries = mshrsAll, flow = false))

// Back pressure logic from TXRSP
val queueCnt = queue.io.count
Expand Down Expand Up @@ -88,4 +88,4 @@ class TXRSP(implicit p: Parameters) extends TL2CHIL2Module {
// TODO: Finish this
rsp
}
}
}
1 change: 1 addition & 0 deletions src/main/scala/coupledL2/tl2tl/Slice.scala
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] {
a_reqBuf.io.mshrInfo := mshrCtl.io.msInfo
a_reqBuf.io.mainPipeBlock := mainPipe.io.toReqBuf
a_reqBuf.io.s1Entrance := reqArb.io.s1Entrance
a_reqBuf.io.taskFromArb_s2 := reqArb.io.taskToPipe_s2
sinkB.io.msInfo := mshrCtl.io.msInfo
sinkC.io.msInfo := mshrCtl.io.msInfo

Expand Down
Loading