Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RVA23 CMO: support RVA23 Zicbom & Zicboz #225

Merged
merged 15 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/main/scala/coupledL2/BaseSlice.scala
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ abstract class BaseSliceIO[T_OUT <: BaseOuterBundle](implicit p: Parameters) ext
// val msStatus = topDownOpt.map(_ => Vec(mshrsAll, ValidIO(new MSHRStatus)))
val dirResult = topDownOpt.map(_ => ValidIO(new DirResult))
val latePF = topDownOpt.map(_ => Output(Bool()))
val cmoReq = Flipped(DecoupledIO(new RVA23CMOReq()))
val cmoResp = DecoupledIO(new RVA23CMOResp())
}

abstract class BaseSlice[T_OUT <: BaseOuterBundle](implicit p: Parameters) extends L2Module {
Expand Down
14 changes: 14 additions & 0 deletions src/main/scala/coupledL2/Common.scala
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ class TaskBundle(implicit p: Parameters) extends L2Bundle
val useProbeData = Bool() // data source, true for ReleaseBuf and false for RefillBuf
val mshrRetry = Bool() // is retry task for mshr conflict

// For CMO request
val cmoTask = Bool()

// For Intent
val fromL2pft = prefetchOpt.map(_ => Bool()) // Is the prefetch req from L2(BOP) or from L1 prefetch?
// If true, MSHR should send an ack to L2 prefetcher.
Expand Down Expand Up @@ -245,6 +248,7 @@ class FSMState(implicit p: Parameters) extends L2Bundle {
// val s_grantack = Bool() // respond grantack downwards, moved to GrantBuf
// val s_triggerprefetch = prefetchOpt.map(_ => Bool())
val s_retry = Bool() // need retry when conflict
val s_cmoresp = Bool() // resp upwards for finishing cmo inst

// wait
val w_rprobeackfirst = Bool()
Expand Down Expand Up @@ -318,6 +322,16 @@ class L2ToL1Hint(implicit p: Parameters) extends Bundle {
val isKeyword = Bool() // miss entry keyword
}

/**
  * Custom CMO (cache management operation) instruction request exchanged between L1 and L2.
  *
  * This bundle is the payload of the `cmoReq` decoupled channels declared as
  * `Flipped(DecoupledIO(new RVA23CMOReq()))`.
  */
class RVA23CMOReq(implicit p: Parameters) extends Bundle {
// Which CMO instruction is requested; encoding as given by the original author:
val opcode = UInt(3.W) // 0-cbo.clean, 1-cbo.flush, 2-cbo.inval, 3-cbo.zero
// Target address of the operation, carried as a fixed 64-bit value.
// NOTE(review): presumably a physical address that the receiver splits into tag/set/offset — confirm.
val address = UInt(64.W)
}
/**
  * Custom CMO instruction response (ack) exchanged between L2 and L1.
  *
  * This bundle is the payload of the `cmoResp` decoupled channels declared as
  * `DecoupledIO(new RVA23CMOResp())`.
  */
class RVA23CMOResp(implicit p: Parameters) extends Bundle {
// Address associated with the acknowledged CMO request, carried as a fixed 64-bit value.
// NOTE(review): the tl2chi MSHR drives this field with 0.U — confirm whether receivers rely on it.
val address = UInt(64.W)
}

// custom l2 - l1 tlb
// FIXME lyq: Tlbcmd and TlbExceptionBundle, how to use L1 corresponding bundles?
object TlbCmd {
Expand Down
21 changes: 21 additions & 0 deletions src/main/scala/coupledL2/CoupledL2.scala
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ trait HasCoupledL2Parameters {
def hasTPPrefetcher = prefetchers.exists(_.isInstanceOf[TPParameters])
def hasPrefetchBit = prefetchers.exists(_.hasPrefetchBit) // !! TODO.test this
def hasPrefetchSrc = prefetchers.exists(_.hasPrefetchSrc)
def hasRVA23CMO = cacheParams.hasRVA23CMO
def topDownOpt = if(cacheParams.elaboratedTopDown) Some(true) else None

def enableHintGuidedGrant = true
Expand Down Expand Up @@ -223,6 +224,8 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has
if(hasReceiver) Some(BundleBridgeSink(Some(() => new PrefetchRecv))) else None
val tpmeta_source_node = if(hasTPPrefetcher) Some(BundleBridgeSource(() => DecoupledIO(new TPmetaReq))) else None
val tpmeta_sink_node = if(hasTPPrefetcher) Some(BundleBridgeSink(Some(() => ValidIO(new TPmetaResp)))) else None
val cmo_sink_node = if(hasRVA23CMO) Some(BundleBridgeSink(Some(() => DecoupledIO(new RVA23CMOReq)))) else None
val cmo_source_node = if(hasRVA23CMO) Some(BundleBridgeSource(Some(() => DecoupledIO(new RVA23CMOResp)))) else None

val managerPortParams = (m: TLSlavePortParameters) => TLSlavePortParameters.v1(
m.managers.map { m =>
Expand Down Expand Up @@ -439,9 +442,27 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has
s.tlb_req.resp.ready := true.B
}

// Hook the optional top-level CMO request node up to this slice's cmoReq port.
cmo_sink_node match {
case Some(x) =>
// The request is shown to this slice only when bank_eq(...) holds for `i`
// (presumably the index of this slice/bank — confirm against the enclosing loop).
slice.io.cmoReq.valid := x.in.head._1.valid && bank_eq(x.in.head._1.bits.address >> offsetBits, i, bankBits)
slice.io.cmoReq.bits := x.in.head._1.bits
// NOTE(review): this connect targets the same shared source `ready` every time this code runs for a slice.
// Assuming it is elaborated once per slice, Chisel last-connect semantics would leave only the final
// slice's `ready` driving the source, regardless of which bank the address selects — confirm whether
// `ready` should instead be selected by the bank match used for `valid` above.
x.in.head._1.ready := slice.io.cmoReq.ready
case None =>
// No CMO node: tie the request channel off with zeroed fields and never accept a response.
slice.io.cmoReq.valid := false.B
slice.io.cmoReq.bits.opcode := 0.U
slice.io.cmoReq.bits.address := 0.U
slice.io.cmoResp.ready := false.B
}

slice
}

// If the CMO response node exists, funnel every slice's cmoResp channel into its single
// output through fastArb; when the node is absent nothing is connected here.
cmo_source_node.foreach { x =>
  fastArb(slices.map(_.io.cmoResp), x.out.head._1, Some("cmo_resp"))
}

// Refill hint
if (enableHintGuidedGrant) {
// for timing consideration, hint should latch one cycle before sending to L1
Expand Down
2 changes: 2 additions & 0 deletions src/main/scala/coupledL2/L2Param.scala
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ case class L2Param(
elaboratedTopDown: Boolean = true,
// env
FPGAPlatform: Boolean = false,
// CMO
hasRVA23CMO: Boolean = false,

// Network layer SAM
sam: Seq[(AddressSet, Int)] = Seq(AddressSet.everything -> 0)
Expand Down
16 changes: 12 additions & 4 deletions src/main/scala/coupledL2/RequestArb.scala
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class RequestArb(implicit p: Parameters) extends L2Module
val sinkB = Flipped(DecoupledIO(new TaskBundle))
val sinkC = Flipped(DecoupledIO(new TaskBundle))
val mshrTask = Flipped(DecoupledIO(new TaskBundle))
val cmoTask = if (hasRVA23CMO) Some(Flipped(DecoupledIO(new TaskBundle))) else None

/* read/write directory */
val dirRead_s1 = DecoupledIO(new DirRead()) // To directory, read meta/tag
Expand Down Expand Up @@ -92,6 +93,13 @@ class RequestArb(implicit p: Parameters) extends L2Module
val s2_ready = Wire(Bool())
val mshr_task_s1 = RegInit(0.U.asTypeOf(Valid(new TaskBundle())))

// Stage-1 view of the optional CMO task port. This is a plain wire (not a register), so the
// CMO task is visible in the same cycle it is offered on io.cmoTask.
val cmo_task_s1 = Wire(Valid(new TaskBundle()))
// Valid only when the port exists, the directory can take a read, and reset has finished.
cmo_task_s1.valid := io.cmoTask
  .map(t => io.dirRead_s1.ready && t.valid && resetFinish)
  .getOrElse(false.B)
// Without the port, the payload is an all-zero TaskBundle.
cmo_task_s1.bits := io.cmoTask.map(_.bits).getOrElse(0.U.asTypeOf(new TaskBundle))
// Accept a CMO task only when stage 2 can also take it.
io.cmoTask.foreach { t =>
  t.ready := io.dirRead_s1.ready && resetFinish && s2_ready
}

val s1_needs_replRead = mshr_task_s1.valid && mshr_task_s1.bits.fromA && mshr_task_s1.bits.replTask && (
mshr_task_s1.bits.opcode(2, 1) === Grant(2, 1) ||
mshr_task_s1.bits.opcode === AccessAckData ||
Expand All @@ -101,7 +109,7 @@ class RequestArb(implicit p: Parameters) extends L2Module
/* ======== Stage 0 ======== */
// if mshr_task_s1 is replRead, it might stall and wait for dirRead.ready, so we block new mshrTask from entering
// TODO: will cause msTask path vacant for one-cycle after replRead, since not use Flow so as to avoid ready propagation
io.mshrTask.ready := !io.fromGrantBuffer.blockMSHRReqEntrance && !s1_needs_replRead && !(mshr_task_s1.valid && !s2_ready) &&
io.mshrTask.ready := !io.fromGrantBuffer.blockMSHRReqEntrance && !s1_needs_replRead && !(mshr_task_s1.valid && !s2_ready) && !cmo_task_s1.valid &&
(if (io.fromSourceC.isDefined) !io.fromSourceC.get.blockMSHRReqEntrance else true.B) &&
(if (io.fromTXDAT.isDefined) !io.fromTXDAT.get.blockMSHRReqEntrance else true.B) &&
(if (io.fromTXRSP.isDefined) !io.fromTXRSP.get.blockMSHRReqEntrance else true.B) &&
Expand Down Expand Up @@ -140,7 +148,7 @@ class RequestArb(implicit p: Parameters) extends L2Module

// TODO: A Hint is allowed to enter if !s2_ready for mcp2_stall

val sink_ready_basic = io.dirRead_s1.ready && resetFinish && !mshr_task_s1.valid && s2_ready
val sink_ready_basic = io.dirRead_s1.ready && resetFinish && !mshr_task_s1.valid && !cmo_task_s1.valid && s2_ready

io.sinkA.ready := sink_ready_basic && !block_A && !sinkValids(1) && !sinkValids(0) // SinkC prior to SinkA & SinkB
io.sinkB.ready := sink_ready_basic && !block_B && !sinkValids(0) // SinkB prior to SinkA
Expand All @@ -152,7 +160,7 @@ class RequestArb(implicit p: Parameters) extends L2Module

// mshr_task_s1 is s1_[reg]
// task_s1 is [wire] to s2_reg
val task_s1 = Mux(mshr_task_s1.valid, mshr_task_s1, chnl_task_s1)
val task_s1 = Mux(cmo_task_s1.valid, cmo_task_s1, Mux(mshr_task_s1.valid, mshr_task_s1, chnl_task_s1))
val s1_to_s2_valid = task_s1.valid && !mshr_replRead_stall

s1_cango := task_s1.valid && !mshr_replRead_stall
Expand All @@ -163,7 +171,7 @@ class RequestArb(implicit p: Parameters) extends L2Module

/* Meta read request */
// ^ only sinkA/B/C tasks need to read directory
io.dirRead_s1.valid := s2_ready && (chnl_task_s1.valid && !mshr_task_s1.valid || s1_needs_replRead && !io.fromMainPipe.blockG_s1)
io.dirRead_s1.valid := s2_ready && (chnl_task_s1.valid && !mshr_task_s1.valid || s1_needs_replRead && !io.fromMainPipe.blockG_s1 || cmo_task_s1.valid)
io.dirRead_s1.bits.set := task_s1.bits.set
io.dirRead_s1.bits.tag := task_s1.bits.tag
// invalid way which causes mshr_retry
Expand Down
74 changes: 74 additions & 0 deletions src/main/scala/coupledL2/SinkCMO.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/** *************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
* *************************************************************************************
*/

package coupledL2

import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.tilelink.TLMessages._
import org.chipsalliance.cde.config.Parameters
import utility.MemReqSource


/**
  * SinkCMO turns an incoming CMO instruction request into a [[TaskBundle]].
  *
  * The conversion is purely combinational: `io.task.valid` mirrors `io.cmoReq.valid`, and
  * `io.cmoReq.ready` mirrors `io.task.ready`, so nothing is buffered here. Per the original
  * author's note, the resulting task is meant to be sent to RequestArb directly.
  */
class SinkCMO(implicit p: Parameters) extends L2Module {
  val io = IO(new Bundle() {
    val cmoReq = Flipped(DecoupledIO(new RVA23CMOReq()))
    val task = DecoupledIO(new TaskBundle)
  })

  // Split the request address once; parseAddress yields a 3-tuple used as (tag, set, offset).
  private val (cmoTag, cmoSet, cmoOff) = parseAddress(io.cmoReq.bits.address)

  val task = Wire(new TaskBundle)
  // Start from an all-zero bundle; the explicit connects below override individual fields.
  task := 0.U.asTypeOf(new TaskBundle)

  // ---- Identity of the request ----
  task.cmoTask := true.B
  task.opcode := io.cmoReq.bits.opcode
  task.tag := cmoTag
  task.set := cmoSet
  task.off := cmoOff

  // ---- Channel / TileLink-style fields: not used by a CMO task, held at zero ----
  task.channel := "b000".U
  task.txChannel := 0.U
  task.param := 0.U
  task.size := 0.U
  task.sourceId := 0.U(sourceIdBits.W)
  task.bufIdx := 0.U(bufIdxBits.W)
  task.reqSource := MemReqSource.NoWhere.id.U // Ignore

  // ---- Optional fields (only present in some configurations) ----
  task.alias.foreach(_ := 0.U)
  task.vaddr.foreach(_ := 0.U)
  task.isKeyword.foreach(_ := false.B)
  task.aliasTask.foreach(_ := false.B)
  task.fromL2pft.foreach(_ := false.B)
  task.needHint.foreach(_ := false.B)

  // ---- MSHR-related flags: this is a fresh request, not an MSHR task ----
  task.mshrTask := false.B
  task.mshrId := 0.U(mshrBits.W)
  task.mshrRetry := false.B
  task.needProbeAckData := false.B
  task.useProbeData := false.B
  task.replTask := false.B
  task.mergeA := false.B
  task.aMergeTask := 0.U.asTypeOf(new MergeTaskBundle)

  // ---- Directory / data-storage fields: no write is requested, every way is selectable ----
  task.dirty := false.B
  task.way := 0.U(wayBits.W)
  task.meta := 0.U.asTypeOf(new MetaEntry)
  task.metaWen := false.B
  task.tagWen := false.B
  task.dsWen := false.B
  task.wayMask := Fill(cacheParams.ways, "b1".U)

  // ---- Handshake: straight pass-through between request and task ----
  io.task.bits := task
  io.task.valid := io.cmoReq.valid
  io.cmoReq.ready := io.task.ready
}
55 changes: 38 additions & 17 deletions src/main/scala/coupledL2/tl2chi/MSHR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class MSHRTasks(implicit p: Parameters) extends TL2CHIL2Bundle {
val source_b = DecoupledIO(new SourceBReq)
val mainpipe = DecoupledIO(new TaskBundle) // To Mainpipe (SourceC or SourceD)
// val prefetchTrain = prefetchOpt.map(_ => DecoupledIO(new PrefetchTrain)) // To prefetcher
val cmoResp = DecoupledIO(new RVA23CMOResp()) // To L1 CMO_channel
}

class MSHRResps(implicit p: Parameters) extends TL2CHIL2Bundle {
Expand Down Expand Up @@ -156,6 +157,10 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes {
val snpToN = isSnpToN(req_chiOpcode)
val snpToB = isSnpToB(req_chiOpcode)

val req_cmoClean = req.cmoTask && req.opcode === 0.U
val req_cmoFlush = req.cmoTask && req.opcode === 1.U
val req_cmoInval = req.cmoTask && req.opcode === 2.U

/**
* About which snoop should echo SnpRespData[Fwded] instead of SnpResp[Fwded]:
* 1. When the snooped block is dirty, always echo SnpRespData[Fwded], except for SnpMakeInvalid*, SnpStash*,
Expand Down Expand Up @@ -203,10 +208,11 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes {
/* ======== Task allocation ======== */
// The first Release with AllowRetry = 1 is sent to main pipe, because the task needs to write DS.
// The second Release with AllowRetry = 0 is sent to TXREQ directly, because DS is already written.
val release_valid1 = !state.s_release && state.w_rprobeacklast && state.w_grantlast && state.w_replResp
val release_valid1 = (!state.s_release && state.w_rprobeacklast && state.w_grantlast && state.w_replResp) || (!state.s_release && state.w_rprobeacklast && state.w_replResp && (req_cmoClean || req_cmoFlush))
val release_valid2 = !state.s_reissue.getOrElse(false.B) && !state.w_releaseack && gotRetryAck && gotPCrdGrant
// Theoretically, data to be released is saved in ReleaseBuffer, so Acquire can be sent as soon as req enters mshr
io.tasks.txreq.valid := !state.s_acquire ||
// For cmo_clean/flush, dirty data should be released downward first, then Clean req can be sent
io.tasks.txreq.valid := !state.s_acquire && !((req_cmoClean || req_cmoFlush) && (!state.w_releaseack || !state.w_rprobeacklast)) ||
!state.s_reissue.getOrElse(false.B) && !state.w_grant && gotRetryAck && gotPCrdGrant ||
release_valid2
io.tasks.txrsp.valid := !state.s_compack.get && state.w_grantlast
Expand All @@ -224,6 +230,8 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes {
mp_cbwrdata_valid ||
mp_dct_valid
// io.tasks.prefetchTrain.foreach(t => t.valid := !state.s_triggerprefetch.getOrElse(true.B))
io.tasks.cmoResp.valid := !state.s_cmoresp && state.w_grantlast && state.w_rprobeacklast
io.tasks.cmoResp.bits.address := 0.U

when (
pending_grant_valid &&
Expand Down Expand Up @@ -301,23 +309,24 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes {
*/
val isWriteBackFull = isT(meta.state) && meta.dirty || probeDirty
val isEvict = !isWriteBackFull
oa.opcode := Mux(
release_valid2,
Mux(isWriteBackFull, WriteBackFull, Evict),
ParallelPriorityMux(Seq(
(req.opcode === AcquirePerm && req.param === NtoT) -> MakeUnique,
req_needT -> ReadUnique,
req_needB /* Default */ -> ReadNotSharedDirty
))
)
oa.opcode := ParallelPriorityMux(Seq(
req_cmoClean -> CleanShared,
req_cmoFlush -> CleanInvalid,
req_cmoInval -> MakeInvalid,
(release_valid2 && isWriteBackFull) -> WriteBackFull,
(release_valid2 && !isWriteBackFull) -> Evict,
(req.opcode === AcquirePerm && req.param === NtoT) -> MakeUnique,
req_needT -> ReadUnique,
req_needB /* Default */ -> ReadNotSharedDirty
))
oa.size := log2Ceil(blockBytes).U
oa.addr := Cat(Mux(release_valid2, dirResult.tag, req.tag), req.set, 0.U(offsetBits.W))
oa.ns := false.B
oa.likelyshared := false.B
oa.allowRetry := state.s_reissue.getOrElse(false.B)
oa.order := OrderEncodings.None
oa.pCrdType := Mux(!state.s_reissue.getOrElse(false.B), pcrdtype, 0.U)
oa.expCompAck := !release_valid2
oa.expCompAck := !release_valid2 && !req_cmoInval && !req_cmoClean && !req_cmoFlush
oa.memAttr := MemAttr(
cacheable = true.B,
allocate = !(release_valid2 && isEvict),
Expand Down Expand Up @@ -346,9 +355,13 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes {
Mux(snpToN, toN, toT)
),
Mux(
req_get && dirResult.hit && meta.state === TRUNK,
toB,
toN
req.cmoTask,
toN,
Mux(
req_get && dirResult.hit && meta.state === TRUNK,
toB,
toN
)
)
)
ob.alias.foreach(_ := meta.alias.getOrElse(0.U))
Expand Down Expand Up @@ -385,7 +398,7 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes {
mp_release.fromL2pft.foreach(_ := false.B)
mp_release.needHint.foreach(_ := false.B)
mp_release.dirty := false.B//meta.dirty && meta.state =/= INVALID || probeDirty
mp_release.metaWen := false.B
mp_release.metaWen := (req_cmoClean || req_cmoFlush) // when clean/flush, invalid line by mshr(when replace, invalid by directory)
mp_release.meta := MetaEntry()
mp_release.tagWen := false.B
mp_release.dsWen := true.B // write refillData to DS
Expand Down Expand Up @@ -786,6 +799,9 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes {
state.s_dct.get := true.B
}
}
when (io.tasks.cmoResp.fire) {
state.s_cmoresp := true.B
}

/* Handling response

Expand Down Expand Up @@ -817,6 +833,10 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes {
when (isToN(c_resp.bits.param)) {
probeGotN := true.B
}
when ((req_cmoClean || req_cmoFlush) && c_resp.bits.opcode === ProbeAckData) {
state.s_release := false.B
state.w_releaseack := false.B
}
}

val rxdatIsU = rxdat.bits.resp.get === UC
Expand Down Expand Up @@ -942,7 +962,8 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes {
state.s_compack.getOrElse(true.B) &&
state.s_cbwrdata.getOrElse(true.B) &&
state.s_reissue.getOrElse(true.B) &&
state.s_dct.getOrElse(true.B)
state.s_dct.getOrElse(true.B) &&
state.s_cmoresp
val no_wait = state.w_rprobeacklast && state.w_pprobeacklast && state.w_grantlast && state.w_releaseack && state.w_replResp
val will_free = no_schedule && no_wait
when (will_free && req_valid) {
Expand Down
4 changes: 4 additions & 0 deletions src/main/scala/coupledL2/tl2chi/MSHRCtl.scala
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes
val toTXREQ = DecoupledIO(new CHIREQ())
val toTXRSP = DecoupledIO(new CHIRSP()) // TODO: unify with main pipe, which should be TaskBundle
val toSourceB = DecoupledIO(new TLBundleB(edgeIn.bundle))
val cmoResp = DecoupledIO(new RVA23CMOResp())

/* to block sourceB from sending same-addr probe until GrantAck received */
val grantStatus = Input(Vec(grantBufInflightSize, new GrantStatus()))
Expand Down Expand Up @@ -260,6 +261,9 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes
sourceB.io.grantStatus := io.grantStatus
io.toSourceB <> sourceB.io.sourceB

/* CMO Ack upwards */
fastArb(mshrs.map(_.io.tasks.cmoResp), io.cmoResp, Some("cmo_resp"))

/* Arbitrate MSHR task to RequestArbiter */
fastArb(mshrs.map(_.io.tasks.mainpipe), io.mshrTask, Some("mshr_task"))

Expand Down
Loading
Loading