diff --git a/src/main/scala/coupledL2/BaseSlice.scala b/src/main/scala/coupledL2/BaseSlice.scala index 1d23180d..26872a24 100644 --- a/src/main/scala/coupledL2/BaseSlice.scala +++ b/src/main/scala/coupledL2/BaseSlice.scala @@ -35,6 +35,8 @@ abstract class BaseSliceIO[T_OUT <: BaseOuterBundle](implicit p: Parameters) ext // val msStatus = topDownOpt.map(_ => Vec(mshrsAll, ValidIO(new MSHRStatus))) val dirResult = topDownOpt.map(_ => ValidIO(new DirResult)) val latePF = topDownOpt.map(_ => Output(Bool())) + val cmoReq = Flipped(DecoupledIO(new RVA23CMOReq())) + val cmoResp = DecoupledIO(new RVA23CMOResp()) } abstract class BaseSlice[T_OUT <: BaseOuterBundle](implicit p: Parameters) extends L2Module { diff --git a/src/main/scala/coupledL2/Common.scala b/src/main/scala/coupledL2/Common.scala index 133d34f3..f2854d54 100644 --- a/src/main/scala/coupledL2/Common.scala +++ b/src/main/scala/coupledL2/Common.scala @@ -79,6 +79,9 @@ class TaskBundle(implicit p: Parameters) extends L2Bundle val useProbeData = Bool() // data source, true for ReleaseBuf and false for RefillBuf val mshrRetry = Bool() // is retry task for mshr conflict + // For CMO request + val cmoTask = Bool() + // For Intent val fromL2pft = prefetchOpt.map(_ => Bool()) // Is the prefetch req from L2(BOP) or from L1 prefetch? // If true, MSHR should send an ack to L2 prefetcher. 
@@ -245,6 +248,7 @@ class FSMState(implicit p: Parameters) extends L2Bundle { // val s_grantack = Bool() // respond grantack downwards, moved to GrantBuf // val s_triggerprefetch = prefetchOpt.map(_ => Bool()) val s_retry = Bool() // need retry when conflict + val s_cmoresp = Bool() // resp upwards for finishing cmo inst // wait val w_rprobeackfirst = Bool() @@ -318,6 +322,16 @@ class L2ToL1Hint(implicit p: Parameters) extends Bundle { val isKeyword = Bool() // miss entry keyword } +// custom l2 - l1 CMO inst req +class RVA23CMOReq(implicit p: Parameters) extends Bundle { + val opcode = UInt(3.W) // 0-cbo.clean, 1-cbo.flush, 2-cbo.inval, 3-cbo.zero + val address = UInt(64.W) +} +// custom l2 - l1 CMO inst resp(ack) +class RVA23CMOResp(implicit p: Parameters) extends Bundle { + val address = UInt(64.W) +} + // custom l2 - l1 tlb // FIXME lyq: Tlbcmd and TlbExceptionBundle, how to use L1 corresponding bundles? object TlbCmd { diff --git a/src/main/scala/coupledL2/CoupledL2.scala b/src/main/scala/coupledL2/CoupledL2.scala index bb7a9b9f..c8eace6b 100644 --- a/src/main/scala/coupledL2/CoupledL2.scala +++ b/src/main/scala/coupledL2/CoupledL2.scala @@ -77,6 +77,7 @@ trait HasCoupledL2Parameters { def hasTPPrefetcher = prefetchers.exists(_.isInstanceOf[TPParameters]) def hasPrefetchBit = prefetchers.exists(_.hasPrefetchBit) // !! 
TODO.test this def hasPrefetchSrc = prefetchers.exists(_.hasPrefetchSrc) + def hasRVA23CMO = cacheParams.hasRVA23CMO def topDownOpt = if(cacheParams.elaboratedTopDown) Some(true) else None def enableHintGuidedGrant = true @@ -223,6 +224,8 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has if(hasReceiver) Some(BundleBridgeSink(Some(() => new PrefetchRecv))) else None val tpmeta_source_node = if(hasTPPrefetcher) Some(BundleBridgeSource(() => DecoupledIO(new TPmetaReq))) else None val tpmeta_sink_node = if(hasTPPrefetcher) Some(BundleBridgeSink(Some(() => ValidIO(new TPmetaResp)))) else None + val cmo_sink_node = if(hasRVA23CMO) Some(BundleBridgeSink(Some(() => DecoupledIO(new RVA23CMOReq)))) else None + val cmo_source_node = if(hasRVA23CMO) Some(BundleBridgeSource(Some(() => DecoupledIO(new RVA23CMOResp)))) else None val managerPortParams = (m: TLSlavePortParameters) => TLSlavePortParameters.v1( m.managers.map { m => @@ -439,9 +442,27 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has s.tlb_req.resp.ready := true.B } + cmo_sink_node match { + case Some(x) => + slice.io.cmoReq.valid := x.in.head._1.valid && bank_eq(x.in.head._1.bits.address >> offsetBits, i, bankBits) + slice.io.cmoReq.bits := x.in.head._1.bits + case None => + slice.io.cmoReq.valid := false.B + slice.io.cmoReq.bits.opcode := 0.U + slice.io.cmoReq.bits.address := 0.U + slice.io.cmoResp.ready := false.B + } + slice } + cmo_sink_node.foreach { x => x.in.head._1.ready := slices.map(_.io.cmoReq.fire).reduce(_ || _) } /* driven once, outside the per-slice map: ready is the fire of the bank-selected slice, not the last slice's ready */ + cmo_source_node match { + case Some(x) => + fastArb(slices.map(_.io.cmoResp), x.out.head._1, Some("cmo_resp")) + case None => + } + // Refill hint if (enableHintGuidedGrant) { // for timing consideration, hint should latch one cycle before sending to L1 diff --git a/src/main/scala/coupledL2/L2Param.scala b/src/main/scala/coupledL2/L2Param.scala index 1cfcd13c..ecb7f3a3 100644 --- a/src/main/scala/coupledL2/L2Param.scala +++ b/src/main/scala/coupledL2/L2Param.scala @@ -104,6
+104,8 @@ case class L2Param( elaboratedTopDown: Boolean = true, // env FPGAPlatform: Boolean = false, + // CMO + hasRVA23CMO: Boolean = false, // Network layer SAM sam: Seq[(AddressSet, Int)] = Seq(AddressSet.everything -> 0) diff --git a/src/main/scala/coupledL2/RequestArb.scala b/src/main/scala/coupledL2/RequestArb.scala index 88b6283a..3f028866 100644 --- a/src/main/scala/coupledL2/RequestArb.scala +++ b/src/main/scala/coupledL2/RequestArb.scala @@ -41,6 +41,7 @@ class RequestArb(implicit p: Parameters) extends L2Module val sinkB = Flipped(DecoupledIO(new TaskBundle)) val sinkC = Flipped(DecoupledIO(new TaskBundle)) val mshrTask = Flipped(DecoupledIO(new TaskBundle)) + val cmoTask = if (hasRVA23CMO) Some(Flipped(DecoupledIO(new TaskBundle))) else None /* read/write directory */ val dirRead_s1 = DecoupledIO(new DirRead()) // To directory, read meta/tag @@ -92,6 +93,13 @@ class RequestArb(implicit p: Parameters) extends L2Module val s2_ready = Wire(Bool()) val mshr_task_s1 = RegInit(0.U.asTypeOf(Valid(new TaskBundle()))) + val cmo_task_s1 = Wire(Valid(new TaskBundle())) + cmo_task_s1.valid := (if (io.cmoTask.isDefined) io.dirRead_s1.ready && io.cmoTask.get.valid && resetFinish else false.B) + cmo_task_s1.bits := (if (io.cmoTask.isDefined) io.cmoTask.get.bits else 0.U.asTypeOf(new TaskBundle)) + if (io.cmoTask.isDefined) { + io.cmoTask.get.ready := io.dirRead_s1.ready && resetFinish && s2_ready + } + val s1_needs_replRead = mshr_task_s1.valid && mshr_task_s1.bits.fromA && mshr_task_s1.bits.replTask && ( mshr_task_s1.bits.opcode(2, 1) === Grant(2, 1) || mshr_task_s1.bits.opcode === AccessAckData || @@ -101,7 +109,7 @@ class RequestArb(implicit p: Parameters) extends L2Module /* ======== Stage 0 ======== */ // if mshr_task_s1 is replRead, it might stall and wait for dirRead.ready, so we block new mshrTask from entering // TODO: will cause msTask path vacant for one-cycle after replRead, since not use Flow so as to avoid ready propagation - io.mshrTask.ready := 
!io.fromGrantBuffer.blockMSHRReqEntrance && !s1_needs_replRead && !(mshr_task_s1.valid && !s2_ready) && + io.mshrTask.ready := !io.fromGrantBuffer.blockMSHRReqEntrance && !s1_needs_replRead && !(mshr_task_s1.valid && !s2_ready) && !cmo_task_s1.valid && (if (io.fromSourceC.isDefined) !io.fromSourceC.get.blockMSHRReqEntrance else true.B) && (if (io.fromTXDAT.isDefined) !io.fromTXDAT.get.blockMSHRReqEntrance else true.B) && (if (io.fromTXRSP.isDefined) !io.fromTXRSP.get.blockMSHRReqEntrance else true.B) && @@ -140,7 +148,7 @@ class RequestArb(implicit p: Parameters) extends L2Module // TODO: A Hint is allowed to enter if !s2_ready for mcp2_stall - val sink_ready_basic = io.dirRead_s1.ready && resetFinish && !mshr_task_s1.valid && s2_ready + val sink_ready_basic = io.dirRead_s1.ready && resetFinish && !mshr_task_s1.valid && !cmo_task_s1.valid && s2_ready io.sinkA.ready := sink_ready_basic && !block_A && !sinkValids(1) && !sinkValids(0) // SinkC prior to SinkA & SinkB io.sinkB.ready := sink_ready_basic && !block_B && !sinkValids(0) // SinkB prior to SinkA @@ -152,7 +160,7 @@ class RequestArb(implicit p: Parameters) extends L2Module // mshr_task_s1 is s1_[reg] // task_s1 is [wire] to s2_reg - val task_s1 = Mux(mshr_task_s1.valid, mshr_task_s1, chnl_task_s1) + val task_s1 = Mux(cmo_task_s1.valid, cmo_task_s1, Mux(mshr_task_s1.valid, mshr_task_s1, chnl_task_s1)) val s1_to_s2_valid = task_s1.valid && !mshr_replRead_stall s1_cango := task_s1.valid && !mshr_replRead_stall @@ -163,7 +171,7 @@ class RequestArb(implicit p: Parameters) extends L2Module /* Meta read request */ // ^ only sinkA/B/C tasks need to read directory - io.dirRead_s1.valid := s2_ready && (chnl_task_s1.valid && !mshr_task_s1.valid || s1_needs_replRead && !io.fromMainPipe.blockG_s1) + io.dirRead_s1.valid := s2_ready && (chnl_task_s1.valid && !mshr_task_s1.valid || s1_needs_replRead && !io.fromMainPipe.blockG_s1 || cmo_task_s1.valid) io.dirRead_s1.bits.set := task_s1.bits.set io.dirRead_s1.bits.tag := 
task_s1.bits.tag // invalid way which causes mshr_retry diff --git a/src/main/scala/coupledL2/SinkCMO.scala b/src/main/scala/coupledL2/SinkCMO.scala new file mode 100644 index 00000000..c9d334b4 --- /dev/null +++ b/src/main/scala/coupledL2/SinkCMO.scala @@ -0,0 +1,74 @@ +/** ************************************************************************************* + * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences + * Copyright (c) 2020-2021 Peng Cheng Laboratory + * + * XiangShan is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * + * See the Mulan PSL v2 for more details. + * ************************************************************************************* + */ + +package coupledL2 + +import chisel3._ +import chisel3.util._ +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.tilelink.TLMessages._ +import org.chipsalliance.cde.config.Parameters +import utility.MemReqSource + + +// SinkCMO receives upwards CMO_Inst Req, and send it to RequestArb directly +class SinkCMO(implicit p: Parameters) extends L2Module { + val io = IO(new Bundle() { + val cmoReq = Flipped(DecoupledIO(new RVA23CMOReq())) + val task = DecoupledIO(new TaskBundle) + }) + + val task = Wire(new TaskBundle) + task := 0.U.asTypeOf(new TaskBundle) + task.cmoTask := true.B + task.channel := "b000".U + task.txChannel := 0.U + task.tag := parseAddress(io.cmoReq.bits.address)._1 + task.set := parseAddress(io.cmoReq.bits.address)._2 + task.off := parseAddress(io.cmoReq.bits.address)._3 + task.alias.foreach(_ := 0.U) + task.vaddr.foreach(_ := 0.U) + task.isKeyword.foreach(_ := 
false.B) + task.opcode := io.cmoReq.bits.opcode + task.param := 0.U + task.size := 0.U + task.sourceId := 0.U(sourceIdBits.W) + task.bufIdx := 0.U(bufIdxBits.W) + task.needProbeAckData := false.B + task.mshrTask := false.B + task.mshrId := 0.U(mshrBits.W) + task.aliasTask.foreach(_ := false.B) + task.useProbeData := false.B + task.mshrRetry := false.B + task.fromL2pft.foreach(_ := false.B) + task.needHint.foreach(_ := false.B) + task.dirty := false.B + task.way := 0.U(wayBits.W) + task.meta := 0.U.asTypeOf(new MetaEntry) + task.metaWen := false.B + task.tagWen := false.B + task.dsWen := false.B + task.wayMask := Fill(cacheParams.ways, "b1".U) + task.reqSource := MemReqSource.NoWhere.id.U // Ignore + task.replTask := false.B + task.mergeA := false.B + task.aMergeTask := 0.U.asTypeOf(new MergeTaskBundle) + + io.task.valid := io.cmoReq.valid + io.task.bits := task + io.cmoReq.ready := io.task.ready +} diff --git a/src/main/scala/coupledL2/tl2chi/MSHR.scala b/src/main/scala/coupledL2/tl2chi/MSHR.scala index 4a335f4e..60e84772 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHR.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHR.scala @@ -39,6 +39,7 @@ class MSHRTasks(implicit p: Parameters) extends TL2CHIL2Bundle { val source_b = DecoupledIO(new SourceBReq) val mainpipe = DecoupledIO(new TaskBundle) // To Mainpipe (SourceC or SourceD) // val prefetchTrain = prefetchOpt.map(_ => DecoupledIO(new PrefetchTrain)) // To prefetcher + val cmoResp = DecoupledIO(new RVA23CMOResp()) // To L1 CMO_channel } class MSHRResps(implicit p: Parameters) extends TL2CHIL2Bundle { @@ -156,6 +157,10 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { val snpToN = isSnpToN(req_chiOpcode) val snpToB = isSnpToB(req_chiOpcode) + val req_cmoClean = req.cmoTask && req.opcode === 0.U + val req_cmoFlush = req.cmoTask && req.opcode === 1.U + val req_cmoInval = req.cmoTask && req.opcode === 2.U + /** * About which snoop should echo SnpRespData[Fwded] instead of SnpResp[Fwded]: 
* 1. When the snooped block is dirty, always echo SnpRespData[Fwded], except for SnpMakeInvalid*, SnpStash*, @@ -203,10 +208,11 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { /* ======== Task allocation ======== */ // The first Release with AllowRetry = 1 is sent to main pipe, because the task needs to write DS. // The second Release with AllowRetry = 0 is sent to TXREQ directly, because DS is already written. - val release_valid1 = !state.s_release && state.w_rprobeacklast && state.w_grantlast && state.w_replResp + val release_valid1 = (!state.s_release && state.w_rprobeacklast && state.w_grantlast && state.w_replResp) || (!state.s_release && state.w_rprobeacklast && state.w_replResp && (req_cmoClean || req_cmoFlush)) val release_valid2 = !state.s_reissue.getOrElse(false.B) && !state.w_releaseack && gotRetryAck && gotPCrdGrant // Theoretically, data to be released is saved in ReleaseBuffer, so Acquire can be sent as soon as req enters mshr - io.tasks.txreq.valid := !state.s_acquire || + // For cmo_clean/flush, dirty data should be released downward first, then Clean req can be sent + io.tasks.txreq.valid := !state.s_acquire && !((req_cmoClean || req_cmoFlush) && (!state.w_releaseack || !state.w_rprobeacklast)) || !state.s_reissue.getOrElse(false.B) && !state.w_grant && gotRetryAck && gotPCrdGrant || release_valid2 io.tasks.txrsp.valid := !state.s_compack.get && state.w_grantlast @@ -224,6 +230,8 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { mp_cbwrdata_valid || mp_dct_valid // io.tasks.prefetchTrain.foreach(t => t.valid := !state.s_triggerprefetch.getOrElse(true.B)) + io.tasks.cmoResp.valid := !state.s_cmoresp && state.w_grantlast && state.w_rprobeacklast + io.tasks.cmoResp.bits.address := 0.U when ( pending_grant_valid && @@ -301,15 +309,16 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { */ val isWriteBackFull = isT(meta.state) && meta.dirty || probeDirty val 
isEvict = !isWriteBackFull - oa.opcode := Mux( - release_valid2, - Mux(isWriteBackFull, WriteBackFull, Evict), - ParallelPriorityMux(Seq( - (req.opcode === AcquirePerm && req.param === NtoT) -> MakeUnique, - req_needT -> ReadUnique, - req_needB /* Default */ -> ReadNotSharedDirty - )) - ) + oa.opcode := ParallelPriorityMux(Seq( /* a reissued Release (release_valid2) keeps top priority, as in the replaced Mux, so a retried write-back of a CMO request is not re-encoded as a Clean* opcode */ + (release_valid2 && isWriteBackFull) -> WriteBackFull, + (release_valid2 && !isWriteBackFull) -> Evict, + req_cmoClean -> CleanShared, + req_cmoFlush -> CleanInvalid, + req_cmoInval -> MakeInvalid, + (req.opcode === AcquirePerm && req.param === NtoT) -> MakeUnique, + req_needT -> ReadUnique, + req_needB /* Default */ -> ReadNotSharedDirty + )) oa.size := log2Ceil(blockBytes).U oa.addr := Cat(Mux(release_valid2, dirResult.tag, req.tag), req.set, 0.U(offsetBits.W)) oa.ns := false.B @@ -317,7 +326,7 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { oa.allowRetry := state.s_reissue.getOrElse(false.B) oa.order := OrderEncodings.None oa.pCrdType := Mux(!state.s_reissue.getOrElse(false.B), pcrdtype, 0.U) - oa.expCompAck := !release_valid2 + oa.expCompAck := !release_valid2 && !req_cmoInval && !req_cmoClean && !req_cmoFlush oa.memAttr := MemAttr( cacheable = true.B, allocate = !(release_valid2 && isEvict), @@ -346,9 +355,13 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { Mux(snpToN, toN, toT) ), Mux( - req_get && dirResult.hit && meta.state === TRUNK, - toB, - toN + req.cmoTask, + toN, + Mux( + req_get && dirResult.hit && meta.state === TRUNK, + toB, + toN + ) ) ) ob.alias.foreach(_ := meta.alias.getOrElse(0.U)) @@ -385,7 +398,7 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { mp_release.fromL2pft.foreach(_ := false.B) mp_release.needHint.foreach(_ := false.B) mp_release.dirty := false.B//meta.dirty && meta.state =/= INVALID || probeDirty - mp_release.metaWen := false.B + mp_release.metaWen := (req_cmoClean || req_cmoFlush) // when clean/flush,
invalid line by mshr(when replace, invalid by directory) mp_release.meta := MetaEntry() mp_release.tagWen := false.B mp_release.dsWen := true.B // write refillData to DS @@ -786,6 +799,9 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { state.s_dct.get := true.B } } + when (io.tasks.cmoResp.fire) { + state.s_cmoresp := true.B + } /* Handling response @@ -817,6 +833,10 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { when (isToN(c_resp.bits.param)) { probeGotN := true.B } + when ((req_cmoClean || req_cmoFlush) && c_resp.bits.opcode === ProbeAckData) { + state.s_release := false.B + state.w_releaseack := false.B + } } val rxdatIsU = rxdat.bits.resp.get === UC @@ -942,7 +962,8 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { state.s_compack.getOrElse(true.B) && state.s_cbwrdata.getOrElse(true.B) && state.s_reissue.getOrElse(true.B) && - state.s_dct.getOrElse(true.B) + state.s_dct.getOrElse(true.B) && + state.s_cmoresp val no_wait = state.w_rprobeacklast && state.w_pprobeacklast && state.w_grantlast && state.w_releaseack && state.w_replResp val will_free = no_schedule && no_wait when (will_free && req_valid) { diff --git a/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala b/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala index 8bee1d78..1a95b361 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala @@ -62,6 +62,7 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val toTXREQ = DecoupledIO(new CHIREQ()) val toTXRSP = DecoupledIO(new CHIRSP()) // TODO: unify with main pipe, which should be TaskBundle val toSourceB = DecoupledIO(new TLBundleB(edgeIn.bundle)) + val cmoResp = DecoupledIO(new RVA23CMOResp()) /* to block sourceB from sending same-addr probe until GrantAck received */ val grantStatus = Input(Vec(grantBufInflightSize, new GrantStatus())) @@ -260,6 +261,9 @@ class MSHRCtl(implicit p: 
Parameters) extends TL2CHIL2Module with HasCHIOpcodes sourceB.io.grantStatus := io.grantStatus io.toSourceB <> sourceB.io.sourceB + /* CMO Ack upwards */ + fastArb(mshrs.map(_.io.tasks.cmoResp), io.cmoResp, Some("cmo_resp")) + /* Arbitrate MSHR task to RequestArbiter */ fastArb(mshrs.map(_.io.tasks.mainpipe), io.mshrTask, Some("mshr_task")) diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index 3bffc211..9b48eac4 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -141,12 +141,17 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val meta_s3 = dirResult_s3.meta val req_s3 = task_s3.bits + val cmo_req_s3 = req_s3.cmoTask val mshr_req_s3 = req_s3.mshrTask - val sink_req_s3 = !mshr_req_s3 - val sinkA_req_s3 = !mshr_req_s3 && req_s3.fromA - val sinkB_req_s3 = !mshr_req_s3 && req_s3.fromB - val sinkC_req_s3 = !mshr_req_s3 && req_s3.fromC - + val sink_req_s3 = !mshr_req_s3 && !cmo_req_s3 + val sinkA_req_s3 = !mshr_req_s3 && !cmo_req_s3 && req_s3.fromA + val sinkB_req_s3 = !mshr_req_s3 && !cmo_req_s3 && req_s3.fromB + val sinkC_req_s3 = !mshr_req_s3 && !cmo_req_s3 && req_s3.fromC + + val cmo_clean_s3 = cmo_req_s3 && req_s3.opcode === 0.U + val cmo_flush_s3 = cmo_req_s3 && req_s3.opcode === 1.U + val cmo_inval_s3 = cmo_req_s3 && req_s3.opcode === 2.U + val req_acquire_s3 = sinkA_req_s3 && (req_s3.opcode === AcquireBlock || req_s3.opcode === AcquirePerm) val req_acquireBlock_s3 = sinkA_req_s3 && req_s3.opcode === AcquireBlock val req_prefetch_s3 = sinkA_req_s3 && req_s3.opcode === Hint @@ -227,7 +232,10 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val need_dct_s3_b = doFwd || doFwdHitRelease // DCT val need_mshr_s3_b = need_pprobe_s3_b || need_dct_s3_b - val need_mshr_s3 = need_mshr_s3_a || need_mshr_s3_b + val need_mshr_s3_cmo = cmo_inval_s3 || cmo_clean_s3 || cmo_flush_s3 + val 
need_probe_s3_cmo = (cmo_inval_s3 || cmo_clean_s3 || cmo_flush_s3) && meta_has_clients_s3 && dirResult_s3.hit + + val need_mshr_s3 = need_mshr_s3_a || need_mshr_s3_b || need_mshr_s3_cmo /* Signals to MSHR Ctl */ val alloc_state = WireInit(0.U.asTypeOf(new FSMState())) @@ -417,6 +425,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes ) val metaW_valid_s3_c = sinkC_req_s3 && dirResult_s3.hit val metaW_valid_s3_mshr = mshr_req_s3 && req_s3.metaWen && !(mshr_refill_s3 && retry) + val metaW_valid_s3_cmo = cmo_inval_s3 && dirResult_s3.hit require(clientBits == 1) val metaW_s3_a_alias = Mux( @@ -449,6 +458,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes ) // use merge_meta if mergeA val metaW_s3_mshr = Mux(req_s3.mergeA, req_s3.aMergeTask.meta, req_s3.meta) + val metaW_s3_cmo = MetaEntry() // invalid the block val metaW_way = Mux( mshr_refill_s3 && req_s3.replTask, @@ -457,15 +467,15 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes ) io.metaWReq.valid := !resetFinish || task_s3.valid && ( - metaW_valid_s3_a || metaW_valid_s3_b || metaW_valid_s3_c || metaW_valid_s3_mshr + metaW_valid_s3_a || metaW_valid_s3_b || metaW_valid_s3_c || metaW_valid_s3_mshr || metaW_valid_s3_cmo ) io.metaWReq.bits.set := Mux(resetFinish, req_s3.set, resetIdx) io.metaWReq.bits.wayOH := Mux(resetFinish, UIntToOH(metaW_way), Fill(cacheParams.ways, true.B)) io.metaWReq.bits.wmeta := Mux( resetFinish, ParallelPriorityMux( - Seq(metaW_valid_s3_a, metaW_valid_s3_b, metaW_valid_s3_c, metaW_valid_s3_mshr), - Seq(metaW_s3_a, metaW_s3_b, metaW_s3_c, metaW_s3_mshr) + Seq(metaW_valid_s3_a, metaW_valid_s3_b, metaW_valid_s3_c, metaW_valid_s3_mshr, metaW_valid_s3_cmo), + Seq(metaW_s3_a, metaW_s3_b, metaW_s3_c, metaW_s3_mshr, metaW_s3_cmo) ), MetaEntry() ) @@ -805,6 +815,29 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes } } + when (req_s3.cmoTask) { + 
alloc_state.s_cmoresp := false.B + // need Acquire downwards + when (cmo_inval_s3 || cmo_clean_s3 || cmo_flush_s3) { + alloc_state.s_acquire := false.B + alloc_state.s_compack.get := true.B + alloc_state.w_grantfirst := false.B + alloc_state.w_grantlast := false.B + alloc_state.w_grant := false.B + } + // need Probe for clean client cache + when (need_probe_s3_cmo) { + alloc_state.s_rprobe := false.B + alloc_state.w_rprobeackfirst := false.B + alloc_state.w_rprobeacklast := false.B + } + // need Release dirty block downwards + when ((cmo_clean_s3 || cmo_flush_s3) && dirResult_s3.hit && meta_s3.dirty) { + alloc_state.s_release := false.B + alloc_state.w_releaseack := false.B + } + } + val d = Seq(d_s5, d_s4, d_s3) val txreq = Seq(txreq_s5, txreq_s4, txreq_s3) val txrsp = Seq(txrsp_s5, txrsp_s4, txrsp_s3) diff --git a/src/main/scala/coupledL2/tl2chi/Slice.scala b/src/main/scala/coupledL2/tl2chi/Slice.scala index d112a1b4..4d3310a1 100644 --- a/src/main/scala/coupledL2/tl2chi/Slice.scala +++ b/src/main/scala/coupledL2/tl2chi/Slice.scala @@ -41,6 +41,7 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] val sinkA = Module(new SinkA) val sinkC = Module(new SinkC) val grantBuf = Module(new GrantBuffer) + val sinkCMO = Module(new SinkCMO) /* Downwards CHI-related modules */ val txreq = Module(new TXREQ()) @@ -96,6 +97,8 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] reqArb.io.sinkB <> rxsnp.io.task reqArb.io.sinkC <> sinkC.io.task reqArb.io.mshrTask <> mshrCtl.io.mshrTask + reqArb.io.cmoTask.foreach(_ <> sinkCMO.io.task) /* bulk-connect like the other sinks so valid/bits go to RequestArb and ready comes back to SinkCMO */ + if (!hasRVA23CMO) { sinkCMO.io.task.ready := false.B } reqArb.io.fromMSHRCtl := mshrCtl.io.toReqArb reqArb.io.fromMainPipe := mainPipe.io.toReqArb reqArb.io.fromGrantBuffer := grantBuf.io.toReqArb @@ -197,6 +200,8 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] sinkC.io.c <> inBuf.c(io.in.c) io.in.d <> inBuf.d(grantBuf.io.d) grantBuf.io.e <> inBuf.e(io.in.e) + sinkCMO.io.cmoReq <>
io.cmoReq + io.cmoResp <> mshrCtl.io.cmoResp /* Connect downwards channels */ io.out.tx.req <> txreq.io.out diff --git a/src/main/scala/coupledL2/tl2tl/MainPipe.scala b/src/main/scala/coupledL2/tl2tl/MainPipe.scala index b7e410e1..51f15412 100644 --- a/src/main/scala/coupledL2/tl2tl/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2tl/MainPipe.scala @@ -230,6 +230,7 @@ class MainPipe(implicit p: Parameters) extends L2Module { ms_task.snpHitRelease := false.B ms_task.snpHitReleaseWithData := false.B ms_task.snpHitReleaseIdx := 0.U + ms_task.cmoTask := false.B /* ======== Resps to SinkA/B/C Reqs ======== */ val sink_resp_s3 = WireInit(0.U.asTypeOf(Valid(new TaskBundle))) // resp for sinkA/B/C request that does not need to alloc mshr diff --git a/src/main/scala/coupledL2/tl2tl/SinkB.scala b/src/main/scala/coupledL2/tl2tl/SinkB.scala index d97b88f0..bea2a3cc 100644 --- a/src/main/scala/coupledL2/tl2tl/SinkB.scala +++ b/src/main/scala/coupledL2/tl2tl/SinkB.scala @@ -70,6 +70,7 @@ class SinkB(implicit p: Parameters) extends L2Module { task.snpHitRelease := false.B task.snpHitReleaseWithData := false.B task.snpHitReleaseIdx := 0.U + task.cmoTask := false.B task } val task = fromTLBtoTaskBundle(io.b.bits) diff --git a/src/main/scala/coupledL2/tl2tl/Slice.scala b/src/main/scala/coupledL2/tl2tl/Slice.scala index b36fd4c2..f88c5753 100644 --- a/src/main/scala/coupledL2/tl2tl/Slice.scala +++ b/src/main/scala/coupledL2/tl2tl/Slice.scala @@ -159,6 +159,9 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] { sinkC.io.c <> inBuf.c(io.in.c) io.in.d <> inBuf.d(grantBuf.io.d) grantBuf.io.e <> inBuf.e(io.in.e) + io.cmoReq.ready := false.B + io.cmoResp.valid := false.B + io.cmoResp.bits := 0.U.asTypeOf(new RVA23CMOResp) /* connect downward channels */ io.out.a <> outBuf.a(mshrCtl.io.sourceA)