From 3e8cf35e17b2ca09817a801a527d7de2065a9457 Mon Sep 17 00:00:00 2001 From: cai luoshan Date: Thu, 25 Jul 2024 13:13:03 +0800 Subject: [PATCH 01/12] RVA23 CMO: Support Clean/Flush/Inval Inst for L2 --- src/main/scala/coupledL2/BaseSlice.scala | 1 + src/main/scala/coupledL2/Common.scala | 10 +++ src/main/scala/coupledL2/CoupledL2.scala | 13 ++++ src/main/scala/coupledL2/RequestArb.scala | 13 +++- src/main/scala/coupledL2/SinkCMO.scala | 74 +++++++++++++++++++ src/main/scala/coupledL2/SourceB.scala | 2 +- src/main/scala/coupledL2/tl2chi/MSHR.scala | 39 ++++++---- .../scala/coupledL2/tl2chi/MainPipe.scala | 52 ++++++++++--- src/main/scala/coupledL2/tl2chi/Slice.scala | 3 + 9 files changed, 179 insertions(+), 28 deletions(-) create mode 100644 src/main/scala/coupledL2/SinkCMO.scala diff --git a/src/main/scala/coupledL2/BaseSlice.scala b/src/main/scala/coupledL2/BaseSlice.scala index 1d23180d..e0e09e7e 100644 --- a/src/main/scala/coupledL2/BaseSlice.scala +++ b/src/main/scala/coupledL2/BaseSlice.scala @@ -35,6 +35,7 @@ abstract class BaseSliceIO[T_OUT <: BaseOuterBundle](implicit p: Parameters) ext // val msStatus = topDownOpt.map(_ => Vec(mshrsAll, ValidIO(new MSHRStatus))) val dirResult = topDownOpt.map(_ => ValidIO(new DirResult)) val latePF = topDownOpt.map(_ => Output(Bool())) + val cmoReq = DecoupledIO(new RVA23CMOReq()) } abstract class BaseSlice[T_OUT <: BaseOuterBundle](implicit p: Parameters) extends L2Module { diff --git a/src/main/scala/coupledL2/Common.scala b/src/main/scala/coupledL2/Common.scala index f7189739..37228a3d 100644 --- a/src/main/scala/coupledL2/Common.scala +++ b/src/main/scala/coupledL2/Common.scala @@ -79,6 +79,9 @@ class TaskBundle(implicit p: Parameters) extends L2Bundle val useProbeData = Bool() // data source, true for ReleaseBuf and false for RefillBuf val mshrRetry = Bool() // is retry task for mshr conflict + // For CMO request + val cmoTask = Bool() + // For Intent val fromL2pft = prefetchOpt.map(_ => Bool()) // Is the prefetch req 
from L2(BOP) or from L1 prefetch? // If true, MSHR should send an ack to L2 prefetcher. @@ -284,6 +287,7 @@ class SourceBReq(implicit p: Parameters) extends L2Bundle { val opcode = UInt(3.W) val param = UInt(bdWidth.W) val alias = aliasBitsOpt.map(_ => UInt(aliasBitsOpt.get.W)) + val needData = UInt(1.W) } class BlockInfo(implicit p: Parameters) extends L2Bundle { @@ -319,6 +323,12 @@ class L2ToL1Hint(implicit p: Parameters) extends Bundle { val isKeyword = Bool() // miss entry keyword } +// custom l2 - l1 CMO inst req +class RVA23CMOReq(implicit p: Parameters) extends Bundle { + val opcode = UInt(3.W) // 0-cbo.clean, 1-cbo.flush, 2-cbo.inval, 3-cbo.zero + val address = UInt(64.W) +} + // custom l2 - l1 tlb // FIXME lyq: Tlbcmd and TlbExceptionBundle, how to use L1 corresponding bundles? object TlbCmd { diff --git a/src/main/scala/coupledL2/CoupledL2.scala b/src/main/scala/coupledL2/CoupledL2.scala index bb7a9b9f..5535906d 100644 --- a/src/main/scala/coupledL2/CoupledL2.scala +++ b/src/main/scala/coupledL2/CoupledL2.scala @@ -77,6 +77,7 @@ trait HasCoupledL2Parameters { def hasTPPrefetcher = prefetchers.exists(_.isInstanceOf[TPParameters]) def hasPrefetchBit = prefetchers.exists(_.hasPrefetchBit) // !! 
TODO.test this def hasPrefetchSrc = prefetchers.exists(_.hasPrefetchSrc) + def hasRVA23CMO = true def topDownOpt = if(cacheParams.elaboratedTopDown) Some(true) else None def enableHintGuidedGrant = true @@ -223,6 +224,7 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has if(hasReceiver) Some(BundleBridgeSink(Some(() => new PrefetchRecv))) else None val tpmeta_source_node = if(hasTPPrefetcher) Some(BundleBridgeSource(() => DecoupledIO(new TPmetaReq))) else None val tpmeta_sink_node = if(hasTPPrefetcher) Some(BundleBridgeSink(Some(() => ValidIO(new TPmetaResp)))) else None + val cmo_sink_node = if(hasRVA23CMO) Some(BundleBridgeSink(Some(() => DecoupledIO(new RVA23CMOReq)))) else None val managerPortParams = (m: TLSlavePortParameters) => TLSlavePortParameters.v1( m.managers.map { m => @@ -439,6 +441,17 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has s.tlb_req.resp.ready := true.B } + cmo_sink_node match { + case Some(x) => + slice.io.cmoReq.valid := x.in.head._1.valid && bank_eq(x.in.head._1.bits.address >> offsetBits, i, bankBits) + slice.io.cmoReq.bits := x.in.head._1.bits + x.in.head._1.ready := slice.io.cmoReq.ready + case None => + slice.io.cmoReq.valid := false.B + slice.io.cmoReq.bits.opcode := 0.U + slice.io.cmoReq.bits.address := 0.U + } + slice } diff --git a/src/main/scala/coupledL2/RequestArb.scala b/src/main/scala/coupledL2/RequestArb.scala index 1fdd506d..b69cd03a 100644 --- a/src/main/scala/coupledL2/RequestArb.scala +++ b/src/main/scala/coupledL2/RequestArb.scala @@ -40,6 +40,7 @@ class RequestArb(implicit p: Parameters) extends L2Module { val sinkB = Flipped(DecoupledIO(new TaskBundle)) val sinkC = Flipped(DecoupledIO(new TaskBundle)) val mshrTask = Flipped(DecoupledIO(new TaskBundle)) + val cmoTask = Flipped(DecoupledIO(new TaskBundle)) /* read/write directory */ val dirRead_s1 = DecoupledIO(new DirRead()) // To directory, read meta/tag @@ -91,6 +92,10 @@ class RequestArb(implicit p: 
Parameters) extends L2Module { val s2_ready = Wire(Bool()) val mshr_task_s1 = RegInit(0.U.asTypeOf(Valid(new TaskBundle()))) + val cmo_task_s1 = Wire(Valid(new TaskBundle())) + cmo_task_s1.valid := io.dirRead_s1.ready && io.cmoTask.valid && resetFinish + cmo_task_s1.bits := io.cmoTask.bits + val s1_needs_replRead = mshr_task_s1.valid && mshr_task_s1.bits.fromA && mshr_task_s1.bits.replTask && ( mshr_task_s1.bits.opcode(2, 1) === Grant(2, 1) || mshr_task_s1.bits.opcode === AccessAckData || @@ -100,7 +105,7 @@ class RequestArb(implicit p: Parameters) extends L2Module { /* ======== Stage 0 ======== */ // if mshr_task_s1 is replRead, it might stall and wait for dirRead.ready, so we block new mshrTask from entering // TODO: will cause msTask path vacant for one-cycle after replRead, since not use Flow so as to avoid ready propagation - io.mshrTask.ready := !io.fromGrantBuffer.blockMSHRReqEntrance && !s1_needs_replRead && !(mshr_task_s1.valid && !s2_ready) + io.mshrTask.ready := !io.fromGrantBuffer.blockMSHRReqEntrance && !s1_needs_replRead && !(mshr_task_s1.valid && !s2_ready) && !cmo_task_s1.valid (if (io.fromSourceC.isDefined) !io.fromSourceC.get.blockMSHRReqEntrance else true.B) && (if (io.fromTXDAT.isDefined) !io.fromTXDAT.get.blockMSHRReqEntrance else true.B) && (if (io.fromTXRSP.isDefined) !io.fromTXRSP.get.blockMSHRReqEntrance else true.B) && @@ -139,7 +144,7 @@ class RequestArb(implicit p: Parameters) extends L2Module { // TODO: A Hint is allowed to enter if !s2_ready for mcp2_stall - val sink_ready_basic = io.dirRead_s1.ready && resetFinish && !mshr_task_s1.valid && s2_ready + val sink_ready_basic = io.dirRead_s1.ready && resetFinish && !mshr_task_s1.valid && !cmo_task_s1.valid && s2_ready io.sinkA.ready := sink_ready_basic && !block_A && !sinkValids(1) && !sinkValids(0) // SinkC prior to SinkA & SinkB io.sinkB.ready := sink_ready_basic && !block_B && !sinkValids(0) // SinkB prior to SinkA @@ -151,7 +156,7 @@ class RequestArb(implicit p: Parameters) extends 
L2Module { // mshr_task_s1 is s1_[reg] // task_s1 is [wire] to s2_reg - val task_s1 = Mux(mshr_task_s1.valid, mshr_task_s1, chnl_task_s1) + val task_s1 = Mux(cmo_task_s1.valid, cmo_task_s1, Mux(mshr_task_s1.valid, mshr_task_s1, chnl_task_s1)) val s1_to_s2_valid = task_s1.valid && !mshr_replRead_stall s1_cango := task_s1.valid && !mshr_replRead_stall @@ -162,7 +167,7 @@ class RequestArb(implicit p: Parameters) extends L2Module { /* Meta read request */ // ^ only sinkA/B/C tasks need to read directory - io.dirRead_s1.valid := s2_ready && (chnl_task_s1.valid && !mshr_task_s1.valid || s1_needs_replRead && !io.fromMainPipe.blockG_s1) + io.dirRead_s1.valid := s2_ready && (chnl_task_s1.valid && !mshr_task_s1.valid || s1_needs_replRead && !io.fromMainPipe.blockG_s1 || cmo_task_s1.valid) io.dirRead_s1.bits.set := task_s1.bits.set io.dirRead_s1.bits.tag := task_s1.bits.tag // invalid way which causes mshr_retry diff --git a/src/main/scala/coupledL2/SinkCMO.scala b/src/main/scala/coupledL2/SinkCMO.scala new file mode 100644 index 00000000..c9d334b4 --- /dev/null +++ b/src/main/scala/coupledL2/SinkCMO.scala @@ -0,0 +1,74 @@ +/** ************************************************************************************* + * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences + * Copyright (c) 2020-2021 Peng Cheng Laboratory + * + * XiangShan is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * + * See the Mulan PSL v2 for more details. 
+ * ************************************************************************************* + */ + +package coupledL2 + +import chisel3._ +import chisel3.util._ +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.tilelink.TLMessages._ +import org.chipsalliance.cde.config.Parameters +import utility.MemReqSource + + +// SinkCMO receives upwards CMO_Inst Req, and send it to RequestArb directly +class SinkCMO(implicit p: Parameters) extends L2Module { + val io = IO(new Bundle() { + val cmoReq = Flipped(DecoupledIO(new RVA23CMOReq())) + val task = DecoupledIO(new TaskBundle) + }) + + val task = Wire(new TaskBundle) + task := 0.U.asTypeOf(new TaskBundle) + task.cmoTask := true.B + task.channel := "b000".U + task.txChannel := 0.U + task.tag := parseAddress(io.cmoReq.bits.address)._1 + task.set := parseAddress(io.cmoReq.bits.address)._2 + task.off := parseAddress(io.cmoReq.bits.address)._3 + task.alias.foreach(_ := 0.U) + task.vaddr.foreach(_ := 0.U) + task.isKeyword.foreach(_ := false.B) + task.opcode := io.cmoReq.bits.opcode + task.param := 0.U + task.size := 0.U + task.sourceId := 0.U(sourceIdBits.W) + task.bufIdx := 0.U(bufIdxBits.W) + task.needProbeAckData := false.B + task.mshrTask := false.B + task.mshrId := 0.U(mshrBits.W) + task.aliasTask.foreach(_ := false.B) + task.useProbeData := false.B + task.mshrRetry := false.B + task.fromL2pft.foreach(_ := false.B) + task.needHint.foreach(_ := false.B) + task.dirty := false.B + task.way := 0.U(wayBits.W) + task.meta := 0.U.asTypeOf(new MetaEntry) + task.metaWen := false.B + task.tagWen := false.B + task.dsWen := false.B + task.wayMask := Fill(cacheParams.ways, "b1".U) + task.reqSource := MemReqSource.NoWhere.id.U // Ignore + task.replTask := false.B + task.mergeA := false.B + task.aMergeTask := 0.U.asTypeOf(new MergeTaskBundle) + + io.task.valid := io.cmoReq.valid + io.task.bits := task + io.cmoReq.ready := io.task.ready +} diff --git a/src/main/scala/coupledL2/SourceB.scala 
b/src/main/scala/coupledL2/SourceB.scala index 5fb75629..a1852747 100644 --- a/src/main/scala/coupledL2/SourceB.scala +++ b/src/main/scala/coupledL2/SourceB.scala @@ -59,7 +59,7 @@ class SourceB(implicit p: Parameters) extends L2Module { b.source := dcacheSourceIdStart b.address := Cat(task.tag, task.set, 0.U(offsetBits.W)) b.mask := Fill(beatBytes, 1.U(1.W)) - b.data := Cat(task.alias.getOrElse(0.U), 0.U(1.W)) // this is the same as HuanCun + b.data := Cat(task.alias.getOrElse(0.U), task.needData) //Cat(task.alias.getOrElse(0.U), 0.U(1.W)) // this is the same as HuanCun b.corrupt := false.B b } diff --git a/src/main/scala/coupledL2/tl2chi/MSHR.scala b/src/main/scala/coupledL2/tl2chi/MSHR.scala index 911fed59..767cc6b2 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHR.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHR.scala @@ -159,6 +159,10 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module { val snpToN = isSnpToN(req_chiOpcode) val snpToB = isSnpToB(req_chiOpcode) + val req_cmoClean = req.cmoTask && req.opcode === 0.U + val req_cmoFlush = req.cmoTask && req.opcode === 1.U + val req_cmoInval = req.cmoTask && req.opcode === 2.U + /** * About which snoop should echo SnpRespData[Fwded] instead of SnpResp[Fwded]: * 1. 
When the snooped block is dirty, always echo SnpRespData[Fwded], except for SnpMakeInvalid*, SnpStash*, @@ -209,7 +213,8 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module { val release_valid1 = !state.s_release && state.w_rprobeacklast && state.w_grantlast && state.w_replResp val release_valid2 = !state.s_reissue.getOrElse(false.B) && !state.w_releaseack && gotRetryAck && gotPCrdGrant // Theoretically, data to be released is saved in ReleaseBuffer, so Acquire can be sent as soon as req enters mshr - io.tasks.txreq.valid := !state.s_acquire || + // For cmo_clean/flush, dirty data should be released downward first, then Clean req can be sent + io.tasks.txreq.valid := !state.s_acquire && !((req_cmoClean || req_cmoFlush) && !state.w_releaseack) || !state.s_reissue.getOrElse(false.B) && !state.w_grant && gotRetryAck && gotPCrdGrant || release_valid2 io.tasks.txrsp.valid := !state.s_compack.get && state.w_grantlast @@ -304,15 +309,16 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module { */ val isWriteBackFull = isT(meta.state) && meta.dirty || probeDirty val isEvict = !isWriteBackFull - oa.opcode := Mux( - release_valid2, - Mux(isWriteBackFull, WriteBackFull, Evict), - ParallelPriorityMux(Seq( - (req.opcode === AcquirePerm && req.param === NtoT) -> MakeUnique, - req_needT -> ReadUnique, - req_needB /* Default */ -> ReadNotSharedDirty - )) - ) + oa.opcode := ParallelPriorityMux(Seq( + req_cmoClean -> CleanShared, + req_cmoFlush -> CleanInvalid, + req_cmoInval -> MakeInvalid, + (release_valid2 && isWriteBackFull) -> WriteBackFull, + (release_valid2 && !isWriteBackFull) -> Evict, + (req.opcode === AcquirePerm && req.param === NtoT) -> MakeUnique, + req_needT -> ReadUnique, + req_needB /* Default */ -> ReadNotSharedDirty + )) oa.size := log2Ceil(blockBytes).U oa.addr := Cat(Mux(release_valid2, dirResult.tag, req.tag), req.set, 0.U(offsetBits.W)) oa.ns := false.B @@ -320,7 +326,7 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module { oa.allowRetry 
:= state.s_reissue.getOrElse(false.B) oa.order := OrderEncodings.None oa.pCrdType := Mux(!state.s_reissue.getOrElse(false.B), pcrdtype, 0.U) - oa.expCompAck := !release_valid2 + oa.expCompAck := !release_valid2 && !req_cmoInval && !req_cmoClean && !req_cmoFlush oa.memAttr := MemAttr( cacheable = true.B, allocate = !(release_valid2 && isEvict), @@ -349,12 +355,17 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module { Mux(snpToN, toN, toT) ), Mux( - req_get && dirResult.hit && meta.state === TRUNK, - toB, - toN + req.cmoTask, + toN, + Mux( + req_get && dirResult.hit && meta.state === TRUNK, + toB, + toN + ) ) ) ob.alias.foreach(_ := meta.alias.getOrElse(0.U)) + ob.needData := Mux(req_cmoInval, 1.U, 0.U) // probe L1 toN and donot writeback dirty data ob } diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index 18915d92..02872333 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -142,12 +142,17 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module { val meta_s3 = dirResult_s3.meta val req_s3 = task_s3.bits + val cmo_req_s3 = req_s3.cmoTask val mshr_req_s3 = req_s3.mshrTask - val sink_req_s3 = !mshr_req_s3 - val sinkA_req_s3 = !mshr_req_s3 && req_s3.fromA - val sinkB_req_s3 = !mshr_req_s3 && req_s3.fromB - val sinkC_req_s3 = !mshr_req_s3 && req_s3.fromC - + val sink_req_s3 = !mshr_req_s3 && !cmo_req_s3 + val sinkA_req_s3 = !mshr_req_s3 && !cmo_req_s3 && req_s3.fromA + val sinkB_req_s3 = !mshr_req_s3 && !cmo_req_s3 && req_s3.fromB + val sinkC_req_s3 = !mshr_req_s3 && !cmo_req_s3 && req_s3.fromC + + val cmo_clean_s3 = cmo_req_s3 && req_s3.opcode === 0.U + val cmo_flush_s3 = cmo_req_s3 && req_s3.opcode === 1.U + val cmo_inval_s3 = cmo_req_s3 && req_s3.opcode === 2.U + val req_acquire_s3 = sinkA_req_s3 && (req_s3.opcode === AcquireBlock || req_s3.opcode === AcquirePerm) val req_acquireBlock_s3 = sinkA_req_s3 && req_s3.opcode === 
AcquireBlock val req_prefetch_s3 = sinkA_req_s3 && req_s3.opcode === Hint @@ -228,7 +233,10 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module { val need_dct_s3_b = doFwd || doFwdHitRelease // DCT val need_mshr_s3_b = need_pprobe_s3_b || need_dct_s3_b - val need_mshr_s3 = need_mshr_s3_a || need_mshr_s3_b + val need_mshr_s3_cmo = cmo_inval_s3 || cmo_clean_s3 || cmo_flush_s3 + val need_probe_s3_cmo = (cmo_inval_s3 || cmo_clean_s3 || cmo_flush_s3) && meta_has_clients_s3 && dirResult_s3.hit + + val need_mshr_s3 = need_mshr_s3_a || need_mshr_s3_b || need_mshr_s3_cmo /* Signals to MSHR Ctl */ val alloc_state = WireInit(0.U.asTypeOf(new FSMState())) @@ -418,6 +426,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module { ) val metaW_valid_s3_c = sinkC_req_s3 && dirResult_s3.hit val metaW_valid_s3_mshr = mshr_req_s3 && req_s3.metaWen && !(mshr_refill_s3 && retry) + val metaW_valid_s3_cmo = cmo_inval_s3 && dirResult_s3.hit require(clientBits == 1) val metaW_s3_a_alias = Mux( @@ -450,6 +459,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module { ) // use merge_meta if mergeA val metaW_s3_mshr = Mux(req_s3.mergeA, req_s3.aMergeTask.meta, req_s3.meta) + val metaW_s3_cmo = MetaEntry() // invalid the block val metaW_way = Mux( mshr_refill_s3 && req_s3.replTask, @@ -458,15 +468,15 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module { ) io.metaWReq.valid := !resetFinish || task_s3.valid && ( - metaW_valid_s3_a || metaW_valid_s3_b || metaW_valid_s3_c || metaW_valid_s3_mshr + metaW_valid_s3_a || metaW_valid_s3_b || metaW_valid_s3_c || metaW_valid_s3_mshr || metaW_valid_s3_cmo ) io.metaWReq.bits.set := Mux(resetFinish, req_s3.set, resetIdx) io.metaWReq.bits.wayOH := Mux(resetFinish, UIntToOH(metaW_way), Fill(cacheParams.ways, true.B)) io.metaWReq.bits.wmeta := Mux( resetFinish, ParallelPriorityMux( - Seq(metaW_valid_s3_a, metaW_valid_s3_b, metaW_valid_s3_c, metaW_valid_s3_mshr), - Seq(metaW_s3_a, metaW_s3_b, metaW_s3_c, 
metaW_s3_mshr) + Seq(metaW_valid_s3_a, metaW_valid_s3_b, metaW_valid_s3_c, metaW_valid_s3_mshr, metaW_valid_s3_cmo), + Seq(metaW_s3_a, metaW_s3_b, metaW_s3_c, metaW_s3_mshr, metaW_s3_cmo) ), MetaEntry() ) @@ -806,6 +816,30 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module { } } + when (req_s3.cmoTask) { + alloc_state.s_refill := true.B + alloc_state.w_replResp := true.B + // need Acquire downwards + when (cmo_inval_s3 || cmo_clean_s3 || cmo_flush_s3) { + alloc_state.s_acquire := false.B + alloc_state.s_compack.get := true.B + alloc_state.w_grantfirst := false.B + alloc_state.w_grantlast := false.B + alloc_state.w_grant := false.B + } + // need Probe for clean client cache + when (need_probe_s3_cmo) { + alloc_state.s_rprobe := false.B + alloc_state.w_rprobeackfirst := false.B + alloc_state.w_rprobeacklast := false.B + } + // need Release dirty block downwards + when ((cmo_clean_s3 || cmo_flush_s3) && dirResult_s3.hit && meta_s3.dirty) { + alloc_state.s_release := false.B + alloc_state.w_releaseack := false.B + } + } + val d = Seq(d_s5, d_s4, d_s3) val txreq = Seq(txreq_s5, txreq_s4, txreq_s3) val txrsp = Seq(txrsp_s5, txrsp_s4, txrsp_s3) diff --git a/src/main/scala/coupledL2/tl2chi/Slice.scala b/src/main/scala/coupledL2/tl2chi/Slice.scala index e57f6756..aa62cc38 100644 --- a/src/main/scala/coupledL2/tl2chi/Slice.scala +++ b/src/main/scala/coupledL2/tl2chi/Slice.scala @@ -40,6 +40,7 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] val sinkA = Module(new SinkA) val sinkC = Module(new SinkC) val grantBuf = Module(new GrantBuffer) + val sinkCMO = Module(new SinkCMO) /* Downwards CHI-related modules */ val txreq = Module(new TXREQ()) @@ -95,6 +96,7 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] reqArb.io.sinkB <> rxsnp.io.task reqArb.io.sinkC <> sinkC.io.task reqArb.io.mshrTask <> mshrCtl.io.mshrTask + reqArb.io.cmoTask <> sinkCMO.io.task reqArb.io.fromMSHRCtl := mshrCtl.io.toReqArb reqArb.io.fromMainPipe := 
mainPipe.io.toReqArb reqArb.io.fromGrantBuffer := grantBuf.io.toReqArb @@ -198,6 +200,7 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] sinkC.io.c <> inBuf.c(io.in.c) io.in.d <> inBuf.d(grantBuf.io.d) grantBuf.io.e <> inBuf.e(io.in.e) + sinkCMO.io.cmoReq <> io.cmoReq /* Connect downwards channels */ io.out.tx.req <> txreq.io.out From a1c1429fb812fe28fbdc035e25365bc3313dc30d Mon Sep 17 00:00:00 2001 From: cai luoshan Date: Thu, 25 Jul 2024 16:19:58 +0800 Subject: [PATCH 02/12] RVA23 CMO: Add CMO Ack Channel to L1 --- src/main/scala/coupledL2/BaseSlice.scala | 1 + src/main/scala/coupledL2/Common.scala | 5 +++++ src/main/scala/coupledL2/CoupledL2.scala | 7 +++++++ src/main/scala/coupledL2/tl2chi/MSHR.scala | 8 +++++++- src/main/scala/coupledL2/tl2chi/MSHRCtl.scala | 4 ++++ src/main/scala/coupledL2/tl2chi/MainPipe.scala | 3 +-- src/main/scala/coupledL2/tl2chi/Slice.scala | 1 + 7 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/main/scala/coupledL2/BaseSlice.scala b/src/main/scala/coupledL2/BaseSlice.scala index e0e09e7e..ece8e12f 100644 --- a/src/main/scala/coupledL2/BaseSlice.scala +++ b/src/main/scala/coupledL2/BaseSlice.scala @@ -36,6 +36,7 @@ abstract class BaseSliceIO[T_OUT <: BaseOuterBundle](implicit p: Parameters) ext val dirResult = topDownOpt.map(_ => ValidIO(new DirResult)) val latePF = topDownOpt.map(_ => Output(Bool())) val cmoReq = DecoupledIO(new RVA23CMOReq()) + val cmoResp = DecoupledIO(new RVA23CMOResp()) } abstract class BaseSlice[T_OUT <: BaseOuterBundle](implicit p: Parameters) extends L2Module { diff --git a/src/main/scala/coupledL2/Common.scala b/src/main/scala/coupledL2/Common.scala index 37228a3d..26a8ab5a 100644 --- a/src/main/scala/coupledL2/Common.scala +++ b/src/main/scala/coupledL2/Common.scala @@ -249,6 +249,7 @@ class FSMState(implicit p: Parameters) extends L2Bundle { // val s_grantack = Bool() // respond grantack downwards, moved to GrantBuf // val s_triggerprefetch = prefetchOpt.map(_ => Bool()) 
val s_retry = Bool() // need retry when conflict + val s_cmoresp = Bool() // resp upwards for finishing cmo inst // wait val w_rprobeackfirst = Bool() @@ -328,6 +329,10 @@ class RVA23CMOReq(implicit p: Parameters) extends Bundle { val opcode = UInt(3.W) // 0-cbo.clean, 1-cbo.flush, 2-cbo.inval, 3-cbo.zero val address = UInt(64.W) } +// custom l2 - l1 CMO inst resp(ack) +class RVA23CMOResp(implicit p: Parameters) extends Bundle { + val address = UInt(64.W) +} // custom l2 - l1 tlb // FIXME lyq: Tlbcmd and TlbExceptionBundle, how to use L1 corresponding bundles? diff --git a/src/main/scala/coupledL2/CoupledL2.scala b/src/main/scala/coupledL2/CoupledL2.scala index 5535906d..2923153f 100644 --- a/src/main/scala/coupledL2/CoupledL2.scala +++ b/src/main/scala/coupledL2/CoupledL2.scala @@ -225,6 +225,7 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has val tpmeta_source_node = if(hasTPPrefetcher) Some(BundleBridgeSource(() => DecoupledIO(new TPmetaReq))) else None val tpmeta_sink_node = if(hasTPPrefetcher) Some(BundleBridgeSink(Some(() => ValidIO(new TPmetaResp)))) else None val cmo_sink_node = if(hasRVA23CMO) Some(BundleBridgeSink(Some(() => DecoupledIO(new RVA23CMOReq)))) else None + val cmo_source_node = if(hasRVA23CMO) Some(BundleBridgeSink(Some(() => DecoupledIO(new RVA23CMOResp)))) else None val managerPortParams = (m: TLSlavePortParameters) => TLSlavePortParameters.v1( m.managers.map { m => @@ -455,6 +456,12 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has slice } + cmo_source_node match { + case Some(x) => + fastArb(slices.map(_.io.cmoResp), x.out.head._1, Some("cmo_resp")) + case None => + } + // Refill hint if (enableHintGuidedGrant) { // for timing consideration, hint should latch one cycle before sending to L1 diff --git a/src/main/scala/coupledL2/tl2chi/MSHR.scala b/src/main/scala/coupledL2/tl2chi/MSHR.scala index 767cc6b2..96ade63f 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHR.scala +++ 
b/src/main/scala/coupledL2/tl2chi/MSHR.scala @@ -44,6 +44,7 @@ class MSHRTasks(implicit p: Parameters) extends TL2CHIL2Bundle { val source_b = DecoupledIO(new SourceBReq) val mainpipe = DecoupledIO(new TaskBundle) // To Mainpipe (SourceC or SourceD) // val prefetchTrain = prefetchOpt.map(_ => DecoupledIO(new PrefetchTrain)) // To prefetcher + val cmoResp = DecoupledIO(new RVA23CMOResp()) // To L1 CMO_channel } class MSHRResps(implicit p: Parameters) extends TL2CHIL2Bundle { @@ -232,6 +233,7 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module { mp_cbwrdata_valid || mp_dct_valid // io.tasks.prefetchTrain.foreach(t => t.valid := !state.s_triggerprefetch.getOrElse(true.B)) + io.tasks.cmoResp.valid := !state.s_cmoresp && state.w_grantlast && state.w_rprobeacklast when ( pending_grant_valid && @@ -800,6 +802,9 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module { state.s_dct.get := true.B } } + when (io.tasks.cmoResp.fire) { + state.s_cmoresp := true.B + } /* Handling response @@ -941,7 +946,8 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module { state.s_compack.getOrElse(true.B) && state.s_cbwrdata.getOrElse(true.B) && state.s_reissue.getOrElse(true.B) && - state.s_dct.getOrElse(true.B) + state.s_dct.getOrElse(true.B) && + state.s_cmoresp val no_wait = state.w_rprobeacklast && state.w_pprobeacklast && state.w_grantlast && state.w_releaseack && state.w_replResp val will_free = no_schedule && no_wait when (will_free && req_valid) { diff --git a/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala b/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala index c3b0546a..e18b78cb 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala @@ -64,6 +64,7 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module { val toTXREQ = DecoupledIO(new CHIREQ()) val toTXRSP = DecoupledIO(new CHIRSP()) // TODO: unify with main pipe, which should be TaskBundle val toSourceB = DecoupledIO(new TLBundleB(edgeIn.bundle)) + 
val cmoResp = DecoupledIO(new RVA23CMOResp()) /* to block sourceB from sending same-addr probe until GrantAck received */ val grantStatus = Input(Vec(grantBufInflightSize, new GrantStatus())) @@ -273,6 +274,9 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module { sourceB.io.grantStatus := io.grantStatus io.toSourceB <> sourceB.io.sourceB + /* CMO Ack upwards */ + fastArb(mshrs.map(_.io.tasks.cmoResp), io.cmoResp, Some("cmo_resp")) + /* Arbitrate MSHR task to RequestArbiter */ fastArb(mshrs.map(_.io.tasks.mainpipe), io.mshrTask, Some("mshr_task")) diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index 02872333..41d2352c 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -817,8 +817,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module { } when (req_s3.cmoTask) { - alloc_state.s_refill := true.B - alloc_state.w_replResp := true.B + alloc_state.s_cmoresp := false.B // need Acquire downwards when (cmo_inval_s3 || cmo_clean_s3 || cmo_flush_s3) { alloc_state.s_acquire := false.B diff --git a/src/main/scala/coupledL2/tl2chi/Slice.scala b/src/main/scala/coupledL2/tl2chi/Slice.scala index aa62cc38..af1078a8 100644 --- a/src/main/scala/coupledL2/tl2chi/Slice.scala +++ b/src/main/scala/coupledL2/tl2chi/Slice.scala @@ -201,6 +201,7 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] io.in.d <> inBuf.d(grantBuf.io.d) grantBuf.io.e <> inBuf.e(io.in.e) sinkCMO.io.cmoReq <> io.cmoReq + io.cmoResp <> mshrCtl.io.cmoResp /* Connect downwards channels */ io.out.tx.req <> txreq.io.out From 193845a3e7f602efaa85117fc56049a78b8564d4 Mon Sep 17 00:00:00 2001 From: cai luoshan Date: Sat, 27 Jul 2024 15:19:43 +0800 Subject: [PATCH 03/12] RVA23 CMO: fix bugs for io connection --- src/main/scala/coupledL2/BaseSlice.scala | 2 +- src/main/scala/coupledL2/CoupledL2.scala | 4 ++-- src/main/scala/coupledL2/RequestArb.scala | 1 + 
src/main/scala/coupledL2/tl2chi/MSHR.scala | 1 + 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/scala/coupledL2/BaseSlice.scala b/src/main/scala/coupledL2/BaseSlice.scala index ece8e12f..26872a24 100644 --- a/src/main/scala/coupledL2/BaseSlice.scala +++ b/src/main/scala/coupledL2/BaseSlice.scala @@ -35,7 +35,7 @@ abstract class BaseSliceIO[T_OUT <: BaseOuterBundle](implicit p: Parameters) ext // val msStatus = topDownOpt.map(_ => Vec(mshrsAll, ValidIO(new MSHRStatus))) val dirResult = topDownOpt.map(_ => ValidIO(new DirResult)) val latePF = topDownOpt.map(_ => Output(Bool())) - val cmoReq = DecoupledIO(new RVA23CMOReq()) + val cmoReq = Flipped(DecoupledIO(new RVA23CMOReq())) val cmoResp = DecoupledIO(new RVA23CMOResp()) } diff --git a/src/main/scala/coupledL2/CoupledL2.scala b/src/main/scala/coupledL2/CoupledL2.scala index 2923153f..24572247 100644 --- a/src/main/scala/coupledL2/CoupledL2.scala +++ b/src/main/scala/coupledL2/CoupledL2.scala @@ -225,7 +225,7 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has val tpmeta_source_node = if(hasTPPrefetcher) Some(BundleBridgeSource(() => DecoupledIO(new TPmetaReq))) else None val tpmeta_sink_node = if(hasTPPrefetcher) Some(BundleBridgeSink(Some(() => ValidIO(new TPmetaResp)))) else None val cmo_sink_node = if(hasRVA23CMO) Some(BundleBridgeSink(Some(() => DecoupledIO(new RVA23CMOReq)))) else None - val cmo_source_node = if(hasRVA23CMO) Some(BundleBridgeSink(Some(() => DecoupledIO(new RVA23CMOResp)))) else None + val cmo_source_node = if(hasRVA23CMO) Some(BundleBridgeSource(Some(() => DecoupledIO(new RVA23CMOResp)))) else None val managerPortParams = (m: TLSlavePortParameters) => TLSlavePortParameters.v1( m.managers.map { m => @@ -461,7 +461,7 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has fastArb(slices.map(_.io.cmoResp), x.out.head._1, Some("cmo_resp")) case None => } - + // Refill hint if (enableHintGuidedGrant) { // for timing 
consideration, hint should latch one cycle before sending to L1 diff --git a/src/main/scala/coupledL2/RequestArb.scala b/src/main/scala/coupledL2/RequestArb.scala index b69cd03a..999b62f9 100644 --- a/src/main/scala/coupledL2/RequestArb.scala +++ b/src/main/scala/coupledL2/RequestArb.scala @@ -95,6 +95,7 @@ class RequestArb(implicit p: Parameters) extends L2Module { val cmo_task_s1 = Wire(Valid(new TaskBundle())) cmo_task_s1.valid := io.dirRead_s1.ready && io.cmoTask.valid && resetFinish cmo_task_s1.bits := io.cmoTask.bits + io.cmoTask.ready := io.dirRead_s1.ready && resetFinish && s2_ready val s1_needs_replRead = mshr_task_s1.valid && mshr_task_s1.bits.fromA && mshr_task_s1.bits.replTask && ( mshr_task_s1.bits.opcode(2, 1) === Grant(2, 1) || diff --git a/src/main/scala/coupledL2/tl2chi/MSHR.scala b/src/main/scala/coupledL2/tl2chi/MSHR.scala index 96ade63f..c154257d 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHR.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHR.scala @@ -234,6 +234,7 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module { mp_dct_valid // io.tasks.prefetchTrain.foreach(t => t.valid := !state.s_triggerprefetch.getOrElse(true.B)) io.tasks.cmoResp.valid := !state.s_cmoresp && state.w_grantlast && state.w_rprobeacklast + io.tasks.cmoResp.bits.address := 0.U when ( pending_grant_valid && From c7ccf0479a9e9bb46810630021b339198f9a74ee Mon Sep 17 00:00:00 2001 From: cai luoshan Date: Sat, 27 Jul 2024 15:26:11 +0800 Subject: [PATCH 04/12] TestTop: add CMO nodes --- src/test/scala/chi/TestTop.scala | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/test/scala/chi/TestTop.scala b/src/test/scala/chi/TestTop.scala index 718d7dda..9b78cad2 100644 --- a/src/test/scala/chi/TestTop.scala +++ b/src/test/scala/chi/TestTop.scala @@ -116,6 +116,19 @@ class TestTop_CHIL2(numCores: Int = 1, numULAgents: Int = 0, banks: Int = 1)(imp l2.mmioBridge.mmioNode := mmioClientNode } + val l1_cmo_sender_nodes = (0 until numCores).map(_ => 
BundleBridgeSource(() => DecoupledIO(new RVA23CMOReq))) + val l1_cmo_recver_nodes = (0 until numCores).map(_ => BundleBridgeSink(Some(() => DecoupledIO(new RVA23CMOResp)))) + l1_cmo_sender_nodes.zip(l2_nodes).zipWithIndex.foreach { case ((cmo_sender, l2), i) => + l2.cmo_sink_node.foreach { sink => + sink := cmo_sender + } + } + l1_cmo_recver_nodes.zip(l2_nodes).zipWithIndex.foreach { case ((cmo_recver, l2), i) => + l2.cmo_source_node.foreach { source => + cmo_recver := source + } + } + lazy val module = new LazyModuleImp(this){ val timer = WireDefault(0.U(64.W)) val logEnable = WireDefault(false.B) From 1c7c7ee3f857907ea9b38fdd325d54eabcd20637 Mon Sep 17 00:00:00 2001 From: cai luoshan Date: Mon, 29 Jul 2024 11:38:14 +0800 Subject: [PATCH 05/12] TestTop: add CMO io port --- src/test/scala/chi/TestTop.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/test/scala/chi/TestTop.scala b/src/test/scala/chi/TestTop.scala index 9b78cad2..a6f561c0 100644 --- a/src/test/scala/chi/TestTop.scala +++ b/src/test/scala/chi/TestTop.scala @@ -151,6 +151,14 @@ class TestTop_CHIL2(numCores: Int = 1, numULAgents: Int = 0, banks: Int = 1)(imp } } } + l1_cmo_sender_nodes.zipWithIndex.foreach{ + case (node, i) => + node.makeIOs()(ValName(s"cmo_sender_port_$i")) + } + l1_cmo_recver_nodes.zipWithIndex.foreach{ + case (node, i) => + node.makeIOs()(ValName(s"cmo_recver_port_$i")) + } val io = IO(Vec(numCores, new Bundle() { val chi = new PortIO From e888f0c7181833b3c13fbb6649d6b986c11170fd Mon Sep 17 00:00:00 2001 From: cai luoshan Date: Thu, 15 Aug 2024 20:18:24 +0800 Subject: [PATCH 06/12] RVA23 CMO: fix bug for clean & flush, release dirty data before acq_clean --- src/main/scala/coupledL2/tl2chi/MSHR.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/coupledL2/tl2chi/MSHR.scala b/src/main/scala/coupledL2/tl2chi/MSHR.scala index 9905237d..15764331 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHR.scala +++ 
b/src/main/scala/coupledL2/tl2chi/MSHR.scala @@ -208,7 +208,7 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { /* ======== Task allocation ======== */ // The first Release with AllowRetry = 1 is sent to main pipe, because the task needs to write DS. // The second Release with AllowRetry = 0 is sent to TXREQ directly, because DS is already written. - val release_valid1 = !state.s_release && state.w_rprobeacklast && state.w_grantlast && state.w_replResp + val release_valid1 = (!state.s_release && state.w_rprobeacklast && state.w_grantlast && state.w_replResp) || (!state.s_release && state.w_rprobeacklast && state.w_replResp && (req_cmoClean || req_cmoFlush)) val release_valid2 = !state.s_reissue.getOrElse(false.B) && !state.w_releaseack && gotRetryAck && gotPCrdGrant // Theoretically, data to be released is saved in ReleaseBuffer, so Acquire can be sent as soon as req enters mshr // For cmo_clean/flush, dirty data should be released downward first, then Clean req can be sent From 8289ebde93726c63985b841dc0346d2cdf18f841 Mon Sep 17 00:00:00 2001 From: Cai Luoshan Date: Mon, 19 Aug 2024 10:32:55 +0800 Subject: [PATCH 07/12] RVA23 CMO: fix bug for clean & flush, wait for probeack from L1 before acq_clean --- src/main/scala/coupledL2/tl2chi/MSHR.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/coupledL2/tl2chi/MSHR.scala b/src/main/scala/coupledL2/tl2chi/MSHR.scala index 15764331..d0210f16 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHR.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHR.scala @@ -212,7 +212,7 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { val release_valid2 = !state.s_reissue.getOrElse(false.B) && !state.w_releaseack && gotRetryAck && gotPCrdGrant // Theoretically, data to be released is saved in ReleaseBuffer, so Acquire can be sent as soon as req enters mshr // For cmo_clean/flush, dirty data should be released downward first, then Clean req can be 
sent - io.tasks.txreq.valid := !state.s_acquire && !((req_cmoClean || req_cmoFlush) && !state.w_releaseack) || + io.tasks.txreq.valid := !state.s_acquire && !((req_cmoClean || req_cmoFlush) && (!state.w_releaseack || !state.w_rprobeacklast)) || !state.s_reissue.getOrElse(false.B) && !state.w_grant && gotRetryAck && gotPCrdGrant || release_valid2 io.tasks.txrsp.valid := !state.s_compack.get && state.w_grantlast From 51d788a4eb15a2e0b268e36d9b7bf031fea0154e Mon Sep 17 00:00:00 2001 From: Cai Luoshan Date: Mon, 19 Aug 2024 11:30:39 +0800 Subject: [PATCH 08/12] RVA23 CMO: fix bug for clean & flush, Release when dirty data probeack from L1 before acq_clean --- src/main/scala/coupledL2/tl2chi/MSHR.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/scala/coupledL2/tl2chi/MSHR.scala b/src/main/scala/coupledL2/tl2chi/MSHR.scala index d0210f16..1e171014 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHR.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHR.scala @@ -834,6 +834,10 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { when (isToN(c_resp.bits.param)) { probeGotN := true.B } + when ((req_cmoClean || req_cmoFlush) && c_resp.bits.opcode === ProbeAckData) { + state.s_release := false.B + state.w_releaseack := false.B + } } val rxdatIsU = rxdat.bits.resp.get === UC From 81bfba31f1d16473ea0605f93d8a838c1e395934 Mon Sep 17 00:00:00 2001 From: Cai Luoshan Date: Mon, 19 Aug 2024 17:01:32 +0800 Subject: [PATCH 09/12] RVA23 CMO: fix bug for clean & flush, invalid line by MSHR --- src/main/scala/coupledL2/tl2chi/MSHR.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/coupledL2/tl2chi/MSHR.scala b/src/main/scala/coupledL2/tl2chi/MSHR.scala index 1e171014..63c771e9 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHR.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHR.scala @@ -399,7 +399,7 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { mp_release.fromL2pft.foreach(_ := 
false.B) mp_release.needHint.foreach(_ := false.B) mp_release.dirty := false.B//meta.dirty && meta.state =/= INVALID || probeDirty - mp_release.metaWen := false.B + mp_release.metaWen := (req_cmoClean || req_cmoFlush) // on CMO clean/flush the MSHR invalidates the line itself (on replacement the directory invalidates it) mp_release.meta := MetaEntry() mp_release.tagWen := false.B mp_release.dsWen := true.B // write refillData to DS From 1c1df0afa013cb54cbade90587ca00d685f7ae38 Mon Sep 17 00:00:00 2001 From: Cai Luoshan Date: Wed, 21 Aug 2024 17:27:46 +0800 Subject: [PATCH 10/12] RVA23_CMO: close CMO for default test --- src/main/scala/coupledL2/Common.scala | 1 - src/main/scala/coupledL2/CoupledL2.scala | 3 ++- src/main/scala/coupledL2/RequestArb.scala | 10 +++++---- src/main/scala/coupledL2/SourceB.scala | 2 +- src/main/scala/coupledL2/tl2chi/MSHR.scala | 1 - src/main/scala/coupledL2/tl2chi/Slice.scala | 2 +- src/main/scala/coupledL2/tl2tl/MainPipe.scala | 1 + src/main/scala/coupledL2/tl2tl/SinkB.scala | 1 + src/main/scala/coupledL2/tl2tl/Slice.scala | 3 +++ src/test/scala/chi/TestTop.scala | 21 ------------------- 10 files changed, 15 insertions(+), 30 deletions(-) diff --git a/src/main/scala/coupledL2/Common.scala b/src/main/scala/coupledL2/Common.scala index d3a6eb00..6d74549e 100644 --- a/src/main/scala/coupledL2/Common.scala +++ b/src/main/scala/coupledL2/Common.scala @@ -287,7 +287,6 @@ class SourceBReq(implicit p: Parameters) extends L2Bundle { val opcode = UInt(3.W) val param = UInt(bdWidth.W) val alias = aliasBitsOpt.map(_ => UInt(aliasBitsOpt.get.W)) - val needData = UInt(1.W) } class BlockInfo(implicit p: Parameters) extends L2Bundle { diff --git a/src/main/scala/coupledL2/CoupledL2.scala b/src/main/scala/coupledL2/CoupledL2.scala index 24572247..cbc27150 100644 --- a/src/main/scala/coupledL2/CoupledL2.scala +++ b/src/main/scala/coupledL2/CoupledL2.scala @@ -77,7 +77,7 @@ trait HasCoupledL2Parameters { def hasTPPrefetcher = prefetchers.exists(_.isInstanceOf[TPParameters]) def
hasPrefetchBit = prefetchers.exists(_.hasPrefetchBit) // !! TODO.test this def hasPrefetchSrc = prefetchers.exists(_.hasPrefetchSrc) - def hasRVA23CMO = true + def hasRVA23CMO = false def topDownOpt = if(cacheParams.elaboratedTopDown) Some(true) else None def enableHintGuidedGrant = true @@ -451,6 +451,7 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has slice.io.cmoReq.valid := false.B slice.io.cmoReq.bits.opcode := 0.U slice.io.cmoReq.bits.address := 0.U + slice.io.cmoResp.ready := false.B } slice diff --git a/src/main/scala/coupledL2/RequestArb.scala b/src/main/scala/coupledL2/RequestArb.scala index 31cec466..236f4bbc 100644 --- a/src/main/scala/coupledL2/RequestArb.scala +++ b/src/main/scala/coupledL2/RequestArb.scala @@ -41,7 +41,7 @@ class RequestArb(implicit p: Parameters) extends L2Module val sinkB = Flipped(DecoupledIO(new TaskBundle)) val sinkC = Flipped(DecoupledIO(new TaskBundle)) val mshrTask = Flipped(DecoupledIO(new TaskBundle)) - val cmoTask = Flipped(DecoupledIO(new TaskBundle)) + val cmoTask = if (hasRVA23CMO) Some(Flipped(DecoupledIO(new TaskBundle))) else None /* read/write directory */ val dirRead_s1 = DecoupledIO(new DirRead()) // To directory, read meta/tag @@ -94,9 +94,11 @@ class RequestArb(implicit p: Parameters) extends L2Module val mshr_task_s1 = RegInit(0.U.asTypeOf(Valid(new TaskBundle()))) val cmo_task_s1 = Wire(Valid(new TaskBundle())) - cmo_task_s1.valid := io.dirRead_s1.ready && io.cmoTask.valid && resetFinish - cmo_task_s1.bits := io.cmoTask.bits - io.cmoTask.ready := io.dirRead_s1.ready && resetFinish && s2_ready + cmo_task_s1.valid := (if (io.cmoTask.isDefined) io.dirRead_s1.ready && io.cmoTask.get.valid && resetFinish else false.B) + cmo_task_s1.bits := (if (io.cmoTask.isDefined) io.cmoTask.get.bits else 0.U.asTypeOf(new TaskBundle)) + if (io.cmoTask.isDefined) { + io.cmoTask.get.ready := io.dirRead_s1.ready && resetFinish && s2_ready + } val s1_needs_replRead = mshr_task_s1.valid && 
mshr_task_s1.bits.fromA && mshr_task_s1.bits.replTask && ( mshr_task_s1.bits.opcode(2, 1) === Grant(2, 1) || diff --git a/src/main/scala/coupledL2/SourceB.scala b/src/main/scala/coupledL2/SourceB.scala index a1852747..5fb75629 100644 --- a/src/main/scala/coupledL2/SourceB.scala +++ b/src/main/scala/coupledL2/SourceB.scala @@ -59,7 +59,7 @@ class SourceB(implicit p: Parameters) extends L2Module { b.source := dcacheSourceIdStart b.address := Cat(task.tag, task.set, 0.U(offsetBits.W)) b.mask := Fill(beatBytes, 1.U(1.W)) - b.data := Cat(task.alias.getOrElse(0.U), task.needData) //Cat(task.alias.getOrElse(0.U), 0.U(1.W)) // this is the same as HuanCun + b.data := Cat(task.alias.getOrElse(0.U), 0.U(1.W)) // this is the same as HuanCun b.corrupt := false.B b } diff --git a/src/main/scala/coupledL2/tl2chi/MSHR.scala b/src/main/scala/coupledL2/tl2chi/MSHR.scala index 63c771e9..9e2069b3 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHR.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHR.scala @@ -365,7 +365,6 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { ) ) ob.alias.foreach(_ := meta.alias.getOrElse(0.U)) - ob.needData := Mux(req_cmoInval, 1.U, 0.U) // probe L1 toN and donot writeback dirty data ob } diff --git a/src/main/scala/coupledL2/tl2chi/Slice.scala b/src/main/scala/coupledL2/tl2chi/Slice.scala index 37e8e620..b50d5b83 100644 --- a/src/main/scala/coupledL2/tl2chi/Slice.scala +++ b/src/main/scala/coupledL2/tl2chi/Slice.scala @@ -96,7 +96,7 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] reqArb.io.sinkB <> rxsnp.io.task reqArb.io.sinkC <> sinkC.io.task reqArb.io.mshrTask <> mshrCtl.io.mshrTask - reqArb.io.cmoTask <> sinkCMO.io.task + reqArb.io.cmoTask.foreach(_ := sinkCMO.io.task) reqArb.io.fromMSHRCtl := mshrCtl.io.toReqArb reqArb.io.fromMainPipe := mainPipe.io.toReqArb reqArb.io.fromGrantBuffer := grantBuf.io.toReqArb diff --git a/src/main/scala/coupledL2/tl2tl/MainPipe.scala 
b/src/main/scala/coupledL2/tl2tl/MainPipe.scala index b7e410e1..51f15412 100644 --- a/src/main/scala/coupledL2/tl2tl/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2tl/MainPipe.scala @@ -230,6 +230,7 @@ class MainPipe(implicit p: Parameters) extends L2Module { ms_task.snpHitRelease := false.B ms_task.snpHitReleaseWithData := false.B ms_task.snpHitReleaseIdx := 0.U + ms_task.cmoTask := false.B /* ======== Resps to SinkA/B/C Reqs ======== */ val sink_resp_s3 = WireInit(0.U.asTypeOf(Valid(new TaskBundle))) // resp for sinkA/B/C request that does not need to alloc mshr diff --git a/src/main/scala/coupledL2/tl2tl/SinkB.scala b/src/main/scala/coupledL2/tl2tl/SinkB.scala index d97b88f0..bea2a3cc 100644 --- a/src/main/scala/coupledL2/tl2tl/SinkB.scala +++ b/src/main/scala/coupledL2/tl2tl/SinkB.scala @@ -70,6 +70,7 @@ class SinkB(implicit p: Parameters) extends L2Module { task.snpHitRelease := false.B task.snpHitReleaseWithData := false.B task.snpHitReleaseIdx := 0.U + task.cmoTask := false.B task } val task = fromTLBtoTaskBundle(io.b.bits) diff --git a/src/main/scala/coupledL2/tl2tl/Slice.scala b/src/main/scala/coupledL2/tl2tl/Slice.scala index b36fd4c2..f88c5753 100644 --- a/src/main/scala/coupledL2/tl2tl/Slice.scala +++ b/src/main/scala/coupledL2/tl2tl/Slice.scala @@ -159,6 +159,9 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] { sinkC.io.c <> inBuf.c(io.in.c) io.in.d <> inBuf.d(grantBuf.io.d) grantBuf.io.e <> inBuf.e(io.in.e) + io.cmoReq.ready := false.B + io.cmoResp.valid := false.B + io.cmoResp.bits := 0.U.asTypeOf(new RVA23CMOResp) /* connect downward channels */ io.out.a <> outBuf.a(mshrCtl.io.sourceA) diff --git a/src/test/scala/chi/TestTop.scala b/src/test/scala/chi/TestTop.scala index 68267ab7..401ac37e 100644 --- a/src/test/scala/chi/TestTop.scala +++ b/src/test/scala/chi/TestTop.scala @@ -117,19 +117,6 @@ class TestTop_CHIL2(numCores: Int = 1, numULAgents: Int = 0, banks: Int = 1, iss l2.mmioBridge.mmioNode := mmioClientNode } - val 
l1_cmo_sender_nodes = (0 until numCores).map(_ => BundleBridgeSource(() => DecoupledIO(new RVA23CMOReq))) - val l1_cmo_recver_nodes = (0 until numCores).map(_ => BundleBridgeSink(Some(() => DecoupledIO(new RVA23CMOResp)))) - l1_cmo_sender_nodes.zip(l2_nodes).zipWithIndex.foreach { case ((cmo_sender, l2), i) => - l2.cmo_sink_node.foreach { sink => - sink := cmo_sender - } - } - l1_cmo_recver_nodes.zip(l2_nodes).zipWithIndex.foreach { case ((cmo_recver, l2), i) => - l2.cmo_source_node.foreach { source => - cmo_recver := source - } - } - lazy val module = new LazyModuleImp(this){ val timer = WireDefault(0.U(64.W)) val logEnable = WireDefault(false.B) @@ -152,14 +139,6 @@ class TestTop_CHIL2(numCores: Int = 1, numULAgents: Int = 0, banks: Int = 1, iss } } } - l1_cmo_sender_nodes.zipWithIndex.foreach{ - case (node, i) => - node.makeIOs()(ValName(s"cmo_sender_port_$i")) - } - l1_cmo_recver_nodes.zipWithIndex.foreach{ - case (node, i) => - node.makeIOs()(ValName(s"cmo_recver_port_$i")) - } val io = IO(Vec(numCores, new Bundle() { val chi = new PortIO From a405e21aef7c71e1a3b2a70c8ca71907466abca5 Mon Sep 17 00:00:00 2001 From: Cai Luoshan Date: Wed, 21 Aug 2024 19:07:57 +0800 Subject: [PATCH 11/12] RVA23_CMO: fix compile bug for CHI version close cmo --- src/main/scala/coupledL2/tl2chi/Slice.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/scala/coupledL2/tl2chi/Slice.scala b/src/main/scala/coupledL2/tl2chi/Slice.scala index b50d5b83..f3bce9f7 100644 --- a/src/main/scala/coupledL2/tl2chi/Slice.scala +++ b/src/main/scala/coupledL2/tl2chi/Slice.scala @@ -97,6 +97,7 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] reqArb.io.sinkC <> sinkC.io.task reqArb.io.mshrTask <> mshrCtl.io.mshrTask reqArb.io.cmoTask.foreach(_ := sinkCMO.io.task) + if (!hasRVA23CMO) { sinkCMO.io.task.ready := false.B } reqArb.io.fromMSHRCtl := mshrCtl.io.toReqArb reqArb.io.fromMainPipe := mainPipe.io.toReqArb reqArb.io.fromGrantBuffer := grantBuf.io.toReqArb From 
a00e16118003a1e3fef0bc036d0ac42f6d81a48b Mon Sep 17 00:00:00 2001 From: Cai Luoshan Date: Thu, 22 Aug 2024 11:38:41 +0800 Subject: [PATCH 12/12] Param: fix hasRVA23CMO config --- src/main/scala/coupledL2/CoupledL2.scala | 2 +- src/main/scala/coupledL2/L2Param.scala | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/scala/coupledL2/CoupledL2.scala b/src/main/scala/coupledL2/CoupledL2.scala index cbc27150..c8eace6b 100644 --- a/src/main/scala/coupledL2/CoupledL2.scala +++ b/src/main/scala/coupledL2/CoupledL2.scala @@ -77,7 +77,7 @@ trait HasCoupledL2Parameters { def hasTPPrefetcher = prefetchers.exists(_.isInstanceOf[TPParameters]) def hasPrefetchBit = prefetchers.exists(_.hasPrefetchBit) // !! TODO.test this def hasPrefetchSrc = prefetchers.exists(_.hasPrefetchSrc) - def hasRVA23CMO = false + def hasRVA23CMO = cacheParams.hasRVA23CMO def topDownOpt = if(cacheParams.elaboratedTopDown) Some(true) else None def enableHintGuidedGrant = true diff --git a/src/main/scala/coupledL2/L2Param.scala b/src/main/scala/coupledL2/L2Param.scala index 1cfcd13c..ecb7f3a3 100644 --- a/src/main/scala/coupledL2/L2Param.scala +++ b/src/main/scala/coupledL2/L2Param.scala @@ -104,6 +104,8 @@ case class L2Param( elaboratedTopDown: Boolean = true, // env FPGAPlatform: Boolean = false, + // CMO + hasRVA23CMO: Boolean = false, // Network layer SAM sam: Seq[(AddressSet, Int)] = Seq(AddressSet.everything -> 0)