Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support temporal prefetcher #50

Merged
merged 11 commits into from
Aug 30, 2023
1 change: 1 addition & 0 deletions src/main/scala/coupledL2/Common.scala
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ class NestedWriteback(implicit p: Parameters) extends L2Bundle {

// Bundle carrying a prefetch request received from outside the L2 (consumed via pf_recv_node in CoupledL2).
class PrefetchRecv extends Bundle {
// 64-bit address to prefetch (presumably a full physical address; confirm against the sender).
val addr = UInt(64.W)
// Originator of the prefetch, encoded with MemReqSource ids (MemReqSource.reqSourceBits wide).
val pf_source = UInt(MemReqSource.reqSourceBits.W)
// Qualifies addr; CoupledL2 drives the prefetcher's recv_addr.valid from this bit.
val addr_valid = Bool()
// Forwarded by CoupledL2 to the prefetcher's io_l2_pf_en input.
val l2_pf_en = Bool()
}
Expand Down
4 changes: 3 additions & 1 deletion src/main/scala/coupledL2/CoupledL2.scala
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ trait HasCoupledL2Parameters {
// Prefetch
val prefetchOpt = cacheParams.prefetch
val hasPrefetchBit = prefetchOpt.nonEmpty && prefetchOpt.get.hasPrefetchBit
val hasPrefetchSrc = prefetchOpt.nonEmpty && prefetchOpt.get.hasPrefetchSrc
val topDownOpt = if(cacheParams.elaboratedTopDown) Some(true) else None

val useFIFOGrantBuffer = true
Expand Down Expand Up @@ -268,7 +269,8 @@ class CoupledL2(implicit p: Parameters) extends LazyModule with HasCoupledL2Para
pf_recv_node match {
case Some(x) =>
prefetcher.get.io.recv_addr.valid := x.in.head._1.addr_valid
prefetcher.get.io.recv_addr.bits := x.in.head._1.addr
prefetcher.get.io.recv_addr.bits.addr := x.in.head._1.addr
prefetcher.get.io.recv_addr.bits.pfSource := x.in.head._1.pf_source
prefetcher.get.io_l2_pf_en := x.in.head._1.l2_pf_en
case None =>
// No prefetch-receive node: tie the prefetcher's whole recv_addr port off to zero.
// Zero is cast to the port's own type rather than ValidIO(UInt(64.W)), because
// recv_addr.bits is a bundle (addr + pfSource) and a UInt-typed source would not
// match it on connection.
prefetcher.foreach(pf => pf.io.recv_addr := 0.U.asTypeOf(pf.io.recv_addr))
Expand Down
8 changes: 6 additions & 2 deletions src/main/scala/coupledL2/Directory.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import freechips.rocketchip.util.SetAssocLRU
import coupledL2.utils._
import utility.{ParallelPriorityMux, RegNextN}
import chipsalliance.rocketchip.config.Parameters
import coupledL2.prefetch.PfSource
import freechips.rocketchip.tilelink.TLMessages._

class MetaEntry(implicit p: Parameters) extends L2Bundle {
Expand All @@ -32,6 +33,7 @@ class MetaEntry(implicit p: Parameters) extends L2Bundle {
// TODO: record specific state of clients instead of just 1-bit
val alias = aliasBitsOpt.map(width => UInt(width.W)) // alias bits of client
val prefetch = if (hasPrefetchBit) Some(Bool()) else None // whether block is prefetched
val prefetchSrc = if (hasPrefetchSrc) Some(UInt(PfSource.pfSourceBits.W)) else None // prefetch source
val accessed = Bool()

def =/=(entry: MetaEntry): Bool = {
Expand All @@ -44,14 +46,16 @@ object MetaEntry {
val init = WireInit(0.U.asTypeOf(new MetaEntry))
init
}
def apply(dirty: Bool, state: UInt, clients: UInt, alias: Option[UInt],
prefetch: Bool = false.B, accessed: Bool = false.B)(implicit p: Parameters) = {
def apply(dirty: Bool, state: UInt, clients: UInt, alias: Option[UInt], prefetch: Bool = false.B,
pfsrc: UInt = PfSource.NoWhere.id.U, accessed: Bool = false.B
)(implicit p: Parameters) = {
val entry = Wire(new MetaEntry)
entry.dirty := dirty
entry.state := state
entry.clients := clients
entry.alias.foreach(_ := alias.getOrElse(0.U))
entry.prefetch.foreach(_ := prefetch)
entry.prefetchSrc.foreach(_ := pfsrc)
entry.accessed := accessed
entry
}
Expand Down
3 changes: 2 additions & 1 deletion src/main/scala/coupledL2/MSHR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import freechips.rocketchip.tilelink._
import freechips.rocketchip.tilelink.TLMessages._
import freechips.rocketchip.tilelink.TLPermissions._
import chipsalliance.rocketchip.config.Parameters
import coupledL2.prefetch.PrefetchTrain
import coupledL2.prefetch.{PfSource, PrefetchTrain}
import coupledL2.utils.XSPerfAccumulate

class MSHRTasks(implicit p: Parameters) extends L2Bundle {
Expand Down Expand Up @@ -375,6 +375,7 @@ class MSHR(implicit p: Parameters) extends L2Module {
),
alias = Some(aliasFinal),
prefetch = req_prefetch || dirResult.hit && meta_pft,
pfsrc = PfSource.fromMemReqSource(req.reqSource),
accessed = req_acquire || req_get
)
mp_grant.metaWen := true.B
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/coupledL2/SinkA.scala
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ class SinkA(implicit p: Parameters) extends L2Module {
task.tagWen := false.B
task.dsWen := false.B
task.wayMask := 0.U(cacheParams.ways.W)
task.reqSource := MemReqSource.L2Prefetch.id.U
task.reqSource := req.pfSource
task.replTask := false.B
task.vaddr.foreach(_ := 0.U)
task
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/coupledL2/Slice.scala
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ class Slice()(implicit p: Parameters) extends L2Module {
p.train <> mainPipe.io.prefetchTrain.get
sinkA.io.prefetchReq.get <> p.req
p.resp <> grantBuf.io.prefetchResp.get
p.recv_addr := 0.U.asTypeOf(ValidIO(UInt(64.W)))
p.recv_addr := 0.U.asTypeOf(p.recv_addr)
}

/* input & output signals */
Expand Down
74 changes: 66 additions & 8 deletions src/main/scala/coupledL2/TopDownMonitor.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ package coupledL2
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import coupledL2.utils.{XSPerfAccumulate, XSPerfHistogram, XSPerfRolling}
import coupledL2.prefetch.PfSource
import coupledL2.utils._
import utility.MemReqSource

class TopDownMonitor()(implicit p: Parameters) extends L2Module {
Expand Down Expand Up @@ -96,6 +97,11 @@ class TopDownMonitor()(implicit p: Parameters) extends L2Module {
}
}

/** True when the directory result belongs to a request whose source is
  * MemReqSource.CPULoadData or MemReqSource.CPUStoreData.
  *
  * @param r directory result whose replacerInfo.reqSource is inspected
  * @return Bool that is asserted for CPU load-data / store-data requests
  */
def reqFromCPU(r: DirResult): Bool = {
val src = r.replacerInfo.reqSource
val isLoadData = src === MemReqSource.CPULoadData.id.U
val isStoreData = src === MemReqSource.CPUStoreData.id.U
isLoadData || isStoreData
}

for (i <- 0 until MemReqSource.ReqSourceCount.id) {
val sourceMatchVec = dirResultMatchVec(r => r.replacerInfo.reqSource === i.U)
val sourceMatchVecMiss = dirResultMatchVec(r => r.replacerInfo.reqSource === i.U && !r.hit)
Expand All @@ -110,17 +116,39 @@ class TopDownMonitor()(implicit p: Parameters) extends L2Module {
*/
// prefetch accuracy calculation
val l2prefetchSent = dirResultMatchVec(
r => (r.replacerInfo.reqSource === MemReqSource.L2Prefetch.id.U) && !r.hit
r => !r.hit &&
(r.replacerInfo.reqSource === MemReqSource.Prefetch2L2BOP.id.U ||
r.replacerInfo.reqSource === MemReqSource.Prefetch2L2SMS.id.U ||
r.replacerInfo.reqSource === MemReqSource.Prefetch2L2TP.id.U)
)
val l2prefetchSentBOP = dirResultMatchVec(
r => !r.hit && r.replacerInfo.reqSource === MemReqSource.Prefetch2L2BOP.id.U
)
val l2prefetchSentSMS = dirResultMatchVec(
r => !r.hit && r.replacerInfo.reqSource === MemReqSource.Prefetch2L2SMS.id.U
)
val l2prefetchSentTP = dirResultMatchVec(
r => !r.hit && r.replacerInfo.reqSource === MemReqSource.Prefetch2L2TP.id.U
)

val l2prefetchUseful = dirResultMatchVec(
r => (r.replacerInfo.reqSource === MemReqSource.CPULoadData.id.U
|| r.replacerInfo.reqSource === MemReqSource.CPUStoreData.id.U) &&
r.hit &&
r.meta.prefetch.getOrElse(false.B)
r => reqFromCPU(r) && r.hit && r.meta.prefetch.getOrElse(false.B)
)
val l2prefetchUsefulBOP = dirResultMatchVec(
r => reqFromCPU(r) && r.hit &&
r.meta.prefetch.getOrElse(false.B) && r.meta.prefetchSrc.getOrElse(PfSource.NoWhere.id.U) === PfSource.BOP.id.U
)
val l2prefetchUsefulSMS = dirResultMatchVec(
r => reqFromCPU(r) && r.hit &&
r.meta.prefetch.getOrElse(false.B) && r.meta.prefetchSrc.getOrElse(PfSource.NoWhere.id.U) === PfSource.SMS.id.U
)
val l2prefetchUsefulTP = dirResultMatchVec(
r => reqFromCPU(r) && r.hit &&
r.meta.prefetch.getOrElse(false.B) && r.meta.prefetchSrc.getOrElse(PfSource.NoWhere.id.U) === PfSource.TP.id.U
)

val l2demandRequest = dirResultMatchVec(
r => (r.replacerInfo.reqSource === MemReqSource.CPULoadData.id.U
|| r.replacerInfo.reqSource === MemReqSource.CPUStoreData.id.U)
r => reqFromCPU(r)
)
val l2prefetchLate = io.latePF

Expand All @@ -129,6 +157,21 @@ class TopDownMonitor()(implicit p: Parameters) extends L2Module {
PopCount(l2prefetchUseful), PopCount(l2prefetchSent),
1000, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchAccuracyBOP",
PopCount(l2prefetchUsefulBOP), PopCount(l2prefetchSentBOP),
1000, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchAccuracySMS",
PopCount(l2prefetchUsefulSMS), PopCount(l2prefetchSentSMS),
1000, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchAccuracyTP",
PopCount(l2prefetchUsefulTP), PopCount(l2prefetchSentTP),
1000, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchLate",
PopCount(l2prefetchLate), PopCount(l2prefetchUseful),
Expand All @@ -139,4 +182,19 @@ class TopDownMonitor()(implicit p: Parameters) extends L2Module {
PopCount(l2prefetchUseful), PopCount(l2demandRequest),
1000, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchCoverageBOP",
PopCount(l2prefetchUsefulBOP), PopCount(l2demandRequest),
1000, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchCoverageSMS",
PopCount(l2prefetchUsefulSMS), PopCount(l2demandRequest),
1000, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchCoverageTP",
PopCount(l2prefetchUsefulTP), PopCount(l2demandRequest),
1000, clock, reset
)
}
5 changes: 3 additions & 2 deletions src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package coupledL2.prefetch

import utility.SRAMTemplate
import utility.{MemReqSource, SRAMTemplate}
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
Expand All @@ -43,6 +43,7 @@ case class BOPParameters(
))
extends PrefetchParameters {
override val hasPrefetchBit: Boolean = true
override val hasPrefetchSrc: Boolean = true
override val inflightEntries: Int = 16
}

Expand Down Expand Up @@ -290,7 +291,7 @@ class BestOffsetPrefetch(implicit p: Parameters) extends BOPModule {

io.req.valid := req_valid
io.req.bits := req
io.req.bits.isBOP := true.B
io.req.bits.pfSource := MemReqSource.Prefetch2L2BOP.id.U
io.train.ready := scoreTable.io.req.ready && (!req_valid || io.req.ready)
io.resp.ready := rrTable.io.w.ready

Expand Down
22 changes: 22 additions & 0 deletions src/main/scala/coupledL2/prefetch/PrefetchParameters.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import coupledL2._
import utility.MemReqSource

// Configuration every prefetcher parameter set must provide.
trait PrefetchParameters {
// When true, HasCoupledL2Parameters.hasPrefetchBit is set and MetaEntry carries a `prefetch` bit.
val hasPrefetchBit: Boolean
// When true, HasCoupledL2Parameters.hasPrefetchSrc is set and MetaEntry carries a `prefetchSrc` field.
val hasPrefetchSrc: Boolean
val inflightEntries: Int // max num of inflight prefetch reqs
}

Expand All @@ -33,3 +35,23 @@ trait HasPrefetchParameters extends HasCoupledL2Parameters {

abstract class PrefetchBundle(implicit val p: Parameters) extends Bundle with HasPrefetchParameters
abstract class PrefetchModule(implicit val p: Parameters) extends Module with HasPrefetchParameters

/** Compact tag recording which prefetcher a block came from.
  *
  * The ids follow declaration order (NoWhere = 0, SMS = 1, BOP = 2, TP = 3), and
  * `PfSourceCount` must stay last so that `pfSourceBits` covers every real value.
  */
object PfSource extends Enumeration {
  val NoWhere = Value("NoWhere")
  val SMS = Value("SMS")
  val BOP = Value("BOP")
  val TP = Value("TP")

  // Sentinel: its id equals the number of real sources declared above.
  val PfSourceCount = Value("PfSourceCount")
  // Width in bits needed to encode any real source id.
  val pfSourceBits = log2Ceil(PfSourceCount.id)

  /** Translates a MemReqSource-encoded request source into a PfSource id.
    *
    * @param s request source, encoded with MemReqSource ids
    * @return `pfSourceBits`-wide UInt holding the matching PfSource id; NoWhere when
    *         `s` is none of Prefetch2L2BOP / Prefetch2L2SMS / Prefetch2L2TP
    */
  def fromMemReqSource(s: UInt): UInt = {
    // Default to NoWhere; overridden below only for the three L2 prefetch sources.
    val pfsrc = WireInit(NoWhere.id.U.asTypeOf(UInt(pfSourceBits.W)))
    switch(s) {
      // Each prefetch request source maps to the tag of the same name
      // (the BOP and SMS targets were previously swapped).
      is (MemReqSource.Prefetch2L2BOP.id.U) { pfsrc := BOP.id.U }
      is (MemReqSource.Prefetch2L2SMS.id.U) { pfsrc := SMS.id.U }
      is (MemReqSource.Prefetch2L2TP.id.U) { pfsrc := TP.id.U }
    }
    pfsrc
  }
}
11 changes: 6 additions & 5 deletions src/main/scala/coupledL2/prefetch/PrefetchReceiver.scala
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@ import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink._
import coupledL2._
import utility.Pipeline
import utility.{MemReqSource, Pipeline}

// TODO: PrefetchReceiver is temporarily used since L1&L2 do not support Hint.
// TODO: Delete this after Hint is accomplished.

case class PrefetchReceiverParams(n: Int = 32) extends PrefetchParameters {
override val hasPrefetchBit: Boolean = true
override val hasPrefetchBit: Boolean = true
override val hasPrefetchSrc: Boolean = true
override val inflightEntries: Int = n
}

Expand All @@ -38,11 +39,11 @@ class PrefetchReceiver()(implicit p: Parameters) extends PrefetchModule {
io.train.ready := true.B
io.resp.ready := true.B

io.req.bits.tag := parseFullAddress(io.recv_addr.bits)._1
io.req.bits.set := parseFullAddress(io.recv_addr.bits)._2
io.req.bits.tag := parseFullAddress(io.recv_addr.bits.addr)._1
io.req.bits.set := parseFullAddress(io.recv_addr.bits.addr)._2
io.req.bits.needT := false.B
io.req.bits.isBOP := false.B
io.req.bits.source := 0.U // TODO: ensure source 0 is dcache
io.req.bits.pfSource := io.recv_addr.bits.pfSource
io.req.valid := io.recv_addr.valid

}
Loading
Loading