Skip to content

Commit

Permalink
Merge pull request #105 from StanfordVLSI/fpga_experiment
Browse files Browse the repository at this point in the history
Emulation updates
  • Loading branch information
sgherbst authored Jun 29, 2020
2 parents c932e74 + c170c95 commit d07ec12
Show file tree
Hide file tree
Showing 20 changed files with 793 additions and 113 deletions.
4 changes: 4 additions & 0 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ steps:
python3.7 -m venv venv
source venv/bin/activate
# scale down the size of the FFE and MLSD to fit on the regression FPGA
sed -i 's/\&ffe_length [[:digit:]]\+/\&ffe_length 4/g' config/system.yml
sed -i 's/\&estimate_depth [[:digit:]]\+/\&estimate_depth 4/g' config/system.yml
# run regression script
source regress.sh
Expand Down
14 changes: 14 additions & 0 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# configuration related to pull request comments
comment: no # do not comment PR with the result

coverage:
range: 50..90 # coverage below 50% is red, above 90% is green, and values in between are color-graded

status:
project: # settings affecting project coverage
default:
target: auto # auto % coverage target
threshold: 5% # allow for 5% reduction of coverage without failing

# do not run coverage on patch or changes
patch: false
67 changes: 0 additions & 67 deletions config/fpga/system_fpga.yml

This file was deleted.

26 changes: 25 additions & 1 deletion conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@ def pytest_addoption(parser):
'--ser_port', default='/dev/ttyUSB2', type=str, help='USB serial path.'
)

parser.addoption(
'--ffe_length', default=4, type=int, help='Number of FFE coefficients per channel.'
)

parser.addoption(
'--emu_clk_freq', default=5.0e6, type=float, help='Frequency of emulator clock (Hz)'
)

parser.addoption(
'--prbs_test_dur', default=10.0, type=float, help='Length of time of the PRBS emulation test.'
)

@pytest.fixture
def dump_waveforms(request):
    """Expose the value of the ``--dump_waveforms`` command-line option."""
    config = request.config
    return config.getoption('--dump_waveforms')
Expand All @@ -23,4 +35,16 @@ def board_name(request):

@pytest.fixture
def ser_port(request):
    """Expose the value of the ``--ser_port`` command-line option.

    The option is registered with a default of ``/dev/ttyUSB2`` and is
    described there as the USB serial path.
    """
    # A single return is sufficient; a second identical return after it
    # could never execute.
    return request.config.getoption('--ser_port')

@pytest.fixture
def ffe_length(request):
    """Expose the value of the ``--ffe_length`` command-line option.

    The option is registered as an ``int`` (default 4) giving the number
    of FFE coefficients per channel.
    """
    config = request.config
    return config.getoption('--ffe_length')

@pytest.fixture
def emu_clk_freq(request):
    """Expose the value of the ``--emu_clk_freq`` command-line option.

    The option is registered as a ``float`` (default 5.0e6) giving the
    emulator clock frequency in Hz.
    """
    config = request.config
    return config.getoption('--emu_clk_freq')

@pytest.fixture
def prbs_test_dur(request):
    """Expose the value of the ``--prbs_test_dur`` command-line option.

    The option is registered as a ``float`` (default 10.0) giving the
    length of time of the PRBS emulation test.
    """
    config = request.config
    return config.getoption('--prbs_test_dur')
2 changes: 1 addition & 1 deletion dragonphy/fpga_models/chan_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(self, filename=None, **system_values):
view = system_values['view']

# read in the channel data
chan = Filter.from_file(get_file('build/fpga_models/adapt_fir/chan.npy'))
chan = Filter.from_file(get_file('build/chip_src/adapt_fir/chan.npy'))

# create a function
domain = [chan.t_vec[0], chan.t_vec[-1]]
Expand Down
15 changes: 11 additions & 4 deletions dragonphy/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ def remove_dup(seq):
def find_preferred_impl(cell_name, view_order, override):
# if there is a specific view desired for this cell, use it instead of the view order
if cell_name in override:
view_order = [override[cell_name]]
if isinstance(override[cell_name], Path):
return override[cell_name]
else:
view_order = [override[cell_name]]

# walk through the view names in order, checking to see if there are any matches in each
for view_name in view_order:
Expand Down Expand Up @@ -175,9 +178,12 @@ def get_deps_asic(cell_name=None, impl_file=None, process='tsmc16'):
# Return the dependencies
return deps

def get_deps_cpu_sim(cell_name=None, impl_file=None):
deps = []
def get_deps_cpu_sim(cell_name=None, impl_file=None, override=None):
# set defaults
if override is None:
override = {}

deps = []
deps += get_deps(
cell_name=cell_name,
impl_file=impl_file,
Expand All @@ -190,7 +196,8 @@ def get_deps_cpu_sim(cell_name=None, impl_file=None):
'DAVE_TIMEUNIT': '1fs',
'NCVLOG': None,
'SIMULATION': None # for MDLL simulation
}
},
override=override
)

return deps
Expand Down
14 changes: 14 additions & 0 deletions experiments/cpu_emu_comparison/V2T_clock_gen_S2D.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// simple model used for performance comparison with emulation

`timescale 1s/1fs

// Simplified stand-in for V2T_clock_gen_S2D.
// No delay is modeled here: "out" is a direct copy of "in" and "outb" is
// its logical complement.
module V2T_clock_gen_S2D (
    input wire logic in,     // input signal
    output wire logic out,   // non-inverted output (+); follows "in" with no delay in this model
    output wire logic outb   // inverted output (-); follows "~in" with no delay in this model
);

    // Both outputs are driven by continuous assignments, so both ports are
    // declared as nets ("wire logic") for consistency with each other.
    assign out = in;
    assign outb = ~in;

endmodule
25 changes: 25 additions & 0 deletions experiments/cpu_emu_comparison/experiment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from pathlib import Path
from dragonphy import *

# Directory containing this script; build products go in a "build" subfolder.
THIS_DIR = Path(__file__).parent.resolve()
BUILD_DIR = THIS_DIR / 'build'

# Per-cell overrides handed to get_deps_cpu_sim. Most entries point at the
# simplified SystemVerilog models stored next to this script; the
# mdll_r1_top entry is a plain string rather than a file path.
model_overrides = {
    'snh': THIS_DIR / 'snh.sv',
    'V2T_clock_gen_S2D': THIS_DIR / 'V2T_clock_gen_S2D.sv',
    'stochastic_adc_PR': THIS_DIR / 'stochastic_adc_PR.sv',
    'phase_interpolator': THIS_DIR / 'phase_interpolator.sv',
    'input_divider': THIS_DIR / 'input_divider.sv',
    'output_buffer': THIS_DIR / 'output_buffer.sv',
    'mdll_r1_top': 'chip_stubs',
}

# Resolve the source files needed to simulate test.sv and show them.
deps = get_deps_cpu_sim(impl_file=THIS_DIR / 'test.sv', override=model_overrides)
print(deps)

# Run the simulation without dumping waveforms.
tester = DragonTester(ext_srcs=deps, directory=BUILD_DIR, dump_waveforms=False)
tester.run()
26 changes: 26 additions & 0 deletions experiments/cpu_emu_comparison/input_divider.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// simple model used for performance comparison with emulation

`timescale 1s/1fs

// Simplified input divider: "out" toggles on every rising edge of "in",
// i.e. it runs at half the rate of "in". None of the other inputs are
// referenced by this model.
module input_divider (
    input wire logic in,
    input wire logic in_mdll,
    input wire logic sel_clk_source,
    input wire logic en,
    input wire logic en_meas,
    input wire logic [2:0] ndiv,
    input wire logic bypass_div,
    input wire logic bypass_div2,
    output wire logic out,
    output wire logic out_meas
);
    // toggle state, starts low
    logic half_rate = 1'b0;

    always @(posedge in) half_rate <= ~half_rate;

    // out_meas is unused
    assign out_meas = 1'b0;

    // divided output
    assign out = half_rate;
endmodule
24 changes: 24 additions & 0 deletions experiments/cpu_emu_comparison/output_buffer.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// simple model used for performance comparison with emulation

// Simplified output buffer: every output is held at logic 0 and none of
// the inputs are referenced.
module output_buffer (
    input [15:0] bufferend_signals,
    input [3:0] sel_outbuff,
    input [3:0] sel_trigbuff,
    input en_outbuff,
    input en_trigbuff,
    input bypass_out_div,
    input bypass_trig_div,
    input [2:0] Ndiv_outbuff,
    input [2:0] Ndiv_trigbuff,
    output clock_out_p,
    output clock_out_n,
    output trigg_out_p,
    output trigg_out_n
);

    // tie all four outputs low with a single assignment
    assign {clock_out_p, clock_out_n, trigg_out_p, trigg_out_n} = 4'b0000;

endmodule
67 changes: 67 additions & 0 deletions experiments/cpu_emu_comparison/phase_interpolator.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// simple model used for performance comparison with emulation

`timescale 1s/1fs

`include "iotype.sv"

// Simplified phase interpolator model.
//
// Only one behavior is modeled: clk_out_slice is a copy of clk_in delayed by
// an amount set by "ctl". Of the inputs, only clk_in and ctl are referenced;
// every other output is tied to zero.
module phase_interpolator #(
parameter Nbit = 9, // width of "ctl"; the delay step is 250 ps / 2**Nbit
parameter Nctl_dcdl = 2, // width of the ctl_dcdl_* inputs
parameter Nunit = 32, // sets the widths of inc_del, ext_Qperi, Qperi and max_sel_mux
parameter Nblender = 4 // not referenced by this model
)(
input rstb,
input clk_in, // clock to be delayed
input clk_async,
input clk_encoder,
input disable_state,
input en_arb,
input en_cal,
input en_clk_sw,
input en_delay,
input en_ext_Qperi,
input en_gf,
input ctl_valid,
input [Nbit-1:0] ctl, // delay code, read each time clk_in changes
input [Nctl_dcdl-1:0] ctl_dcdl_sw,
input [Nctl_dcdl-1:0] ctl_dcdl_slice,
input [Nctl_dcdl-1:0] ctl_dcdl_clk_encoder,
input [Nunit-1:0] inc_del,
input [$clog2(Nunit)-1:0] ext_Qperi,
input [1:0] sel_pm_sign,
input en_pm,

output cal_out,
output reg clk_out_slice=1'b0, // delayed copy of clk_in; starts low
output clk_out_sw,
output del_out,

output [$clog2(Nunit)-1:0] Qperi,
output [$clog2(Nunit)-1:0] max_sel_mux,
output cal_out_dmm,
output [19:0] pm_out
);

// delay clk_in to clk_out_slice

// most recently computed delay, in seconds
real delay_s;
// runs on every change of clk_in (both edges)
always @(clk_in) begin
// compute the delay
// delay = (ctl / 2**Nbit) * 250e-12 s, so it spans 0 up to just under 250 ps
delay_s = ((1.0*ctl)/(2.0**(Nbit)))*(250.0e-12);

// apply the delay
// non-blocking assignment with an intra-assignment delay: the current value
// of clk_in is captured now and written to clk_out_slice delay_s later,
// without stalling this always block
clk_out_slice <= #(delay_s*1s) clk_in;
end

// outputs that are not modeled

assign cal_out = 0;
assign clk_out_sw = 0;
assign del_out = 0;
assign Qperi = 0;
assign max_sel_mux = 0;
assign cal_out_dmm = 0;
assign pm_out = 0;

endmodule

24 changes: 24 additions & 0 deletions experiments/cpu_emu_comparison/results.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Jun 22, 2020
* Simulation with 4x channels:
* PRBS test took 27.614053 seconds.
* Total bits: 600000
* Throughput: 21.7 kb/s

Jun 24, 2020
* Emulation with 16x channels on ZC706:
* PRBS test took 30.073444843292236 seconds.
* Total bits: 150258080
* Throughput: 4.996 Mb/s
* Slice LUTs: 58678 / 218600
* Slice Registers: 24928 / 437200
* Slice: 19436 / 54650
* DSP: 299 / 900
* BRAM: 42.5 / 545
* Build time: 30m 35.161s with Vivado 2020.1 on Intel(R) Core(TM) i5-2320 CPU @ 3.00GHz, Ubuntu 18.04.2 LTS, 6 GB RAM
* use `cat /proc/cpuinfo`, `cat /proc/meminfo`, `lsb_release -a`
* Simulation with 16x channels:
* PRBS test took 42.509094 seconds.
* Total bits: 608192
* Throughput: 14.3 kb/s
* r7cad-generic processor, CentOS Linux release 7.7.1908 (Core), 128 GB RAM
* /proc/cpuinfo did not display the real CPU information since r7cad-generic is a VM
24 changes: 24 additions & 0 deletions experiments/cpu_emu_comparison/snh.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// simple model used for performance comparison with emulation

`timescale 1s/1fs

`include "iotype.sv"

// Simplified sample-and-hold model.
//
// The inputs are passed straight through: every element of out_p is a
// continuous copy of in_p and every element of out_n is a continuous copy of
// in_n. The clk and clkb inputs are not referenced, so no sampling or holding
// is modeled. Nout comes from const_pack; `pwl_t is a macro presumably
// provided by iotype.sv (not visible here).
module snh import const_pack::Nout; (
input wire logic [Nout-1:0] clk, // sampling clocks of the first s&h sw group (unused by this model)
input wire logic [Nout-1:0] clkb, // presumably the complement of clk (unused by this model)
input `pwl_t in_p, // + signal input
input `pwl_t in_n, // - signal input
output `pwl_t out_p [Nout-1:0], // (+) outputs, each a copy of in_p
output `pwl_t out_n [Nout-1:0] // (-) outputs, each a copy of in_n
);

// fan the differential input out to all Nout output pairs
genvar i;
generate
for (i=0; i<Nout; i=i+1) begin
assign out_p[i] = in_p;
assign out_n[i] = in_n;
end
endgenerate

endmodule
Loading

0 comments on commit d07ec12

Please sign in to comment.