-
Notifications
You must be signed in to change notification settings - Fork 2
/
designtemplate.py
156 lines (137 loc) · 5.67 KB
/
designtemplate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
from migen.fhdl.std import *
from migen.genlib.fsm import FSM, NextState, NextValue
from migen.fhdl import verilog
import riffa, replacementpolicies
from virtmem import VirtmemWrapper
class DesignTemplate(VirtmemWrapper):
    """Template design for implementing a hardware function that accesses virtual memory via the Virtmem interface.

    This template follows the common pattern of a function that:
    1. receives an argument struct via a RIFFA channel
    2. executes a loop that reads from and writes to virtual memory
    3. returns a result struct (and implied notification that the function
       has finished executing) via the same RIFFA channel

    Parameters:
        combined_interface_rx, combined_interface_tx: forwarded unchanged to
            VirtmemWrapper.
        c_pci_data_width: width in bits of one data beat on the channel; the
            argument and result structs are split into beats of this width.
        wordsize: width in bits of the read_data / write_data registers.
        ptrsize: width in bits of the read_adr / write_adr registers.
        drive_clocks: forwarded to VirtmemWrapper (simulation does not
            support multiple clock regions).
        arg_struct_size: size in bits of the argument struct; must be a
            positive multiple of 32 (pad on SW side if necessary).
        res_struct_size: size in bits of the result struct; must be a
            positive multiple of 32 (pad on SW side if necessary).

    Raises:
        ValueError: if either struct size is not a positive multiple of 32.
    """

    def __init__(self, combined_interface_rx, combined_interface_tx,
                 c_pci_data_width=32, wordsize=32, ptrsize=64,
                 drive_clocks=True, arg_struct_size=32, res_struct_size=32):
        # The transfer length is announced in 32-bit words, so both structs
        # have to be whole multiples of 32 bits.
        for label, size in (("arg_struct_size", arg_struct_size),
                            ("res_struct_size", res_struct_size)):
            if size <= 0 or size % 32:
                raise ValueError(
                    "{} must be a positive multiple of 32, got {!r}".format(label, size))

        # init the Virtual memory module superclass with the same data sizes
        # drive_clocks: simulation does not support multiple clock regions
        VirtmemWrapper.__init__(
            self,
            combined_interface_rx=combined_interface_rx,
            combined_interface_tx=combined_interface_tx,
            c_pci_data_width=c_pci_data_width,
            wordsize=wordsize,
            ptrsize=ptrsize,
            drive_clocks=drive_clocks)

        ###

        # get a channel for communication of base pointers etc.

        # rx/tx variables
        rx, tx = self.get_channel(2)
        arg_struct = Signal(arg_struct_size)
        res_struct = Signal(res_struct_size)

        # Number of channel beats per struct, rounded UP so that a struct
        # which is not a whole multiple of the channel width still gets its
        # final, partially filled beat.
        rx_beats = -(-arg_struct_size // c_pci_data_width)
        tx_beats = -(-res_struct_size // c_pci_data_width)

        # virtmem access variables
        ##TODO: give values to these variables
        read_adr = Signal(ptrsize)
        read_data = Signal(wordsize)
        write_adr = Signal(ptrsize)
        write_data = Signal(wordsize)
        # NOTE(review): write_data is declared but this template never drives
        # it onto a virtmem port -- connect it in PUT_DATA once the loop body
        # is filled in.

        # function variables
        done = Signal()  # loop condition

        fsm = FSM()
        self.submodules += fsm

        fsm.act("IDLE",  # wait for instruction to start calculating
            If(rx.start,
                NextState("RECEIVE0")
            )
        )

        # receive function arg struct, one channel beat per state
        for n in range(rx_beats):
            lo = n * c_pci_data_width
            hi = min(lo + c_pci_data_width, arg_struct_size)
            # Rebuild arg_struct as <bits below this beat> <new data> <bits
            # above this beat>. Empty pieces are left out entirely so that no
            # zero-width slice is ever created.
            parts = []
            if lo > 0:
                parts.append(arg_struct[:lo])
            if hi - lo == c_pci_data_width:
                parts.append(rx.data)
            else:
                # partial beat: only the low bits of the beat belong to the struct
                parts.append(rx.data[:hi - lo])
            if hi < arg_struct_size:
                parts.append(arg_struct[hi:])
            fsm.act("RECEIVE" + str(n),
                rx.ack.eq(1),
                If(rx.data_valid,
                    rx.data_ren.eq(1),
                    NextValue(arg_struct, Cat(*parts)),
                    NextState("RECEIVE" + str(n + 1))
                )
            )
        fsm.act("RECEIVE" + str(rx_beats),
            ##TODO: break up arg struct into members, pre-loop initializations
            NextState("GET_DATA")
        )

        # execute function loop
        fsm.act("GET_DATA",  # read loop data from virtual memory
            self.virtmem.virt_addr.eq(read_adr),
            self.virtmem.req.eq(1),
            self.virtmem.write_enable.eq(0),
            If(self.virtmem.done,
                self.virtmem.req.eq(0),
                NextValue(read_data, self.virtmem.data_read),
                NextState("CALCULATE")
            )
        )
        fsm.act("CALCULATE",
            ##TODO: loop body
            NextState("PUT_DATA")
        )
        fsm.act("PUT_DATA",  # write loop modifications to virtual memory
            self.virtmem.virt_addr.eq(write_adr),
            self.virtmem.req.eq(1),
            self.virtmem.write_enable.eq(1),
            If(self.virtmem.done,
                self.virtmem.req.eq(0),
                If(~done,  # loop
                    NextState("GET_DATA")
                ).Else(  # end function body
                    NextState("FLUSH")
                )
            )
        )

        # flush virtmem cache modifications to main memory
        fsm.act("FLUSH",
            self.virtmem.flush_all.eq(1),
            If(self.virtmem.done,
                NextState("TRANSMIT_INIT")
            )
        )

        # send function return struct
        fsm.act("TRANSMIT_INIT",  # start transmission
            tx.start.eq(1),
            tx.len.eq(res_struct_size//32),
            tx.last.eq(1),
            If(tx.ack,
                NextState("TRANSMIT0")
            )
        )
        for n in range(tx_beats):
            lo = n * c_pci_data_width
            # the final beat may be narrower than the channel
            hi = min(lo + c_pci_data_width, res_struct_size)
            fsm.act("TRANSMIT" + str(n),  # TX
                tx.start.eq(1),
                tx.len.eq(res_struct_size//32),
                tx.last.eq(1),
                tx.data_valid.eq(1),
                tx.data.eq(res_struct[lo:hi]),
                If(tx.data_ren,
                    NextState("TRANSMIT" + str(n + 1))
                )
            )
        fsm.act("TRANSMIT" + str(tx_beats),  # transmission finished
            ##TODO: reset loop variables
            NextState("IDLE")
        )
def main():
    """Instantiate DesignTemplate on a 4-channel RIFFA interface and print it as Verilog.

    The top-level ports are renamed to chnl_rx* / chnl_tx* (plus clk / rst)
    before conversion, and the last channel is wired up as a loopback.
    """
    c_pci_data_width = 128  # PCIe lane width
    ptrsize = 64  # pointer size of the host system, 32 bit or 64 bit
    wordsize = 32  # width of data port to design (any power of 2)
    num_chnls = 4  # Virtmem takes 2 channels, add more for direct use, plus last one for loopback "are you there?" test

    combined_interface_tx = riffa.Interface(data_width=c_pci_data_width, num_chnls=num_chnls)
    combined_interface_rx = riffa.Interface(data_width=c_pci_data_width, num_chnls=num_chnls)

    m = DesignTemplate(
        combined_interface_rx=combined_interface_rx,
        combined_interface_tx=combined_interface_tx,
        c_pci_data_width=c_pci_data_width,
        wordsize=wordsize,
        ptrsize=ptrsize)

    # add a loopback to test responsiveness
    test_rx, test_tx = m.get_channel(num_chnls - 1)
    m.comb += test_rx.connect(test_tx)

    # Every interface field that becomes a top-level port.
    port_fields = ("start", "ack", "last", "len", "off", "data", "data_valid", "data_ren")
    directions = (("rx", combined_interface_rx), ("tx", combined_interface_tx))

    # Clock/reset ports first, then the fields of both interfaces.
    ios = {m.rx_clk, m.tx_clk, m.cd_sys.clk, m.cd_sys.rst}
    for direction, interface in directions:
        for field in port_fields:
            port = getattr(interface, field)
            # "start" is exported as the bare channel name, every other
            # field carries its own name as a suffix.
            suffix = "" if field == "start" else "_" + field
            port.name_override = "chnl_" + direction + suffix
            ios.add(port)

    m.cd_sys.clk.name_override = "clk"
    m.cd_sys.rst.name_override = "rst"
    m.rx_clk.name_override = "chnl_rx_clk"
    m.tx_clk.name_override = "chnl_tx_clk"

    print(verilog.convert(m, name="top", ios=ios))
# Script entry point: generate and print the Verilog only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()