some code cleanup, finished capacity-limited model

StanfordAHA · Nov 4, 2024 · 1b63c28 · 1b63c28
1 parent 6c426cf
commit 1b63c28
Show file tree

Hide file tree

Showing 3 changed files with 99 additions and 79 deletions.
diff --git a/ASPLOS_EXP/create_mflowgen_experiments.py b/ASPLOS_EXP/create_mflowgen_experiments.py
@@ -771,7 +771,7 @@ def create_config_graph(data, filename="scatterplot"):
 
     }
     plt.figure(figsize=(10, 10))
-    plt.scatter(config_size, Config, c = "blue")
+    plt.scatter(config_size, Config, c="blue")
 
     # Adding labels and title
     plt.xlabel('# Config Bits', fontsize=12)
@@ -1410,7 +1410,6 @@ def get_power_breakdown_file(file_path):
     top_idx = 0
     for i_, line in enumerate(all_file_content):
         if line.startswith('lakespec'):
-        # if 'lakespec' in line:
             top_idx = i_
             break
 
@@ -1537,7 +1536,6 @@ def get_power_breakdown_file(file_path):
                 for z_ in range(len(data_idx)):
                     memintf_dec_power[z_] += line_data[z_]
 
-
         assert num_matches <= 1, f"Line ({mod}) matched too many items...{num_matches}"
 
     other_power = [0.0, 0.0, 0.0, 0.0, 0.0]
@@ -1590,7 +1588,7 @@ def get_area_breakdown_dir(directory):
             design_point_path = os.path.join(design_path, design_point)
             man_info = get_manifest_info(design_point_path)
 
-            signoff_step  = find_step(design_point_path, 'cadence-innovus-signoff')
+            signoff_step = find_step(design_point_path, 'cadence-innovus-signoff')
             print(f"Found signoff step: {signoff_step}")
             relative_area_file = os.path.join(signoff_step,
                                               "reports",
@@ -1712,16 +1710,16 @@ def get_area_breakdown_file(file_path):
     if all_file_content is None:
         print(f"No signoff area report for {file_path}")
         return {
-        'total': 0.0,
-        'AG': 0.0,
-        'SG': 0.0,
-        'ID': 0.0,
-        'Port': 0.0,
-        'Storage': 0.0,
-        'Config': 0.0,
-        'MemintfDec': 0.0,
-        'MemoryPort': 0.0
-    }
+            'total': 0.0,
+            'AG': 0.0,
+            'SG': 0.0,
+            'ID': 0.0,
+            'Port': 0.0,
+            'Storage': 0.0,
+            'Config': 0.0,
+            'MemintfDec': 0.0,
+            'MemoryPort': 0.0
+        }
     num_lines = len(all_file_content)
     num_data_lines = num_lines - 3
     header = all_file_content[0]
@@ -1912,7 +1910,7 @@ def get_num_live_procs(proc_list):
                 "data_width": 16,
                 "clock_count_width": 64,
                 "dimensionality": 6
-                }
+            }
 
             # Dump a parameter file so that we can understand what the design has in it
             params_file = os.path.join(collect_override_path, "params.json")
@@ -2042,8 +2040,6 @@ def get_num_live_procs(proc_list):
 
         for filename in filtered_files:
             filename_no_ext = os.path.splitext(filename)[0]
-            # if filename_no_ext not in ["dual_port_rv", "simple_dual_port"]:
-                # continue
             filename_no_ext_f = f"{filename_no_ext}_{freq}"
 
             head_folder = os.path.join(pd_files_dir, filename_no_ext_f)
@@ -2057,11 +2053,7 @@ def get_num_live_procs(proc_list):
 
             for (storage_capacity, data_width, clock_count_width, dimensionality, fw) in all_test_pts:
 
-            # Now go through the different data points
-            # for storage_capacity in [512 * cap_scale, 1024 * cap_scale, 2048 * cap_scale]:
-            #     for data_width in [16]:
-            #         for clock_count_width in [64]:
-
+                # Now go through the different data points
                 params_dict = {
                     "design": filename_no_ext,
                     "frequency": freq,

diff --git a/ASPLOS_EXP/push_cache_model.py b/ASPLOS_EXP/push_cache_model.py
@@ -123,7 +123,7 @@ def read(self, ready=False):
 
         data_on_sram_output_tmp = self.data_on_sram_output
 
-        end_of_stream = self.get_curr_addr() == None
+        end_of_stream = self.get_curr_addr() is None
 
         # Data, valid
         return_val = (0, False)
@@ -150,8 +150,8 @@ def read(self, ready=False):
         # print(f"Next read address...{self.get_curr_addr()}")
 
         if end_of_stream is False:
-        # If the main memory hasn't been read yet, make sure to read it now
-        # Can't return any data this cycle...
+            # If the main memory hasn't been read yet, make sure to read it now
+            # Can't return any data this cycle...
             if self.already_read is False:
                 # print("Haven't read yet...")
                 self.already_read = True
@@ -332,5 +332,3 @@ def convert_to_wide(thin, fw):
         for i in range(len(read_data)):
             if gold_data[i] != read_data[i]:
                 print(f"MISMATCH\tINDEX: {i}\tGOLD: {gold_data[i]}\tSIM: {read_data[i]}")
-
-
diff --git a/ASPLOS_EXP/push_cache_model_limit_cap.py b/ASPLOS_EXP/push_cache_model_limit_cap.py
@@ -1,19 +1,19 @@
 from kratos import clog2
 from math import ceil
-import random
+import argparse
 
 
 class MainMemoryModel():
 
-    def __init__(self, fetch_width, capacity=4096) -> None:
+    def __init__(self, fetch_width, capacity=4096, verbose=False) -> None:
+        self.verbose = verbose
         self.fw = fetch_width
         self.fw_log_2 = clog2(fetch_width)
         self.capacity = capacity
         self.data_array = [0 for x in range(capacity)]
         # This should be a list
         self.addr_seq = None
 
-        # self.wcb = [None for x_ in range(2)]
         self.wcb = []
 
         self.wcb_num_items = 0
@@ -37,10 +37,8 @@ def __init__(self, fetch_width, capacity=4096) -> None:
         self.data_on_sram_output = False
 
     def write(self, data: list, address: int):
-
         # Need to make sure that the write data is the proper fetch width
         assert len(data) == self.fw
-
         self.data_array[address * self.fw:address * self.fw + self.fw] = data
 
     def get_curr_addr(self):
@@ -83,16 +81,17 @@ def read_wcb(self):
         outer_index_tag = self.get_outer_address(curr_addr)
         # Get inner index
         inner_index = self.get_inner_address(curr_addr)
-        print(f"Current addr: {curr_addr}")
-        print(f"Current outer index: {outer_index_tag}")
-        print(f"Current inner index: {inner_index}")
-        # print(self.addr_q)
-        print(f"Addr queue: {self.addr_q}")
+        if self.verbose:
+            print(f"Current addr: {curr_addr}")
+            print(f"Current outer index: {outer_index_tag}")
+            print(f"Current inner index: {inner_index}")
+            print(f"Addr queue: {self.addr_q}")
         # Now go through the wcb until we find a tag that matches, then read from that one
         found_match = False
         search_idx = 0
         while found_match is False:
-            print(self.wcb_tags)
+            if self.verbose:
+                print(self.wcb_tags)
             if self.wcb_tags[search_idx] == outer_index_tag:
                 found_match = True
                 ret_data_wide = self.wcb[search_idx]
@@ -104,14 +103,12 @@ def read_wcb(self):
 
     def read(self, ready=False):
 
-        print(f"On cycle...{self.cycle}")
+        if self.verbose:
+            print(f"On cycle...{self.cycle}")
         self.cycle += 1
 
         # Address is held internally
-
         # Handle all the output logic...
-
-        read_last_cycle_tmp = self.read_last_cycle
         last_read_addr_tmp = self.last_read_addr
 
         pop_addr_q = False
@@ -124,15 +121,17 @@ def read(self, ready=False):
 
         data_on_sram_output_tmp = self.data_on_sram_output
 
-        end_of_stream = self.get_curr_addr() == None
+        data_on_bus = data_on_sram_output_tmp
+        data_being_written = False
+
+        end_of_stream = self.get_curr_addr() is None
 
         # Data, valid
         return_val = (0, False)
         # If there is anything in the address queue, we have valid data at the out.
         if len(self.addr_q) > 0 and len(self.wcb) > 0:
-            print(self.wcb)
-            # print(f"Number outstanding requests...{len(self.addr_q)}")
-            # return_val = (self.wcb[self.addr_q[0]], True)
+            if self.verbose:
+                print(self.wcb)
             return_val = (self.read_wcb(), True)
             valid_out = True
 
@@ -142,23 +141,24 @@ def read(self, ready=False):
         # We only want to pop the output queue if ready and output is valid
         if ready is True and valid_out is True:
             pop_addr_q = True
-            if self.pop_q[0] and (self.wcb_num_items >= 3):
+            if self.pop_q[0] and (self.wcb_num_items == 2):
                 pop_wcb = True
 
-        # Handle memory->wcb path)
+        # We can determine if data is being written based on the pop wcb and current occupancy
+        # of the WCB and if there is data on the SRAM bus
+        data_being_written = data_on_bus and (pop_wcb or (self.wcb_num_items < 2))
 
-        # print(f"Last read address...{self.last_read_addr}")
-        # print(f"Next read address...{self.get_curr_addr()}")
+        # Handle memory->wcb path
 
         if end_of_stream is False:
-        # If the main memory hasn't been read yet, make sure to read it now
-        # Can't return any data this cycle...
+            # If the main memory hasn't been read yet, make sure to read it now
+            # Can't return any data this cycle...
             if self.already_read is False:
-                print("Haven't read yet...")
+                if self.verbose:
+                    print("Haven't read yet...")
                 self.already_read = True
                 self.last_read_addr = self.get_curr_addr()
                 self.next_addr()
-                # print(self.get_curr_addr())
                 self.read_last_cycle = True
                 self.data_on_sram_output = True
 
@@ -172,11 +172,14 @@ def read(self, ready=False):
 
             # If it has been read, want to know if the current address goes to a new word in main memory
             elif (self.last_read_addr >> self.fw_log_2) != (self.get_curr_addr() >> self.fw_log_2):
-                print("Outer address mismatch...")
-
-                # This means that this address will go to a new word, but we only want to make the read
-                # if there is room enough (num items < 3)
-                if self.wcb_num_items < 3 and (len(self.addr_q) < self.addr_q_max_size):
+                if self.verbose:
+                    print("Outer address mismatch...")
+                # CAPLIMIT: Only write the wcb if there is valid data on the sram bus AND:
+                #   1. Room in the conversion buffer
+                #   2. No room, but it is being popped...
+                # If it's a new address, only make the read if there's room on the sram bus or it's being written
+                # and there's room in the addr queue
+                if ((data_on_bus is False) or data_being_written) and (len(self.addr_q) < self.addr_q_max_size):
                     self.last_read_addr = self.get_curr_addr()
                     self.next_addr()
                     self.read_last_cycle = True
@@ -197,7 +200,6 @@ def read(self, ready=False):
 
             # If there's no need to read from main memory
             else:
-
                 # Just check if room in output queues
                 if len(self.addr_q) < self.addr_q_max_size:
                     self.last_read_addr = self.get_curr_addr()
@@ -220,8 +222,9 @@ def read(self, ready=False):
 
         # If there was a read last cycle, need to push the data into the WCB
         # --- In the capacity-limited version, this is actually not true. This just means that we had a read, so there
-        # will be data on the output register of the SRAM
-        if read_last_cycle_tmp:
+        # will be data on the output register of the SRAM (which we are using as the skid buffer)
+        # So actually is set based on if data is being written
+        if data_being_written:
             # self.wcb.append(self.data_array[last_read_addr_tmp * self.fw: last_read_addr_tmp * self.fw + self.fw])
             self.wcb.append(self.data_array[(last_read_addr_tmp >> self.fw_log_2) * self.fw: (last_read_addr_tmp >> self.fw_log_2) * self.fw + self.fw])
             # self.wcb.append(self.data_array[(last_read_addr_tmp >> self.fw_log_2): (last_read_addr_tmp >> self.fw_log_2) + self.fw])
@@ -234,7 +237,9 @@ def read(self, ready=False):
 
         # The data on sram flag is basically
         # Will get set to true when there is a read.
-        # Can only get set to false when it is written to the wcb (but can stay true if another read follows it up...)
+        # Can only get set to false when it is written to the wcb and no follow up read
+        if data_being_written and not self.read_last_cycle:
+            self.data_on_sram_output = False
 
         # End of cycle need to do popping
         if pop_wcb:
@@ -252,6 +257,10 @@ def read(self, ready=False):
         else:
             self.read_last_cycle = False
 
+        assert self.wcb_num_items <= 2
+        if self.verbose:
+            print(f"WCB NUM ITEMS: {self.wcb_num_items}")
+
         return return_val
         # return self.data_array[address * self.fw: address * self.fw + self.fw]
 
@@ -282,19 +291,23 @@ def convert_to_wide(thin, fw):
 
 if __name__ == "__main__":
 
-    print("Trying model (limiting capacity in WCB...)...")
-
-    fw = 4
+    parser = argparse.ArgumentParser(description='Push cache model...')
+    parser.add_argument('--address_space', type=int, default=64)
+    parser.add_argument('--fetch_width', type=int, default=4)
+    parser.add_argument("--verbose", action="store_true")
+    args = parser.parse_args()
 
-    address_space = 16
+    fw = args.fetch_width
+    address_space = args.address_space
+    verbose = args.verbose
 
     write_data_thin = [i for i in range(address_space)]
 
     # print(f"Thin data: {write_data_thin}")
     write_data_wide = convert_to_wide(write_data_thin, fw=fw)
     # print(f"Wide data: {write_data_wide}")
 
-    main_memory = MainMemoryModel(fetch_width=fw, capacity=4096)
+    main_memory = MainMemoryModel(fetch_width=fw, capacity=4096, verbose=verbose)
 
     for i_ in range(len(write_data_wide)):
         main_memory.write(write_data_wide[i_], address=i_)
@@ -303,25 +316,42 @@ def convert_to_wide(thin, fw):
 
     read_data = []
 
-    addr_seq = range(address_space)
+    # make conv address
+    # addr_seq = range(address_space)
+    base_conv = [0, 1, 2, 3]
+    addr_seq = [base_conv[z] + x for x in range(100) for z in range(len(base_conv))]
+    end_idx = addr_seq.index(address_space)
+    addr_seq = addr_seq[0:end_idx]
+    # addr_seq_iter = iter(addr_seq)
+    # addr_seq_list = [i for i in range(address_space)]
 
-    addr_seq_iter = iter(addr_seq)
-    addr_seq_list = [i for i in range(address_space)]
+    print(addr_seq)
 
-    main_memory.set_addr_seq(addr_seq=addr_seq_list)
+    main_memory.set_addr_seq(addr_seq=addr_seq)
+
+    gold_data = [write_data_thin[x] for x in addr_seq]
+    print(gold_data)
 
     # Need to do cycle-approximate simulation here...
     num_reads = 0
-    while num_reads < address_space:
-    # for i_ in range(len(addr_seq_list)):
-        # ready_choice = random.choice([True, False])
+    while num_reads < len(gold_data):
         ready_choice = True
         read_result_data, read_result_valid = main_memory.read(ready_choice)
-        # read_data_wide.append(main_memory.read(ready_choice))\
-        print(f"Ready: {ready_choice}\tValid: {read_result_valid}\tData: {read_result_data}")
+        if verbose:
+            print(f"Ready: {ready_choice}\tValid: {read_result_valid}\tData: {read_result_data}")
         if ready_choice is True and read_result_valid is True:
             read_data.append(read_result_data)
             num_reads += 1
 
-
-    print(f"Read data: {read_data}")
+    if verbose:
+        print(f"Gold data: {gold_data}")
+        print(f"Read data: {read_data}")
+
+    if gold_data == read_data:
+        print(f"SUCCESS: Simulation result matches gold")
+    elif len(gold_data) != len(read_data):
+        print(f"Data length mismatch --- Gold length: {len(gold_data)}\tSimulation length: {len(read_data)}")
+    else:
+        for i in range(len(read_data)):
+            if gold_data[i] != read_data[i]:
+                print(f"MISMATCH\tINDEX: {i}\tGOLD: {gold_data[i]}\tSIM: {read_data[i]}")