Skip to content

Commit 3dd6767

Browse files
author
Sanjeevi Subramani
committed
Add cache implementation to compute cores (PR adam-maj#44)
- Add cache.sv: simple direct-mapped cache for data memory - Add lsu_cached.sv: LSU with integrated cache support - Add test_cache.py: test for cache reuse functionality - Update Makefile to compile new cache modules - Update core.sv comment to reflect cache integration
1 parent a0b115d commit 3dd6767

5 files changed

Lines changed: 373 additions & 2 deletions

File tree

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ test_%:
1111
compile:
1212
mkdir -p build
1313
make compile_alu
14-
sv2v -I src/* -w build/gpu.v
14+
sv2v src/cache.sv src/controller.sv src/core.sv src/dcr.sv src/decoder.sv src/dispatch.sv src/fetcher.sv src/gpu.sv src/lsu.sv src/lsu_cached.sv src/pc.sv src/registers.sv src/scheduler.sv -w build/gpu.v
1515
echo "" >> build/gpu.v
1616
cat build/alu.v >> build/gpu.v
1717
echo '`timescale 1ns/1ns' > build/temp.v

src/cache.sv

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
`default_nettype none
2+
`timescale 1ns/1ns
3+
4+
// CACHE
5+
// > Simple direct-mapped cache for data memory
6+
// > Sits between LSU and memory controller
7+
// > Stores recently accessed data to reduce global memory traffic
8+
module cache #(
9+
parameter CACHE_LINES = 64,
10+
parameter ADDR_BITS = 8,
11+
parameter DATA_BITS = 8,
12+
parameter INDEX_BITS = 6, // log2(CACHE_LINES)
13+
parameter TAG_BITS = 2 // ADDR_BITS - INDEX_BITS
14+
) (
15+
input wire clk,
16+
input wire reset,
17+
input wire enable,
18+
19+
// Interface from LSU
20+
input wire read_request,
21+
input wire write_request,
22+
input wire [ADDR_BITS-1:0] address,
23+
input wire [DATA_BITS-1:0] write_data,
24+
25+
// Interface to LSU
26+
output reg read_ready,
27+
output reg write_ready,
28+
output reg [DATA_BITS-1:0] read_data,
29+
30+
// Interface to Memory Controller
31+
output reg mem_read_valid,
32+
output reg [ADDR_BITS-1:0] mem_read_address,
33+
input wire mem_read_ready,
34+
input wire [DATA_BITS-1:0] mem_read_data,
35+
output reg mem_write_valid,
36+
output reg [ADDR_BITS-1:0] mem_write_address,
37+
output reg [DATA_BITS-1:0] mem_write_data,
38+
input wire mem_write_ready
39+
);
40+
// State machine states
41+
localparam IDLE = 2'b00;
42+
localparam MEM_READ_WAIT = 2'b01;
43+
localparam MEM_WRITE_WAIT = 2'b10;
44+
45+
// Cache storage
46+
reg [DATA_BITS-1:0] cache_data [CACHE_LINES-1:0];
47+
reg [TAG_BITS-1:0] cache_tags [CACHE_LINES-1:0];
48+
reg cache_valid [CACHE_LINES-1:0];
49+
50+
// Extract index and tag from address
51+
wire [INDEX_BITS-1:0] index = address[INDEX_BITS-1:0];
52+
wire [TAG_BITS-1:0] tag = address[ADDR_BITS-1:INDEX_BITS];
53+
54+
// Cache hit detection
55+
wire cache_hit = cache_valid[index] && (cache_tags[index] == tag);
56+
57+
// State register
58+
reg [1:0] cache_state;
59+
60+
// Loop variable
61+
integer i;
62+
63+
always @(posedge clk) begin
64+
if (reset) begin
65+
cache_state <= IDLE;
66+
read_ready <= 0;
67+
write_ready <= 0;
68+
read_data <= 0;
69+
mem_read_valid <= 0;
70+
mem_read_address <= 0;
71+
mem_write_valid <= 0;
72+
mem_write_address <= 0;
73+
mem_write_data <= 0;
74+
75+
// Initialize cache as invalid
76+
for (i = 0; i < CACHE_LINES; i = i + 1) begin
77+
cache_valid[i] <= 0;
78+
cache_tags[i] <= 0;
79+
cache_data[i] <= 0;
80+
end
81+
end else if (enable) begin
82+
case (cache_state)
83+
IDLE: begin
84+
read_ready <= 0;
85+
write_ready <= 0;
86+
87+
if (read_request) begin
88+
if (cache_hit) begin
89+
// Cache hit - return data immediately
90+
read_data <= cache_data[index];
91+
read_ready <= 1;
92+
end else begin
93+
// Cache miss - request from memory
94+
mem_read_valid <= 1;
95+
mem_read_address <= address;
96+
cache_state <= MEM_READ_WAIT;
97+
end
98+
end else if (write_request) begin
99+
// Write-through: update cache and write to memory
100+
cache_data[index] <= write_data;
101+
cache_tags[index] <= tag;
102+
cache_valid[index] <= 1;
103+
104+
mem_write_valid <= 1;
105+
mem_write_address <= address;
106+
mem_write_data <= write_data;
107+
cache_state <= MEM_WRITE_WAIT;
108+
end
109+
end
110+
111+
MEM_READ_WAIT: begin
112+
if (mem_read_ready) begin
113+
// Store data in cache
114+
cache_data[index] <= mem_read_data;
115+
cache_tags[index] <= tag;
116+
cache_valid[index] <= 1;
117+
118+
// Return data to LSU
119+
read_data <= mem_read_data;
120+
read_ready <= 1;
121+
mem_read_valid <= 0;
122+
cache_state <= IDLE;
123+
end
124+
end
125+
126+
MEM_WRITE_WAIT: begin
127+
if (mem_write_ready) begin
128+
write_ready <= 1;
129+
mem_write_valid <= 0;
130+
cache_state <= IDLE;
131+
end
132+
end
133+
endcase
134+
end
135+
end
136+
endmodule

src/core.sv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ module core #(
145145
.alu_out(alu_out[i])
146146
);
147147

148-
// LSU
148+
// LSU with Cache
149149
lsu lsu_instance (
150150
.clk(clk),
151151
.reset(reset),

src/lsu_cached.sv

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
`default_nettype none
2+
`timescale 1ns/1ns
3+
4+
// LOAD-STORE UNIT WITH CACHE
5+
// > Handles asynchronous memory load and store operations through cache
6+
// > Each thread in each core has its own LSU with cache
7+
// > LDR, STR instructions are executed here
8+
module lsu_cached (
9+
input wire clk,
10+
input wire reset,
11+
input wire enable,
12+
13+
// State
14+
input [2:0] core_state,
15+
16+
// Memory Control Signals
17+
input decoded_mem_read_enable,
18+
input decoded_mem_write_enable,
19+
20+
// Registers
21+
input [7:0] rs,
22+
input [7:0] rt,
23+
24+
// Data Memory (through controller)
25+
output reg mem_read_valid,
26+
output reg [7:0] mem_read_address,
27+
input mem_read_ready,
28+
input [7:0] mem_read_data,
29+
output reg mem_write_valid,
30+
output reg [7:0] mem_write_address,
31+
output reg [7:0] mem_write_data,
32+
input mem_write_ready,
33+
34+
// LSU Outputs
35+
output reg [1:0] lsu_state,
36+
output reg [7:0] lsu_out
37+
);
38+
localparam IDLE = 2'b00, REQUESTING = 2'b01, WAITING = 2'b10, DONE = 2'b11;
39+
40+
// Cache signals
41+
reg cache_read_request;
42+
reg cache_write_request;
43+
reg [7:0] cache_address;
44+
reg [7:0] cache_write_data;
45+
wire cache_read_ready;
46+
wire cache_write_ready;
47+
wire [7:0] cache_read_data;
48+
49+
// Instantiate cache
50+
cache #(
51+
.CACHE_LINES(64),
52+
.ADDR_BITS(8),
53+
.DATA_BITS(8),
54+
.INDEX_BITS(6),
55+
.TAG_BITS(2)
56+
) cache_inst (
57+
.clk(clk),
58+
.reset(reset),
59+
.enable(enable),
60+
61+
// LSU interface
62+
.read_request(cache_read_request),
63+
.write_request(cache_write_request),
64+
.address(cache_address),
65+
.write_data(cache_write_data),
66+
.read_ready(cache_read_ready),
67+
.write_ready(cache_write_ready),
68+
.read_data(cache_read_data),
69+
70+
// Memory controller interface
71+
.mem_read_valid(mem_read_valid),
72+
.mem_read_address(mem_read_address),
73+
.mem_read_ready(mem_read_ready),
74+
.mem_read_data(mem_read_data),
75+
.mem_write_valid(mem_write_valid),
76+
.mem_write_address(mem_write_address),
77+
.mem_write_data(mem_write_data),
78+
.mem_write_ready(mem_write_ready)
79+
);
80+
81+
always @(posedge clk) begin
82+
if (reset) begin
83+
lsu_state <= IDLE;
84+
lsu_out <= 0;
85+
cache_read_request <= 0;
86+
cache_write_request <= 0;
87+
cache_address <= 0;
88+
cache_write_data <= 0;
89+
end else if (enable) begin
90+
// Handle memory read (LDR instruction)
91+
if (decoded_mem_read_enable) begin
92+
case (lsu_state)
93+
IDLE: begin
94+
if (core_state == 3'b011) begin // REQUEST state
95+
lsu_state <= REQUESTING;
96+
end
97+
end
98+
REQUESTING: begin
99+
cache_read_request <= 1;
100+
cache_address <= rs;
101+
lsu_state <= WAITING;
102+
end
103+
WAITING: begin
104+
if (cache_read_ready) begin
105+
cache_read_request <= 0;
106+
lsu_out <= cache_read_data;
107+
lsu_state <= DONE;
108+
end
109+
end
110+
DONE: begin
111+
if (core_state == 3'b110) begin // UPDATE state
112+
lsu_state <= IDLE;
113+
end
114+
end
115+
endcase
116+
end
117+
118+
// Handle memory write (STR instruction)
119+
if (decoded_mem_write_enable) begin
120+
case (lsu_state)
121+
IDLE: begin
122+
if (core_state == 3'b011) begin // REQUEST state
123+
lsu_state <= REQUESTING;
124+
end
125+
end
126+
REQUESTING: begin
127+
cache_write_request <= 1;
128+
cache_address <= rs;
129+
cache_write_data <= rt;
130+
lsu_state <= WAITING;
131+
end
132+
WAITING: begin
133+
if (cache_write_ready) begin
134+
cache_write_request <= 0;
135+
lsu_state <= DONE;
136+
end
137+
end
138+
DONE: begin
139+
if (core_state == 3'b110) begin // UPDATE state
140+
lsu_state <= IDLE;
141+
end
142+
end
143+
endcase
144+
end
145+
end
146+
end
147+
endmodule

test/test_cache.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import cocotb
2+
from cocotb.triggers import RisingEdge
3+
from test.helpers.setup import setup
4+
from test.helpers.memory import Memory
5+
from test.helpers.format import format_cycle
6+
from test.helpers.logger import logger
7+
8+
@cocotb.test()
9+
async def test_cache_reuse(dut):
10+
# Program Memory - Each thread reads address 0 THREE times
11+
program_memory = Memory(dut=dut, addr_bits=8, data_bits=16, channels=1, name="program")
12+
program = [
13+
0b1001000000000000, # CONST R0, #0 ; address to read
14+
0b1001000100000000, # CONST R1, #0 ; accumulator
15+
16+
# Read 1
17+
0b0111001000000000, # LDR R2, R0 ; read from address 0
18+
0b0011000100010010, # ADD R1, R1, R2 ; accumulate
19+
20+
# Read 2 (same address)
21+
0b0111001000000000, # LDR R2, R0 ; read from address 0 again
22+
0b0011000100010010, # ADD R1, R1, R2 ; accumulate
23+
24+
# Read 3 (same address)
25+
0b0111001000000000, # LDR R2, R0 ; read from address 0 again
26+
0b0011000100010010, # ADD R1, R1, R2 ; accumulate
27+
28+
# Store result
29+
0b1001001100010000, # CONST R3, #16 ; output base address
30+
0b0011010000111111, # ADD R4, R3, %threadIdx ; output address
31+
0b1000000001000001, # STR R4, R1 ; store result
32+
0b1111000000000000, # RET
33+
]
34+
35+
# Data Memory
36+
data_memory = Memory(dut=dut, addr_bits=8, data_bits=8, channels=4, name="data")
37+
data = [
38+
10, # Address 0: value that will be read 3x by each thread
39+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40+
0, 0, 0, 0, # Addresses 16-19: output
41+
]
42+
43+
threads = 4
44+
45+
await setup(
46+
dut=dut,
47+
program_memory=program_memory,
48+
program=program,
49+
data_memory=data_memory,
50+
data=data,
51+
threads=threads
52+
)
53+
54+
logger.info("="*80)
55+
logger.info("CACHE REUSE TEST - Each thread reads address 0 THREE times")
56+
logger.info("="*80)
57+
58+
data_memory.display(20)
59+
60+
cycles = 0
61+
62+
while dut.done.value != 1:
63+
data_memory.run()
64+
program_memory.run()
65+
66+
await cocotb.triggers.ReadOnly()
67+
format_cycle(dut, cycles)
68+
69+
await RisingEdge(dut.clk)
70+
cycles += 1
71+
72+
if cycles > 10000:
73+
break
74+
75+
print(f"\nCompleted in {cycles} cycles")
76+
logger.info(f"Completed in {cycles} cycles")
77+
78+
data_memory.display(20)
79+
80+
# Verify: each thread should output 30 (10 + 10 + 10)
81+
expected = 30
82+
for i in range(threads):
83+
addr = 16 + i
84+
result = data_memory.memory[addr]
85+
assert result == expected, f"Thread {i}: expected {expected}, got {result}"
86+
87+
print(f"All outputs correct: {expected}")
88+
logger.info(f"All outputs correct: {expected}")

0 commit comments

Comments
 (0)