E203数据冲突处理OITF
阅读原文时间:2023年07月13日阅读:2

流水线的数据冲突分为三类:WAR,RAW,WAW

https://wenku.baidu.com/view/e066926d48d7c1c708a14508.html

  • WAR: write after read 相关性,又称先读后写相关性。比如下面的指令序列,第一条指令会读取x4,第二条指令会写x4。在流水线中,如果第二条指令在第一条指令读取x4之前就先写了x4,则第一条指令就会读出错误的值。

add x5, x4,x6

add x4, x3, x2

  • WAW: write after write 相关性,又称先写后写相关性。比如下面的指令序列,两条指令都会写x5。在流水线中,如果第二条指令比第一条指令先写x5,就会引起逻辑错误。

add x5, x4,x6

add x5, x3, x2

  • RAW: read after write 相关性,又称先写后读相关性。比如下面的指令序列,第一条指令会写x5,第二条指令会读x5。如果第二条指令在第一条指令写x5之前就先读取了x5,就会读到旧值,引起逻辑错误。

add x5, x4,x6

add x4, x5, x2

由于蜂鸟E200系列是按序派遣,按顺序写回的微架构,在指令派遣时候就已经从通用寄存器数组中读取了源操作数。后续执行的指令写回regfile的操作不可能影响到前面指令的读取,所以不可能发生WAR相关性造成的数据冲突。

正在派遣的指令处在流水线的第二级,假设之前派遣的指令是单周期指令,则前序指令肯定已经完成了执行且将结果写回了Regfile。因此正在派遣的指令不可能会发生RAW数据冲突。但是假设之前派遣的指令是多周期指令(长指令),由于指令需要多个周期才能写回结果。因此正在派遣的指令可能会产生前序相关的RAW相关性。

正在派遣的指令处在流水线的第二级,假设之前派遣的指令是单周期指令,则前序指令肯定已经完成了执行且将结果写回了Regfile。因此正在派遣的指令不可能会发生WAW数据冲突。但是假设之前派遣的指令是多周期指令(长指令),由于指令需要多个周期才能写回结果。因此正在派遣的指令可能会产生前序相关的WAW相关性。

为了能检测出长指令的RAW和WAW相关性,蜂鸟E200使用了一个outstanding instruction track fifo(OITF)模块。在流水线的派遣(Dispatch)点,每一次派遣一个长指令,则会在OITF中分配一个表项(Entry),在这个表项中会存储该长指令的结果寄存器索引。在流水线的写回(Write-back)点,每次按顺序写回一个长指令之后,就会将此指令在OITF中的表项移除。

每条指令派遣时,都会将本指令的源操作数和目的操作数寄存器索引和OITF中的各个表项进行比对,从而判断本指令是否与已经被派遣出,且尚未写回的长指令产生RAW和WAW相关性。如果产生相关性,则stall住当前指令的派遣。如果没有RAW和WAW相关性,且该指令为多周期长指令,把该指令写入OITF,如果OITF是full,则仍要stall住管线,等待OITF释放空间后,再写入并派遣。

在writeback模块,会进行长指令写回仲裁,长指令写回regfile后,会释放OITF中相应的表项。

OITF代码如下。如果fifo已满(full),则dis_ready=0,与dispatch模块握手失败,不会有新的长指令被派遣进来;如果fifo未满,则dis_ready=1,可以接收新派遣的指令并进行相关性判断。

`include "e203_defines.v"

// e203_exu_oitf: Outstanding Instruction Track FIFO (OITF).
//
// Tracks long (multi-cycle) instructions that have been dispatched but not
// yet written back. An entry is allocated at the allocation pointer when
// dis_ena is high and released at the retire pointer when ret_ena is high.
// Each entry stores the destination register index, its write-enable, its
// FPU flag and the instruction PC.
//
// The registers of the instruction currently being dispatched are compared
// against every valid entry; the oitfrd_match_* outputs report a match on
// rs1/rs2/rs3 (RAW dependency) or rd (WAW dependency).
//
// NOTE(review): sirv_gnrl_dfflr / sirv_gnrl_dffl are defined outside this
// file. From their use here they are instantiated as
// (load-enable, next-value, current-value, clk[, rst_n]); the "r" variant
// presumably has a reset -- confirm against the cell library.
module e203_exu_oitf (
  output dis_ready, // OITF can accept a new entry (high when not full)

  input  dis_ena, // A long instruction is dispatched: allocate an entry
  input  ret_ena, // A long instruction is written back: release an entry

  output [`E203_ITAG_WIDTH-1:0] dis_ptr, // Allocation (write) pointer
  output [`E203_ITAG_WIDTH-1:0] ret_ptr, // Retire (read) pointer

  // Payload of the entry currently addressed by ret_ptr
  output [`E203_RFIDX_WIDTH-1:0] ret_rdidx,
  output ret_rdwen,
  output ret_rdfpu,
  output [`E203_PC_SIZE-1:0] ret_pc,

  // Operand usage of the instruction currently being dispatched
  input  disp_i_rs1en,  // Instruction reads source operand rs1
  input  disp_i_rs2en,  // Instruction reads source operand rs2
  input  disp_i_rs3en,  // Instruction reads source operand rs3
  input  disp_i_rdwen,  // Instruction writes a destination register
  input  disp_i_rs1fpu, // rs1 is a floating-point register
  input  disp_i_rs2fpu, // rs2 is a floating-point register
  input  disp_i_rs3fpu, // rs3 is a floating-point register
  input  disp_i_rdfpu,  // rd is a floating-point register
  // Register indexes of the instruction currently being dispatched
  input  [`E203_RFIDX_WIDTH-1:0] disp_i_rs1idx,
  input  [`E203_RFIDX_WIDTH-1:0] disp_i_rs2idx,
  input  [`E203_RFIDX_WIDTH-1:0] disp_i_rs3idx,
  input  [`E203_RFIDX_WIDTH-1:0] disp_i_rdidx,
  input  [`E203_PC_SIZE -1:0] disp_i_pc, // PC of the instruction being dispatched

  output oitfrd_match_disprs1, // rs1 equals the rd of some valid OITF entry
  output oitfrd_match_disprs2, // rs2 equals the rd of some valid OITF entry
  output oitfrd_match_disprs3, // rs3 equals the rd of some valid OITF entry
  output oitfrd_match_disprd,  // rd equals the rd of some valid OITF entry

  output oitf_empty, // No outstanding long instruction is tracked
  input  clk,
  input  rst_n
  );

  // Per-entry valid bit control and state
  wire [`E203_OITF_DEPTH-1:0] vld_set;
  wire [`E203_OITF_DEPTH-1:0] vld_clr;
  wire [`E203_OITF_DEPTH-1:0] vld_ena;
  wire [`E203_OITF_DEPTH-1:0] vld_nxt;
  wire [`E203_OITF_DEPTH-1:0] vld_r;   // Entry holds an outstanding instruction
  wire [`E203_OITF_DEPTH-1:0] rdwen_r; // Stored instruction writes a destination register
  wire [`E203_OITF_DEPTH-1:0] rdfpu_r; // Stored destination is a floating-point register
  wire [`E203_RFIDX_WIDTH-1:0] rdidx_r[`E203_OITF_DEPTH-1:0]; // Stored destination register index
  // The PC here is to be used at wback stage to track out the
  // PC of exception of long-pipe instruction
  wire [`E203_PC_SIZE-1:0] pc_r[`E203_OITF_DEPTH-1:0];

  wire alc_ptr_ena = dis_ena; // Advance the allocation pointer on dispatch
  wire ret_ptr_ena = ret_ena; // Advance the retire pointer on write-back

  wire oitf_full;

  wire [`E203_ITAG_WIDTH-1:0] alc_ptr_r; // Allocation (write) pointer
  wire [`E203_ITAG_WIDTH-1:0] ret_ptr_r; // Retire (read) pointer

  generate
  if(`E203_OITF_DEPTH > 1) begin: depth_gt1//{
      // Wrap flag of the allocation pointer: toggles each time the pointer
      // advances from the last entry back to entry 0.
      wire alc_ptr_flg_r;
      wire alc_ptr_flg_nxt = ~alc_ptr_flg_r;
      wire alc_ptr_flg_ena = (alc_ptr_r == ($unsigned(`E203_OITF_DEPTH-1))) & alc_ptr_ena;

      sirv_gnrl_dfflr #(1) alc_ptr_flg_dfflrs(alc_ptr_flg_ena, alc_ptr_flg_nxt, alc_ptr_flg_r, clk, rst_n);

      wire [`E203_ITAG_WIDTH-1:0] alc_ptr_nxt;
      // Wrap to 0 after the last entry, otherwise increment by one
      assign alc_ptr_nxt = alc_ptr_flg_ena ? `E203_ITAG_WIDTH'b0 : (alc_ptr_r + 1'b1);

      sirv_gnrl_dfflr #(`E203_ITAG_WIDTH) alc_ptr_dfflrs(alc_ptr_ena, alc_ptr_nxt, alc_ptr_r, clk, rst_n);

      // Wrap flag of the retire pointer: toggles each time the pointer
      // advances from the last entry back to entry 0.
      wire ret_ptr_flg_r;
      wire ret_ptr_flg_nxt = ~ret_ptr_flg_r;
      wire ret_ptr_flg_ena = (ret_ptr_r == ($unsigned(`E203_OITF_DEPTH-1))) & ret_ptr_ena;

      sirv_gnrl_dfflr #(1) ret_ptr_flg_dfflrs(ret_ptr_flg_ena, ret_ptr_flg_nxt, ret_ptr_flg_r, clk, rst_n);

      wire [`E203_ITAG_WIDTH-1:0] ret_ptr_nxt;
      // Wrap to 0 after the last entry, otherwise increment by one
      assign ret_ptr_nxt = ret_ptr_flg_ena ? `E203_ITAG_WIDTH'b0 : (ret_ptr_r + 1'b1);

      sirv_gnrl_dfflr #(`E203_ITAG_WIDTH) ret_ptr_dfflrs(ret_ptr_ena, ret_ptr_nxt, ret_ptr_r, clk, rst_n);

      // Equal pointers with equal wrap flags mean empty; equal pointers with
      // different wrap flags mean the allocation side is one full lap ahead.
      assign oitf_empty = (ret_ptr_r == alc_ptr_r) &   (ret_ptr_flg_r == alc_ptr_flg_r);
      assign oitf_full  = (ret_ptr_r == alc_ptr_r) & (~(ret_ptr_flg_r == alc_ptr_flg_r));
  end//}
  else begin: depth_eq1//}{
      // Single-entry OITF: both pointers are fixed at 0 and the valid bit
      // of entry 0 directly gives the empty/full status.
      assign alc_ptr_r = 1'b0;
      assign ret_ptr_r = 1'b0;
      assign oitf_empty = ~vld_r[0];
      assign oitf_full  = vld_r[0];
  end//}
  endgenerate//}

  assign ret_ptr = ret_ptr_r;
  assign dis_ptr = alc_ptr_r;

  ////
  //// // If the OITF is not full, or it is under retiring, then it is ready to accept new dispatch
  //// assign dis_ready = (~oitf_full) | ret_ena;
  // To cut down the loop between ALU write-back valid --> oitf_ret_ena --> oitf_ready ---> dispatch_ready --- > alu_i_valid
  // we exclude the ret_ena from the ready signal
  assign dis_ready = (~oitf_full);

  // Per-entry comparison results against the dispatching instruction
  wire [`E203_OITF_DEPTH-1:0] rd_match_rs1idx;
  wire [`E203_OITF_DEPTH-1:0] rd_match_rs2idx;
  wire [`E203_OITF_DEPTH-1:0] rd_match_rs3idx;
  wire [`E203_OITF_DEPTH-1:0] rd_match_rdidx;

  genvar i;
  generate //{
      for (i=0; i<`E203_OITF_DEPTH; i=i+1) begin:oitf_entries//{
        // Entry i becomes valid when an allocation targets it
        assign vld_set[i] = alc_ptr_ena & (alc_ptr_r == i);
        // Entry i becomes invalid when a retirement targets it
        assign vld_clr[i] = ret_ptr_ena & (ret_ptr_r == i);
        assign vld_ena[i] = vld_set[i] | vld_clr[i];
        // Set takes priority over clear when both hit the same entry
        assign vld_nxt[i] = vld_set[i] | (~vld_clr[i]);

        sirv_gnrl_dfflr #(1) vld_dfflrs(vld_ena[i], vld_nxt[i], vld_r[i], clk, rst_n);
        // Payload is only loaded on allocation; it is never cleared because
        // it is qualified by vld_r wherever it is compared.
        sirv_gnrl_dffl #(`E203_RFIDX_WIDTH) rdidx_dfflrs(vld_set[i], disp_i_rdidx, rdidx_r[i], clk);
        sirv_gnrl_dffl #(`E203_PC_SIZE    ) pc_dfflrs   (vld_set[i], disp_i_pc   , pc_r[i]   , clk);
        sirv_gnrl_dffl #(1)                 rdwen_dfflrs(vld_set[i], disp_i_rdwen, rdwen_r[i], clk);
        sirv_gnrl_dffl #(1)                 rdfpu_dfflrs(vld_set[i], disp_i_rdfpu, rdfpu_r[i], clk);

        // An operand matches entry i when the entry is valid and writes a
        // register, the operand is used, both sides are in the same register
        // file (integer vs. FPU), and the register indexes are equal.
        assign rd_match_rs1idx[i] = vld_r[i] & rdwen_r[i] & disp_i_rs1en & (rdfpu_r[i] == disp_i_rs1fpu) & (rdidx_r[i] == disp_i_rs1idx);
        assign rd_match_rs2idx[i] = vld_r[i] & rdwen_r[i] & disp_i_rs2en & (rdfpu_r[i] == disp_i_rs2fpu) & (rdidx_r[i] == disp_i_rs2idx);
        assign rd_match_rs3idx[i] = vld_r[i] & rdwen_r[i] & disp_i_rs3en & (rdfpu_r[i] == disp_i_rs3fpu) & (rdidx_r[i] == disp_i_rs3idx);
        assign rd_match_rdidx [i] = vld_r[i] & rdwen_r[i] & disp_i_rdwen & (rdfpu_r[i] == disp_i_rdfpu ) & (rdidx_r[i] == disp_i_rdidx );

      end//}
  endgenerate//}

  // A source operand matching any entry is a RAW dependency
  assign oitfrd_match_disprs1 = |rd_match_rs1idx;
  assign oitfrd_match_disprs2 = |rd_match_rs2idx;
  assign oitfrd_match_disprs3 = |rd_match_rs3idx;
  // The destination register matching any entry is a WAW dependency
  assign oitfrd_match_disprd  = |rd_match_rdidx ;

  // Expose the payload of the entry at the retire pointer
  assign ret_rdidx = rdidx_r[ret_ptr];
  assign ret_pc    = pc_r   [ret_ptr];
  assign ret_rdwen = rdwen_r[ret_ptr];
  assign ret_rdfpu = rdfpu_r[ret_ptr];

endmodule