`timescale 1ps / 1ps

// -----------------------------------------------------------------------------
// exp
//
// Seven-stage pipelined integer datapath. Per accepted sample it computes:
//
//   z    = (qin * qln2_inv) >>> FP_BITS
//   qp   = qin - z * qln2
//   ql   = (qp + qb) * qp + qc
//   qout = ql >>> z                       (truncated to D_W bits)
//
// NOTE(review): judging by the module/port names this looks like an
// integer-only exponential approximation (range reduction by ln2 followed by a
// second-order polynomial) -- confirm against the reference model.
//
// Timing / usage:
//   * All registers update only while `enable` is high; `rst` is a synchronous
//     reset that clears every register, including the valid pipeline.
//   * `out_valid` follows `in_valid` by 7 enabled clock edges, aligned with
//     `qout`.
//   * The coefficient inputs are sampled at different pipeline stages
//     (qln2_inv at stage 1, qln2 at stage 3, qb at stage 4, qc at stage 5),
//     NOT together with `qin`. They must therefore be held stable while
//     samples are in flight.
//   * The final shift amount is the range-reduction quotient `z`; a shift
//     amount is interpreted as unsigned, so a negative `z` (i.e. a negative
//     qin * qln2_inv product) does not yield a meaningful result.
//
// Parameters:
//   D_W     - width of the data and coefficient ports.
//   FP_BITS - number of bits dropped from the qin * qln2_inv product.
// -----------------------------------------------------------------------------
module exp #(
    parameter integer D_W     = 32,
    parameter integer FP_BITS = 30
) (
    input  logic                  clk,
    input  logic                  rst,       // synchronous, active high
    input  logic                  enable,    // pipeline advance enable
    input  logic                  in_valid,  // qualifies qin
    input  logic signed [D_W-1:0] qin,       // exp input
    input  logic signed [D_W-1:0] qb,        // coefficient
    input  logic signed [D_W-1:0] qc,        // coefficient
    input  logic signed [D_W-1:0] qln2,      // coefficient
    input  logic signed [D_W-1:0] qln2_inv,  // coefficient
    output logic                  out_valid, // qualifies qout
    output logic signed [D_W-1:0] qout       // exp output
);

    // Internal datapath width. Kept at the original fixed 64 bits.
    // NOTE(review): this is 2*D_W only for the default D_W = 32; wider ports
    // would need a wider datapath -- confirm before changing D_W.
    localparam integer ACC_W = 64;

    // Delayed copies of the input sample.
    logic signed [ACC_W-1:0] qin1, qin2;
    // Scaled product and the quotient z with its delay line (z1..z4).
    logic signed [ACC_W-1:0] fp_mul, z, z1, z2, z3, z4;
    // Remainder and polynomial intermediates.
    logic signed [ACC_W-1:0] qp, qp1, ql, ql_mul, ql_sum;
    // Registered copy of the qc coefficient.
    logic signed [ACC_W-1:0] qc2;

    // Valid shift register: one bit per stage 1..6; out_valid is stage 7.
    logic [5:0] valid;

    always_ff @(posedge clk) begin
        if (rst) begin
            qin1      <= '0;
            qin2      <= '0;
            fp_mul    <= '0;
            z         <= '0;
            z1        <= '0;
            z2        <= '0;
            z3        <= '0;
            z4        <= '0;
            qp        <= '0;
            qp1       <= '0;
            ql        <= '0;
            ql_mul    <= '0;
            ql_sum    <= '0;
            qc2       <= '0;
            qout      <= '0;
            valid     <= '0;
            out_valid <= 1'b0;
        end else if (enable) begin
            // Shift the valid flag along with the data. The slice is written
            // explicitly so the concatenation is exactly 6 bits wide.
            valid <= {valid[4:0], in_valid};

            // Stage 1: full-precision product of input and qln2_inv.
            qin1   <= qin;
            fp_mul <= qin * qln2_inv;

            // Stage 2: drop the fractional bits. Arithmetic shift (>>>) keeps
            // the sign of the signed product; a logical shift would zero-fill.
            qin2 <= qin1;
            z    <= fp_mul >>> FP_BITS;

            // Stage 3: remainder after removing z multiples of qln2.
            z1 <= z;
            qp <= qin2 - (z * qln2);

            // Stage 4: first half of (qp + qb) * qp.
            z2     <= z1;
            ql_sum <= qp + qb;
            qp1    <= qp;

            // Stage 5: second half of (qp + qb) * qp; capture qc for stage 6.
            z3     <= z2;
            ql_mul <= ql_sum * qp1;
            qc2    <= qc;

            // Stage 6: add the constant term.
            z4 <= z3;
            ql <= ql_mul + qc2;

            // Stage 7: scale by 2^-z. Arithmetic shift so that a negative
            // polynomial value stays negative for any shift amount.
            qout      <= ql >>> z4;
            out_valid <= valid[5];
        end
    end

endmodule