`default_nettype none
module rsf440_fir
(
  /* system : active-low asynchronous reset and clock */
  input  wire         rst_n,
  input  wire         clk,

  /* RX path : 8-bit samples in, 7-bit filtered samples out */
  input  wire         rxenable,
  output reg          in_rx_ready,
  input  wire [ 7:0]  in_rx_data,
  input  wire         in_rx_valid,
  output reg  [ 6:0]  out_rx_data,
  output reg          out_rx_valid,

  /* TX path : 7-bit samples in, 12-bit filtered samples out */
  input  wire         txenable,
  input  wire [ 1:0]  txshift,
  input  wire [ 6:0]  in_tx_data,
  input  wire         in_tx_valid,
  output reg  [11:0]  out_tx_data,
  output reg          out_tx_valid
);

  /*****************************************************************************
  * Overview
  ******************************************************************************
  * A 44-tap coefficient set (40 non-zero symmetric taps plus 4 zero taps) is
  * applied through four multipliers fed by a 4-deep sample delay line
  * (t0..t3). A 4-bit phase counter 'cycle' runs 0..10 and selects which four
  * coefficients are used on each clock:
  *
  *   RX : phase p uses {H(10-p), H(21-p), H(32-p), H(43-p)}    (11 phases)
  *   TX : phase p uses {H(p),    H(10+p), H(20+p), H(30+p)}    (10 phases,
  *        phase 10 reuses the phase 9 set and its output is flagged invalid)
  *
  * rxenable has priority over txenable, both for the datapath selection and
  * for the start-up decision in IDLE. Driving both enables low synchronously
  * returns every flop to its reset value.
  ******************************************************************************
  * Coefficients
  *
  *   -9 -10 -14 -18 -20 -21 -19 -13  -3  13  33  58  87 118 150 181 209 231
  *  248 256 256 248 231 209 181 150 118  87  58  33  13  -3 -13 -19 -21 -20
  *  -18 -14 -10  -9   0   0   0   0
  ******************************************************************************
  * Datapath sizing (worst case sum of |h| over one phase: 336 RX, 372 TX)
  *
  *   rx : [-128 127] * 336 = [-43008 42672] -> 17 bits
  *   tx : [ -64  63] * 372 = [-23808 23436] -> 16 bits
  *
  * The accumulator is therefore 17 bits wide.
  ******************************************************************************
  * Output scaling (a half-LSB constant is added before truncation)
  *
  *   rx         : (acc+512)/1024 = [  -41.50   42.17] fits  7 bits
  *   txshift=0  : (acc+ 32)/64   = [ -371.50  366.69] fits 12 bits
  *   txshift=1  : (acc+ 16)/32   = [ -743.50  732.88] fits 12 bits
  *   txshift=2  : (acc+  8)/16   = [-1487.50 1465.25] fits 12 bits
  *   txshift=3  : (acc+  4)/8    = [-2975.50 2930.00] needs saturation
  *****************************************************************************/

  /* first half of the impulse response, 10-bit two's complement */
  localparam [9:0] H00 = -10'sd9,   H01 = -10'sd10,  H02 = -10'sd14,  H03 = -10'sd18,
                   H04 = -10'sd20,  H05 = -10'sd21,  H06 = -10'sd19,  H07 = -10'sd13,
                   H08 = -10'sd3,   H09 =  10'sd13,  H10 =  10'sd33,  H11 =  10'sd58,
                   H12 =  10'sd87,  H13 =  10'sd118, H14 =  10'sd150, H15 =  10'sd181,
                   H16 =  10'sd209, H17 =  10'sd231, H18 =  10'sd248, H19 =  10'sd256;

  /* second half mirrors the first one : H(39-k) = H(k) */
  localparam [9:0] H20 = H19, H21 = H18, H22 = H17, H23 = H16, H24 = H15,
                   H25 = H14, H26 = H13, H27 = H12, H28 = H11, H29 = H10,
                   H30 = H09, H31 = H08, H32 = H07, H33 = H06, H34 = H05,
                   H35 = H04, H36 = H03, H37 = H02, H38 = H01, H39 = H00;

  /* zero padding used by the last RX branch */
  localparam [9:0] H40 = 10'd0, H41 = 10'd0, H42 = 10'd0, H43 = 10'd0;

  /* FSM encoding */
  localparam [1:0] IDLE = 2'd0,
                   RX   = 2'd1,
                   TX   = 2'd2;

  /*****************************************************************************
  * coefficient banks, returned as {c0,c1,c2,c3} (c0 multiplies t0)
  *****************************************************************************/
  function [39:0] rx_bank;
    input [3:0] phase;
    begin
      case(phase)
        4'd0:    rx_bank = {H10,H21,H32,H43};
        4'd1:    rx_bank = {H09,H20,H31,H42};
        4'd2:    rx_bank = {H08,H19,H30,H41};
        4'd3:    rx_bank = {H07,H18,H29,H40};
        4'd4:    rx_bank = {H06,H17,H28,H39};
        4'd5:    rx_bank = {H05,H16,H27,H38};
        4'd6:    rx_bank = {H04,H15,H26,H37};
        4'd7:    rx_bank = {H03,H14,H25,H36};
        4'd8:    rx_bank = {H02,H13,H24,H35};
        4'd9:    rx_bank = {H01,H12,H23,H34};
        default: rx_bank = {H00,H11,H22,H33};
      endcase
    end
  endfunction

  /* trailing figure on each line is the sum of |h| for that phase */
  function [39:0] tx_bank;
    input [3:0] phase;
    begin
      case(phase)
        4'd0:    tx_bank = {H00,H10,H20,H30};  /* 311 */
        4'd1:    tx_bank = {H01,H11,H21,H31};  /* 319 */
        4'd2:    tx_bank = {H02,H12,H22,H32};  /* 345 */
        4'd3:    tx_bank = {H03,H13,H23,H33};  /* 364 */
        4'd4:    tx_bank = {H04,H14,H24,H34};  /* 372 (max) */
        4'd5:    tx_bank = {H05,H15,H25,H35};  /* 372 (max) */
        4'd6:    tx_bank = {H06,H16,H26,H36};  /* 364 */
        4'd7:    tx_bank = {H07,H17,H27,H37};  /* 345 */
        4'd8:    tx_bank = {H08,H18,H28,H38};  /* 319 */
        default: tx_bank = {H09,H19,H29,H39};  /* 311 */
      endcase
    end
  endfunction

  /*****************************************************************************
  * registers
  *****************************************************************************/
  reg  [ 1:0] state;
  reg  [ 3:0] cycle;
  reg  [ 7:0] t0,t1,t2,t3;
  reg         t0_valid,t1_valid,t2_valid,t3_valid;
  reg         in_rx_valid_1t;

  /*****************************************************************************
  * direction dependent selection (RX wins when both enables are set)
  *****************************************************************************/
  reg  [ 7:0] tap_in;
  reg  [16:0] round_bias;
  reg  [39:0] bank;

  always @(*)
  begin
    if(rxenable)
    begin
      /* sample is forced to zero while the source is not valid */
      tap_in     = {8{in_rx_valid}} & in_rx_data;
      /* half LSB of the /1024 output scaling */
      round_bias = 17'd512;
      bank       = rx_bank(cycle);
    end
    else
    begin
      /* 7-bit sample sign-extended to the 8-bit delay line width */
      tap_in     = {8{in_tx_valid}} & {in_tx_data[6],in_tx_data};
      /* half LSB of the selected scaling : 32, 16, 8, 4 */
      round_bias = 17'd32 >> txshift;
      bank       = tx_bank(cycle);
    end
  end

  /*****************************************************************************
  * multiply / accumulate, all operands handled as signed
  *****************************************************************************/
  wire signed [ 9:0] c0 = bank[39:30];
  wire signed [ 9:0] c1 = bank[29:20];
  wire signed [ 9:0] c2 = bank[19:10];
  wire signed [ 9:0] c3 = bank[ 9: 0];

  /* |coef| <= 256 and |sample| <= 128, so each product fits on 16 bits */
  wire signed [15:0] p0 = c0 * $signed(t0);
  wire signed [15:0] p1 = c1 * $signed(t1);
  wire signed [15:0] p2 = c2 * $signed(t2);
  wire signed [15:0] p3 = c3 * $signed(t3);

  /* round_bias must be cast, otherwise the whole sum turns unsigned */
  wire signed [16:0] acc = p0 + p1 + p2 + p3 + $signed(round_bias);

  /*****************************************************************************
  * output scaling
  *****************************************************************************/
  /* RX : divide by 1024 */
  wire [ 6:0] rx_filt  = acc[16:10];

  /* TX : 12-bit rail of the same sign as the accumulator */
  wire [11:0] tx_clip  = {acc[16],{11{~acc[16]}}};

  /* the dropped MSBs must be copies of the sign bit, else the result clips */
  wire        fits_sh4 = (acc[16:15] == {2{acc[16]}});
  wire        fits_sh3 = (acc[16:14] == {3{acc[16]}});

  reg  [11:0] tx_filt;

  always @(*)
  begin
    case(txshift)
      2'd0:    tx_filt = {acc[16],acc[16:6]};            /* /64 */
      2'd1:    tx_filt = acc[16:5];                      /* /32 */
      2'd2:    tx_filt = fits_sh4 ? acc[15:4] : tx_clip; /* /16 */
      default: tx_filt = fits_sh3 ? acc[14:3] : tx_clip; /* /8  */
    endcase
  end

  /*****************************************************************************
  * sequencing helpers
  *****************************************************************************/
  wire [ 3:0] cycle_nxt = cycle + 4'd1;
  wire        any_valid = t0_valid | t1_valid | t2_valid | t3_valid;

  /*****************************************************************************
  * control and delay line
  *****************************************************************************/
  always @(posedge clk, negedge rst_n)
  begin
    if(!rst_n)
    begin
      /* asynchronous reset */
      state                                 <= IDLE;
      cycle                                 <= 4'd0;
      {t0,t1,t2,t3}                         <= 32'd0;
      {t0_valid,t1_valid,t2_valid,t3_valid} <= 4'd0;
      in_rx_valid_1t                        <= 1'b0;
      in_rx_ready                           <= 1'b0;
      {out_rx_valid,out_rx_data}            <= 8'd0;
      {out_tx_valid,out_tx_data}            <= 13'd0;
    end
    else if(!(txenable || rxenable))
    begin
      /* synchronous clear while neither direction is enabled */
      state                                 <= IDLE;
      cycle                                 <= 4'd0;
      {t0,t1,t2,t3}                         <= 32'd0;
      {t0_valid,t1_valid,t2_valid,t3_valid} <= 4'd0;
      in_rx_valid_1t                        <= 1'b0;
      in_rx_ready                           <= 1'b0;
      {out_rx_valid,out_rx_data}            <= 8'd0;
      {out_tx_valid,out_tx_data}            <= 13'd0;
    end
    else
    begin
      /* one cycle delayed copy of in_rx_valid, used to start RX */
      in_rx_valid_1t <= in_rx_valid;

      if(state==RX)
      begin
        /* request a sample on every phase but the one following phase 9 */
        in_rx_ready <= (cycle!=4'd9);

        /* phase 10 wraps the counter and freezes the delay line */
        if(cycle==4'd10)
          cycle <= 4'd0;
        else
        begin
          cycle         <= cycle_nxt;
          {t0,t1,t2,t3} <= {tap_in,t0,t1,t2};
        end

        /* the valid flags shift on every clock, phase 10 included */
        {t0_valid,t1_valid,t2_valid,t3_valid} <= {in_rx_valid,t0_valid,t1_valid,t2_valid};

        /* one output per clock */
        out_rx_valid <= any_valid;
        out_rx_data  <= rx_filt;
      end
      else if(state==TX)
      begin
        /* the result registered during phase 10 is never flagged valid */
        out_tx_valid <= (cycle!=4'd10) & any_valid;
        cycle        <= (cycle==4'd10) ? 4'd0 : cycle_nxt;

        /* delay line and valid flags shift on every clock */
        {t0,t1,t2,t3}                         <= {tap_in,t0,t1,t2};
        {t0_valid,t1_valid,t2_valid,t3_valid} <= {in_tx_valid,t0_valid,t1_valid,t2_valid};

        out_tx_data <= tx_filt;
      end
      else
      begin
        /* IDLE : wait for the first valid sample of the enabled direction */
        if(rxenable && in_rx_valid_1t)
        begin
          /* RX starts on phase 9 with the ready already raised */
          cycle       <= 4'd9;
          in_rx_ready <= 1'b1;
          state       <= RX;
        end
        else if(!rxenable && txenable && in_tx_valid)
        begin
          /* TX captures the first sample right away and starts on phase 0 */
          cycle         <= 4'd0;
          {t0,t1,t2,t3} <= {tap_in,t0,t1,t2};
          t0_valid      <= 1'b1;
          state         <= TX;
        end
      end
    end
  end

`ifdef RW_SIMU_ON
  /* simulation only : readable FSM state for waveform viewers */
  reg [8*8-1:0] str_state;
  always @(*)
  begin
    if(state==IDLE)
      str_state = "IDLE";
    else if(state==RX)
      str_state = "RX";
    else if(state==TX)
      str_state = "TX";
    else
      str_state = "ERROR";
  end
`endif
endmodule
`default_nettype wire
