//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//  Copyright (C) by RivieraWaves.
//  This module is a confidential and proprietary property of RivieraWaves
//  and a possession or use of this module requires written permission
//  from RivieraWaves.
//----------------------------------------------------------------------------
// $Author: cvandeburie $
// Company          : RivieraWaves
//----------------------------------------------------------------------------
// $Revision: 38365 $
// $Date: 2019-04-09 15:19:07 +0200 (Tue, 09 Apr 2019) $
// ---------------------------------------------------------------------------
// Dependencies     :                                                       
// Description      : Time Domain Frequency Offset - Auto Correlation Calc
// Simulation Notes :                                                       
// Synthesis Notes  :                                                       
// Application Note :                                                       
// Simulator        :                                                       
// Parameters       :                                                       
// Terms & concepts :                                                       
// Bugs             :                                                       
// Open issues and future enhancements :                                    
// References       :                                                       
// Revision History :                                                       
// ---------------------------------------------------------------------------
//                                                                          
// $HeadURL: https://dpereira@svn.frso.rivierawaves.com/svn/rw_wlan_nx/branches/Projects/WLAN_HE_REF_IP/HW/WLAN_HE_REF_IP_20_40MHZ/IPs/HW/TOP11ax/PHYSUBSYS/HDMCORE/OFDMACORE/OFDMRXCORE/OFDMRXTD/TDFO/verilog/rtl/TDFOCorrVal.v $
//
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// TDFOCorrVal
// Auto-correlation calculation for the Time Domain Frequency Offset (TDFO)
// estimation. The current samples and the buffered (delayed) samples of each
// Rx chain are multiplied (sample * conjugate of the buffered sample), summed
// over the Rx chains, accumulated, rounded and saturated.
//
// Parameters:
//   TDFOIPWIDTH - width of each real / imaginary input sample.
//   TDFOCVWIDTH - width of CorrValOutRe / CorrValOutIm. The output stage
//                 sign-extends a CORRWIDTH (15) bit value by
//                 (TDFOCVWIDTH-CORRWIDTH) bits, so the parameter is presumably
//                 expected to be larger than 15 - TODO confirm.
//
// Compile-time options:
//   RW_NX_DERIV_PATH1   - adds the ports and datapath of the second Rx chain.
//   RW_NX_TDFO_NSR_EN   - adds the corrLTFs* and totalPower outputs.
module TDFOCorrVal #(parameter TDFOIPWIDTH = 10, //Input Datawidth
                     parameter TDFOCVWIDTH = 16  //Datawidth of Auto Correlation Value
                    )(

            ///////////////////////////////////////////////
            // Inputs
            ///////////////////////////////////////////////
            //Clock and Reset
            input   wire                                   nPhyRst, // Active LOW Reset
            input   wire                                   PhyClk,  // PHY Clock

            //Control Signals
            input   wire                                   CorrEn,      //Enables Correlation
            input   wire                                   MovAvgEn,    //Enables Moving Avg Block
            input   wire                                   MovAvgClr,   //Clear Mov Avg Value
            input   wire                                   TDFOEstMode, //0 = Coarse TDFO; 1 = Fine
            input   wire                                   DataValid,   //Qualifies Input Data

            //Data
            //Current Data
            input   wire signed     [TDFOIPWIDTH-1:0]      DataInReRx0, //Real Comp of Rx Chain 0
            input   wire signed     [TDFOIPWIDTH-1:0]      DataInImRx0, //Imag Comp of Rx Chain 0
`ifdef RW_NX_DERIV_PATH1
            input   wire signed     [TDFOIPWIDTH-1:0]      DataInReRx1, //Real Comp of Rx Chain 1
            input   wire signed     [TDFOIPWIDTH-1:0]      DataInImRx1, //Imag Comp of Rx Chain 1
`endif

            //Buffered Data (from TBE delay line)
            input   wire signed     [TDFOIPWIDTH-1:0]      DataDBufReRx0, //Buffered DataInReRx0
            input   wire signed     [TDFOIPWIDTH-1:0]      DataDBufImRx0, //Buffered DataInImRx0
`ifdef RW_NX_DERIV_PATH1
            input   wire signed     [TDFOIPWIDTH-1:0]      DataDBufReRx1, //Buffered DataInReRx1
            input   wire signed     [TDFOIPWIDTH-1:0]      DataDBufImRx1, //Buffered DataInImRx1
`endif

            ///////////////////////////////////////////////
            // Outputs
            ///////////////////////////////////////////////
            //Control Signals
            output  wire                                   CorrValOutValid, //Qualifies CorrValOut

`ifdef RW_NX_TDFO_NSR_EN
            // Partial results for NSR computation
            // (rounded per-chain accumulation of Data * conj(DataDBuf))
            output  reg signed  [(TDFOIPWIDTH*2)-1:0]      corrLTFsReRx0,
            output  reg signed  [(TDFOIPWIDTH*2)-1:0]      corrLTFsImRx0, // Sign inverted compared to Matlab, but no impact as used to compute a modulus
`ifdef RW_NX_DERIV_PATH1
            output  reg signed  [(TDFOIPWIDTH*2)-1:0]      corrLTFsReRx1,
            output  reg signed  [(TDFOIPWIDTH*2)-1:0]      corrLTFsImRx1,
`endif
            // Total power; the port width is a fixed 22 bits and is not derived
            // from TDFOIPWIDTH
            output wire         [21:0]                     totalPower,
`endif

            //Data
            //Final Correlation Value after Moving Average Delay Buffer
            output  reg signed  [TDFOCVWIDTH-1:0]          CorrValOutRe, //Real Component
            output  reg signed  [TDFOCVWIDTH-1:0]          CorrValOutIm  //Imaginary Component
            );

//////////////////////////////////////////////////////////////////////////////
// Local Parameters Declarations
//////////////////////////////////////////////////////////////////////////////
// Datapath widths
localparam MULTOUTWIDTH = TDFOIPWIDTH*2;    //Multiplier Output Width with MSB dropped
`ifdef RW_NX_DERIV_PATH1
localparam ADD1OUTWIDTH = MULTOUTWIDTH+1;   //Width of Rx paths Adder
`else
localparam ADD1OUTWIDTH = MULTOUTWIDTH;     //One Rx path, no Adder
`endif
localparam CORRWIDTH    = 15;               //Width of the correlation sum once 6 LSBs are rounded away
localparam MOVAVGWIDTH  = CORRWIDTH  + 6;   //Width of Moving Average Block (6 extra bits: up to 64 values accumulated)
localparam ACCUWIDTH    = MULTOUTWIDTH + 6; //Width of the unrounded accumulators (6 extra bits: up to 64 values accumulated)

//TDFO Mode
localparam COARSE = 1'b0;                   //Value of TDFOEstMode selecting the coarse estimation

//Multiplier Type
localparam MUL_TYPE = 0;                    //Forwarded to the MUL_TYPE parameter of ComplexMult

// General param
// Zero constants sized for the registers they reset. The replication count is
// written directly as a constant expression: wrapping it in a concatenation
// (as in {{(N)}{1'b0}}) places an unsized operand inside a concatenation.
localparam     signed  [TDFOIPWIDTH-1:0]   CONST_ZERO_TDFOIPWIDTH     = {TDFOIPWIDTH{1'b0}};
localparam     signed  [MOVAVGWIDTH-1:0]   CONST_ZERO_MOVAVGWIDTH     = {MOVAVGWIDTH{1'b0}};
localparam     signed  [CORRWIDTH-1:0]     CONST_ZERO_CORRWIDTH       = {CORRWIDTH{1'b0}};
localparam     signed  [CORRWIDTH+2:0]     CONST_ZERO_CORRWIDTH_P3    = {(CORRWIDTH + 3){1'b0}};
localparam                                 BYPASS_PARAM               =  1'b0; //Drives the Bypass input of the output-valid delay line


//////////////////////////////////////////////////////////////////////////////
//  Internal Wires Declarations
//////////////////////////////////////////////////////////////////////////////
// Registered multiplier outputs summed over the Rx chains (6 LSBs still present)
wire signed [ADD1OUTWIDTH-1:0] CorrSumRndRe, CorrSumRndIm;
// Same sum once the 6 LSBs have been rounded away
wire signed [CORRWIDTH-1:0]    CorrSumIntRe, CorrSumIntIm;
// Rounded moving-average accumulator, one candidate per estimation mode
wire signed [CORRWIDTH+2:0]    MovAvgAccReCoarse, MovAvgAccReFine;
wire signed [CORRWIDTH+2:0]    MovAvgAccImCoarse, MovAvgAccImFine;
// Moving-average value after saturation
wire signed [CORRWIDTH-1:0]    MovAvgSatRe, MovAvgSatIm;
// Set in the cycle following the load of CorrValSumReTemp / CorrValSumImTemp
reg                            CorrValSumValid;

// Complex multiplier outputs, before the MSB is dropped
wire signed [MULTOUTWIDTH:0]   DataMultReRx0, DataMultImRx0;
`ifdef RW_NX_DERIV_PATH1
wire signed [MULTOUTWIDTH:0]   DataMultReRx1, DataMultImRx1;
`endif

// Bypass control of the output-valid delay line
wire                           ByPass;

// Index of the generated delay-line stages
genvar i;

//////////////////////////////////////////////////////////////////////////////
// Internal Registers Declarations
//////////////////////////////////////////////////////////////////////////////
// Registered operands of the complex multipliers (A and B inputs)
reg signed [TDFOIPWIDTH-1:0]  AReRx0Reg, AImRx0Reg;
reg signed [TDFOIPWIDTH-1:0]  BReRx0Reg, BImRx0Reg;
`ifdef RW_NX_DERIV_PATH1
reg signed [TDFOIPWIDTH-1:0]  AReRx1Reg, AImRx1Reg;
reg signed [TDFOIPWIDTH-1:0]  BReRx1Reg, BImRx1Reg;
`endif

// Registered multiplier outputs, MSB dropped
reg signed [MULTOUTWIDTH-1:0] DataMultReRx0Reg, DataMultImRx0Reg;
`ifdef RW_NX_DERIV_PATH1
reg signed [MULTOUTWIDTH-1:0] DataMultReRx1Reg, DataMultImRx1Reg;
`endif

// Moving-average accumulators
reg signed [MOVAVGWIDTH-1:0]  MovAvgAccRe, MovAvgAccIm;

// Rounded accumulator value selected for the current estimation mode
reg signed [CORRWIDTH+2:0]    MovAvgRndRe, MovAvgRndIm;

// 16-entry delay line holding the previous correlation sums
reg signed [CORRWIDTH-1:0]    CorrValSumDBRe[15:0];
reg signed [CORRWIDTH-1:0]    CorrValSumDBIm[15:0];

// Latest correlation sum
reg signed [CORRWIDTH-1:0]    CorrValSumReTemp, CorrValSumImTemp;

// Multiplier mux counter and its saturation value
reg        [2:0]              MultMuxCount;
wire       [2:0]              MultMuxCountMax;

`ifdef RW_NX_TDFO_NSR_EN
// Per-chain accumulators of the unrounded multiplier outputs
reg     signed     [ACCUWIDTH-1:0]       LTFsAccReRx0;
reg     signed     [ACCUWIDTH-1:0]       LTFsAccImRx0;
// Accumulator of the power terms (twice the total power, halved when rounding)
reg     signed     [ACCUWIDTH:0]         totalPowerX2;
// Accumulators with 6 LSBs rounded away; same width as the corrLTFs* outputs
wire    signed     [(TDFOIPWIDTH*2)-1:0] NextCorrLTFsReRx0;
wire    signed     [(TDFOIPWIDTH*2)-1:0] NextCorrLTFsImRx0;
`ifdef RW_NX_DERIV_PATH1
reg     signed     [ACCUWIDTH-1:0]       LTFsAccReRx1;
reg     signed     [ACCUWIDTH-1:0]       LTFsAccImRx1;
// Sized like the Rx0 wires and like corrLTFsReRx1 / corrLTFsImRx1, so that the
// assignment to the output registers no longer truncates an extra MSB.
wire    signed     [(TDFOIPWIDTH*2)-1:0] NextCorrLTFsReRx1;
wire    signed     [(TDFOIPWIDTH*2)-1:0] NextCorrLTFsImRx1;
`endif
`endif

//////////////////////////////////////////////////////////////////////////////
// Beginning of Logic part
//////////////////////////////////////////////////////////////////////////////

//This block computes the Auto Correlation value for the Time Domain 
//Frequency Offset (TDFO) Estimation procedure. It expects the current inputs 
//corresponding to all the Rx Chains as well as the buffered input.
//The TBE delay line is used to implement all the delay buffers required.
//For Coarse TDFO, the Buffer depth at the input is 16.
//For Fine TDFO, the buffer depth at the input is 64.
//The Moving Average Delay Buffer is implemented as a 16-stage delay line.
//This block computes the sum of the auto correlation value and sends
//it to the Moving Average Delay Buffer. It also computes the final auto
//correlation value in both the Coarse and the Fine. Except for the 
//buffer depths, the coarse and the fine TDFO estimation have a similar flow.

// During Fine FO estimation, this block also computes the total power for all 
// Rx paths, and the CorrLTFs signal per Rx path, later used to compute the signal power.
// As the input data changes every 6 clock cycles, it is muxed to the multipliers inputs
// to compute the different needed values.

// Saturation value of the counter. It is reached only at the end of the window;
// otherwise DataValid=1 restarts the counter when it is at 5.
assign MultMuxCountMax = 3'd6;

// Multiplier mux counter:
// - parked at 4 under reset and while CorrEn is low,
// - restarted at 0 by DataValid,
// - otherwise incremented until it saturates at MultMuxCountMax.
always @ (posedge PhyClk or negedge nPhyRst)
   begin: Mult_Mux_Blk
      if (!nPhyRst)
         MultMuxCount <= 3'd4;
      else if (CorrEn) begin
         if (DataValid)
            MultMuxCount <= 3'd0;
         else if (MultMuxCount < MultMuxCountMax)
            MultMuxCount <= MultMuxCount + 3'd1;
      end
      else
         MultMuxCount <= 3'd4;
   end //Mult_Mux_Blk


//Multiplier operand selection, from DataIn, DataDBuf and their conjugates.
//The operands registered in a given cycle depend on MultMuxCount:
//  MultMuxCount = 0 : A = Data,     B = conj(Data)      -> Data     * conj(Data)
//  MultMuxCount = 1 : A = DataDBuf, B = conj(DataDBuf)  -> DataDBuf * conj(DataDBuf)
//  other values     : A = Data,     B = conj(DataDBuf)  -> Data     * conj(DataDBuf)
// Notes:
// - Data * conj(DataDBuf) is used in coarse and fine FO estimation, and to
//   compute corrLTFs, used in the signal power computation.
// - The two other products are accumulated to compute the total power. They
//   are selected during fine FO estimation only; in coarse mode the register
//   concerned keeps its previous value for that cycle.

// B operand: conjugate of the buffered data, or of the current data when
// MultMuxCount is 0 in fine mode
always @ (posedge PhyClk or negedge nPhyRst)
   begin: B_Mult_Blk
      if (!nPhyRst) begin
         BReRx0Reg <= CONST_ZERO_TDFOIPWIDTH;
         BImRx0Reg <= CONST_ZERO_TDFOIPWIDTH;
`ifdef RW_NX_DERIV_PATH1
         BReRx1Reg <= CONST_ZERO_TDFOIPWIDTH;
         BImRx1Reg <= CONST_ZERO_TDFOIPWIDTH;
`endif
      end
      else if (CorrEn) begin
         case (MultMuxCount)
            // conj(Data), fine mode only
            3'd0: begin
               if (TDFOEstMode != COARSE) begin
                  BReRx0Reg <=  DataInReRx0;
                  BImRx0Reg <= -DataInImRx0;
`ifdef RW_NX_DERIV_PATH1
                  BReRx1Reg <=  DataInReRx1;
                  BImRx1Reg <= -DataInImRx1;
`endif
               end
            end
            // conj(DataDBuf)
            default: begin
               BReRx0Reg <=  DataDBufReRx0;
               BImRx0Reg <= -DataDBufImRx0;
`ifdef RW_NX_DERIV_PATH1
               BReRx1Reg <=  DataDBufReRx1;
               BImRx1Reg <= -DataDBufImRx1;
`endif
            end
         endcase
      end
   end //B_Mult_Blk

// A operand: current data, or the buffered data when MultMuxCount is 1 in
// fine mode (in coarse mode the register keeps its value for that cycle)
always @ (posedge PhyClk or negedge nPhyRst)
   begin: A_Mult_Blk
      if (!nPhyRst) begin
         AReRx0Reg <= CONST_ZERO_TDFOIPWIDTH;
         AImRx0Reg <= CONST_ZERO_TDFOIPWIDTH;
`ifdef RW_NX_DERIV_PATH1
         AReRx1Reg <= CONST_ZERO_TDFOIPWIDTH;
         AImRx1Reg <= CONST_ZERO_TDFOIPWIDTH;
`endif
      end
      else if (CorrEn) begin
         case (MultMuxCount)
            // DataDBuf, fine mode only
            3'd1: begin
               if (TDFOEstMode != COARSE) begin
                  AReRx0Reg <= DataDBufReRx0;
                  AImRx0Reg <= DataDBufImRx0;
`ifdef RW_NX_DERIV_PATH1
                  AReRx1Reg <= DataDBufReRx1;
                  AImRx1Reg <= DataDBufImRx1;
`endif
               end
            end
            // Current data
            default: begin
               AReRx0Reg <= DataInReRx0;
               AImRx0Reg <= DataInImRx0;
`ifdef RW_NX_DERIV_PATH1
               AReRx1Reg <= DataInReRx1;
               AImRx1Reg <= DataInImRx1;
`endif
            end
         endcase
      end
   end //A_Mult_Blk

//Complex multipliers, one instance per Rx chain, fed by the registered
//A and B operands and enabled by CorrEn
   ComplexMult #(.INPUT_WIDTH(TDFOIPWIDTH), .MUL_TYPE(MUL_TYPE)) U_CM0 (
      //Clock, reset and enable
      .PhyClk   (PhyClk),
      .nPhyRst  (nPhyRst),
      .DataInEn (CorrEn),
      //Operands
      .ARe      (AReRx0Reg),
      .AIm      (AImRx0Reg),
      .BRe      (BReRx0Reg),
      .BIm      (BImRx0Reg),
      //Product
      .CRe      (DataMultReRx0),
      .CIm      (DataMultImRx0)
   );

`ifdef RW_NX_DERIV_PATH1
   ComplexMult #(.INPUT_WIDTH(TDFOIPWIDTH), .MUL_TYPE(MUL_TYPE)) U_CM1 (
      //Clock, reset and enable
      .PhyClk   (PhyClk),
      .nPhyRst  (nPhyRst),
      .DataInEn (CorrEn),
      //Operands
      .ARe      (AReRx1Reg),
      .AIm      (AImRx1Reg),
      .BRe      (BReRx1Reg),
      .BIm      (BImRx1Reg),
      //Product
      .CRe      (DataMultReRx1),
      .CIm      (DataMultImRx1)
   );
`endif

//Multiplier output register.
//The MSB of the product is dropped: it is not used as the inputs come from a
//Symmetric Saturate.
always @ (posedge PhyClk or negedge nPhyRst)
   begin: MultBlk
      if (!nPhyRst) begin
         DataMultReRx0Reg <= {MULTOUTWIDTH{1'b0}};
         DataMultImRx0Reg <= {MULTOUTWIDTH{1'b0}};
`ifdef RW_NX_DERIV_PATH1
         DataMultReRx1Reg <= {MULTOUTWIDTH{1'b0}};
         DataMultImRx1Reg <= {MULTOUTWIDTH{1'b0}};
`endif
      end
      else if (CorrEn) begin
         // Keep the MULTOUTWIDTH LSBs of each product
         DataMultReRx0Reg <= DataMultReRx0[MULTOUTWIDTH-1:0];
         DataMultImRx0Reg <= DataMultImRx0[MULTOUTWIDTH-1:0];
`ifdef RW_NX_DERIV_PATH1
         DataMultReRx1Reg <= DataMultReRx1[MULTOUTWIDTH-1:0];
         DataMultImRx1Reg <= DataMultImRx1[MULTOUTWIDTH-1:0];
`endif
      end
   end //MultBlk

// Sum of the registered products over the Rx chains.
// With two chains, every operand is signed and the target is one bit wider
// than the operands, so each product is sign-extended to ADD1OUTWIDTH before
// the addition. With one chain the product is passed through.
`ifdef RW_NX_DERIV_PATH1
   assign CorrSumRndRe = DataMultReRx0Reg + DataMultReRx1Reg;
   assign CorrSumRndIm = DataMultImRx0Reg + DataMultImRx1Reg;
`else
   assign CorrSumRndRe = DataMultReRx0Reg;
   assign CorrSumRndIm = DataMultImRx0Reg;
`endif


// The following code is for FO estimation.
// For total and signal power computation,
// - the accumulation must be done before rounding,
// - 64 samples are accumulated, so the moving average delay line is not needed.

// Cut 6 bits with Round
// The 6 LSBs are removed and bit 5 (the MSB of the removed part) is added back
// as the rounding increment. The truncated value is prefixed with one copy of
// its sign bit before the addition.
// With a single Rx chain and TDFOIPWIDTH=10 the left operand is
// 1 + (ADD1OUTWIDTH-6) = 15 bits, i.e. CORRWIDTH.
// NOTE(review): with RW_NX_DERIV_PATH1 the left operand is one bit wider than
// CORRWIDTH and the result is truncated by the assignment - confirm that this
// is intended.
assign CorrSumIntRe = {CorrSumRndRe[ADD1OUTWIDTH-1], CorrSumRndRe[ADD1OUTWIDTH-1:6]} +
                                             {{(CORRWIDTH-1){1'b0}},CorrSumRndRe[5]};
assign CorrSumIntIm = {CorrSumRndIm[ADD1OUTWIDTH-1], CorrSumRndIm[ADD1OUTWIDTH-1:6]} + 
                                             {{(CORRWIDTH-1){1'b0}},CorrSumRndIm[5]};

//Capture of the correlation sum used for FO estimation.
//CorrValSumValid is a one-cycle flag raised together with the capture; it is
//low in every other cycle, including while CorrEn is low.
always @ (posedge PhyClk or negedge nPhyRst)
   begin: CorrSumBlk
      if (!nPhyRst) begin
         CorrValSumReTemp <= CONST_ZERO_CORRWIDTH;
         CorrValSumImTemp <= CONST_ZERO_CORRWIDTH;
         CorrValSumValid  <= 1'b0;
      end
      else begin
         // Default, overridden below in the capture cycle
         CorrValSumValid <= 1'b0;
         // MultMuxCount = 3 is the cycle in which the rounded sum holds
         // Data * conj(DataDBuf)
         if ((CorrEn == 1'b1) && (MultMuxCount == 3'd3)) begin
            CorrValSumReTemp <= CorrSumIntRe;
            CorrValSumImTemp <= CorrSumIntIm;
            CorrValSumValid  <= 1'b1;
         end
      end
   end //CorrSumBlk

//Moving Average Calculation
//The moving average block needs the latest "n" values (current + last n-1).
//The current CorrValSum is added to the previously accumulated
//value and the nth previous CorrValSum is subtracted from it.
//The value of n for the different cases is as below
//Packet Coarse TDFO: n = 16
//Packet Fine   TDFO: n = 64 (i.e. use all valid inputs, no subtraction needed)
//DataValid is used to ensure each data is entered only once in the delay line.
//CorrValSumValid is used to ensure each data is accumulated only once.

//Pipeline Buffer for storing Sum: first stage of the delay line.
//It is cleared by reset and by MovAvgClr, and is loaded in coarse mode only.
//The zero value is the CONST_ZERO_CORRWIDTH constant, as in the other blocks
//that clear CORRWIDTH-wide registers.
//NOTE(review): this stage is additionally qualified by MovAvgEn, whereas the
//following stages shift on DataValid alone - confirm that this is intended.
always @ (posedge PhyClk or negedge nPhyRst)
   begin
      if (nPhyRst == 1'b0) begin
         CorrValSumDBRe[0] <= CONST_ZERO_CORRWIDTH;
         CorrValSumDBIm[0] <= CONST_ZERO_CORRWIDTH;
      end
      else if (MovAvgClr == 1'b1) begin
         CorrValSumDBRe[0] <= CONST_ZERO_CORRWIDTH;
         CorrValSumDBIm[0] <= CONST_ZERO_CORRWIDTH;
      end
      else if (TDFOEstMode == COARSE) begin
         //Store the incoming value
         if (DataValid == 1'b1 && MovAvgEn == 1'b1) begin
           CorrValSumDBRe[0] <= CorrValSumReTemp;
           CorrValSumDBIm[0] <= CorrValSumImTemp;
         end
      end
    end

//After that the samples are shifted through the buffer
//Stages 1 through 15 (stage 0 is loaded separately)
//Each stage is cleared by reset and by MovAvgClr; in coarse mode it takes the
//value of the previous stage whenever DataValid is set.
//The zero value is the CONST_ZERO_CORRWIDTH constant, as in the other blocks
//that clear CORRWIDTH-wide registers.
generate
   for (i = 1; i<16; i=i+1) begin: RxSum_Buffer
      always @ (posedge PhyClk or negedge nPhyRst)
         begin
            if (nPhyRst == 1'b0) begin
               CorrValSumDBRe[i] <= CONST_ZERO_CORRWIDTH;
               CorrValSumDBIm[i] <= CONST_ZERO_CORRWIDTH;
            end
            else if (MovAvgClr == 1'b1) begin
               CorrValSumDBRe[i] <= CONST_ZERO_CORRWIDTH;
               CorrValSumDBIm[i] <= CONST_ZERO_CORRWIDTH;
            end
            else if (TDFOEstMode == COARSE) begin
               //Take in the data from the previous stage
               if (DataValid == 1'b1) begin
                  CorrValSumDBRe[i] <= CorrValSumDBRe[i-1];
                  CorrValSumDBIm[i] <= CorrValSumDBIm[i-1];
               end
            end
         end
   end
endgenerate

// Accumulators for FO estimation and power computations.
// All accumulators are cleared by reset and by MovAvgClr, and are updated only
// while MovAvgEn is set.
// (1) Data for signal power computation is valid when MultMuxCount=3:
// - accumulate the unrounded multiplier outputs in LTFsAcc, per Rx path
// - computation is done during fine estimation only
// (2) Data for FO estimation is valid when MultMuxCount=4:
// - Coarse FO estimation: accumulate the latest 16 CorrValSum values in MovAvgAcc
//   (the new value is added, the oldest delay-line entry is subtracted)
// - Fine FO estimation: accumulate the latest 64 CorrValSum values (i.e. all,
//   CorrValSumDBIm[15]=0) in MovAvgAcc
// (3) Data for total power computation is valid
// - when MultMuxCount=4 for the DataIn * conj(DataIn) part,
// - when MultMuxCount=5 for the DataDBuf * conj(DataDBuf) part.
// - the multiplier outputs for both Rx paths are accumulated in totalPowerX2
// - computation is done during fine estimation only
//
// The sign extension of CorrSumRndRe towards totalPowerX2 is sized from
// ADD1OUTWIDTH, the actual width of CorrSumRndRe, so that the extended operand
// is exactly ACCUWIDTH+1 bits with one or two Rx paths.

always @ (posedge PhyClk or negedge nPhyRst)
   begin: MovAvgBlk
      if (nPhyRst == 1'b0) begin
         MovAvgAccRe       <= CONST_ZERO_MOVAVGWIDTH;
         MovAvgAccIm       <= CONST_ZERO_MOVAVGWIDTH;
`ifdef RW_NX_TDFO_NSR_EN
         LTFsAccReRx0      <= {ACCUWIDTH{1'b0}};
         LTFsAccImRx0      <= {ACCUWIDTH{1'b0}};
`ifdef RW_NX_DERIV_PATH1
         LTFsAccReRx1      <= {ACCUWIDTH{1'b0}};
         LTFsAccImRx1      <= {ACCUWIDTH{1'b0}};
`endif
         totalPowerX2      <= {(ACCUWIDTH+1){1'b0}};
`endif
      end
      else if (MovAvgClr == 1'b1) begin
         MovAvgAccRe       <= CONST_ZERO_MOVAVGWIDTH;
         MovAvgAccIm       <= CONST_ZERO_MOVAVGWIDTH;
`ifdef RW_NX_TDFO_NSR_EN
         LTFsAccReRx0      <= {ACCUWIDTH{1'b0}};
         LTFsAccImRx0      <= {ACCUWIDTH{1'b0}};
`ifdef RW_NX_DERIV_PATH1
         LTFsAccReRx1      <= {ACCUWIDTH{1'b0}};
         LTFsAccImRx1      <= {ACCUWIDTH{1'b0}};
`endif
         totalPowerX2      <= {(ACCUWIDTH+1){1'b0}};
`endif
      end
      else if (MovAvgEn == 1'b1) begin
         case (MultMuxCount)
`ifdef RW_NX_TDFO_NSR_EN
            // Data for signal power accumulator is valid
            3'd3   : begin
               LTFsAccReRx0  <= LTFsAccReRx0 + {{(ACCUWIDTH-MULTOUTWIDTH){DataMultReRx0Reg[MULTOUTWIDTH-1]}},DataMultReRx0Reg};
               LTFsAccImRx0  <= LTFsAccImRx0 + {{(ACCUWIDTH-MULTOUTWIDTH){DataMultImRx0Reg[MULTOUTWIDTH-1]}},DataMultImRx0Reg};
`ifdef RW_NX_DERIV_PATH1
               LTFsAccReRx1  <= LTFsAccReRx1 + {{(ACCUWIDTH-MULTOUTWIDTH){DataMultReRx1Reg[MULTOUTWIDTH-1]}},DataMultReRx1Reg};
               LTFsAccImRx1  <= LTFsAccImRx1 + {{(ACCUWIDTH-MULTOUTWIDTH){DataMultImRx1Reg[MULTOUTWIDTH-1]}},DataMultImRx1Reg};
`endif
            end
`endif
            // Data for FO accumulator and total power DataIn part is valid
            3'd4   : begin
               // Add the latest sum, subtract the last entry of the delay line
               MovAvgAccRe <= MovAvgAccRe + $signed({{(MOVAVGWIDTH-CORRWIDTH){CorrValSumReTemp[CORRWIDTH-1]}},CorrValSumReTemp}) -
                                            $signed({{(MOVAVGWIDTH-CORRWIDTH){CorrValSumDBRe[15][CORRWIDTH-1]}},CorrValSumDBRe[15][CORRWIDTH-1:0]});
               MovAvgAccIm <= MovAvgAccIm + $signed({{(MOVAVGWIDTH-CORRWIDTH){CorrValSumImTemp[CORRWIDTH-1]}},CorrValSumImTemp}) -
                                            $signed({{(MOVAVGWIDTH-CORRWIDTH){CorrValSumDBIm[15][CORRWIDTH-1]}},CorrValSumDBIm[15][CORRWIDTH-1:0]});
`ifdef RW_NX_TDFO_NSR_EN
               if (TDFOEstMode != COARSE)
                  totalPowerX2   <= totalPowerX2 + {{(ACCUWIDTH+1-ADD1OUTWIDTH){CorrSumRndRe[ADD1OUTWIDTH-1]}},CorrSumRndRe}; // Add Din * Din' for all RX paths
`endif
            end
`ifdef RW_NX_TDFO_NSR_EN
            // Data for total power DataDBuf part is valid
            3'd5   : begin
               if (TDFOEstMode != COARSE)
                  totalPowerX2   <= totalPowerX2 + {{(ACCUWIDTH+1-ADD1OUTWIDTH){CorrSumRndRe[ADD1OUTWIDTH-1]}},CorrSumRndRe}; // Add Dbuf * Dbuf' for all RX paths
            end
`endif
            // Nothing to accumulate in the other cycles
            default: begin
            end
         endcase
      end
   end //MovAvgBlk
`ifdef RW_NX_TDFO_NSR_EN
   
// Divide by 2, cut 6 bits and Round. Saturation not needed when TDFOIPWIDTH=10
// The 7 LSBs of totalPowerX2 are removed, the remaining bits are prefixed with
// two copies of the MSB, and bit 6 is added back as the rounding increment.
// Both operands are ACCUWIDTH-4 bits wide, i.e. 22 bits when TDFOIPWIDTH=10,
// which matches the fixed 22-bit totalPower port.
assign totalPower    = {{2{totalPowerX2[ACCUWIDTH]}},totalPowerX2[ACCUWIDTH:7]} + {{(ACCUWIDTH-5){1'b0}},totalPowerX2[6]};
 
// Rounding 6 bits
// The 6 LSBs of each accumulator are removed and bit 5 is added back as the
// rounding increment; both operands are TDFOIPWIDTH*2 bits wide.
assign NextCorrLTFsReRx0 = LTFsAccReRx0[ACCUWIDTH-1:6] + {{(TDFOIPWIDTH*2-1){1'b0}},LTFsAccReRx0[5]};
assign NextCorrLTFsImRx0 = LTFsAccImRx0[ACCUWIDTH-1:6] + {{(TDFOIPWIDTH*2-1){1'b0}},LTFsAccImRx0[5]};
`ifdef RW_NX_DERIV_PATH1
// Same rounding for the second Rx chain
assign NextCorrLTFsReRx1 = LTFsAccReRx1[ACCUWIDTH-1:6] + {{(TDFOIPWIDTH*2-1){1'b0}},LTFsAccReRx1[5]};
assign NextCorrLTFsImRx1 = LTFsAccImRx1[ACCUWIDTH-1:6] + {{(TDFOIPWIDTH*2-1){1'b0}},LTFsAccImRx1[5]};
`endif         

//Output registers of the per-chain corrLTFs values.
//They are cleared by reset and by MovAvgClr; while MovAvgEn is set they follow
//the rounded accumulators.
always @ (posedge PhyClk or negedge nPhyRst)
   begin: corrLTFs_Blk
      if (!nPhyRst) begin
         corrLTFsReRx0 <= {(TDFOIPWIDTH*2){1'b0}};
         corrLTFsImRx0 <= {(TDFOIPWIDTH*2){1'b0}};
`ifdef RW_NX_DERIV_PATH1
         corrLTFsReRx1 <= {(TDFOIPWIDTH*2){1'b0}};
         corrLTFsImRx1 <= {(TDFOIPWIDTH*2){1'b0}};
`endif
      end
      else if (MovAvgClr) begin
         corrLTFsReRx0 <= {(TDFOIPWIDTH*2){1'b0}};
         corrLTFsImRx0 <= {(TDFOIPWIDTH*2){1'b0}};
`ifdef RW_NX_DERIV_PATH1
         corrLTFsReRx1 <= {(TDFOIPWIDTH*2){1'b0}};
         corrLTFsImRx1 <= {(TDFOIPWIDTH*2){1'b0}};
`endif
      end
      else if (MovAvgEn) begin
         // Rounded accumulator values
         corrLTFsReRx0 <= NextCorrLTFsReRx0;
         corrLTFsImRx0 <= NextCorrLTFsImRx0;
`ifdef RW_NX_DERIV_PATH1
         corrLTFsReRx1 <= NextCorrLTFsReRx1;
         corrLTFsImRx1 <= NextCorrLTFsImRx1;
`endif
      end
   end //corrLTFs_Blk
`endif         

//Rounding of the moving-average accumulators.
//One rounding scheme per estimation mode, both producing CORRWIDTH + 3 bits:
//  Coarse: from CORRWIDTH + 4 to CORRWIDTH + 3 (1 LSB)
//  Fine  : from CORRWIDTH + 6 to CORRWIDTH + 3 (3 LSBs)

//Coarse: only 16 values are accumulated while the accumulator is sized for 64,
//so the 2 MSBs are dropped before rounding.
Round #(.INPUT_WIDTH(CORRWIDTH+4), .OUTPUT_WIDTH(CORRWIDTH+3)) U_ROUNDCOR2 (
   .InputData (MovAvgAccRe[CORRWIDTH+3:0]),
   .RoundData (MovAvgAccReCoarse)
);

Round #(.INPUT_WIDTH(CORRWIDTH+4), .OUTPUT_WIDTH(CORRWIDTH+3)) U_ROUNDCOR3 (
   .InputData (MovAvgAccIm[CORRWIDTH+3:0]),
   .RoundData (MovAvgAccImCoarse)
);

//Fine: the full accumulator is rounded
Round #(.INPUT_WIDTH(CORRWIDTH+6), .OUTPUT_WIDTH(CORRWIDTH+3)) U_ROUNDCOR6 (
   .InputData (MovAvgAccRe),
   .RoundData (MovAvgAccReFine)
);

Round #(.INPUT_WIDTH(CORRWIDTH+6), .OUTPUT_WIDTH(CORRWIDTH+3)) U_ROUNDCOR7 (
   .InputData (MovAvgAccIm),
   .RoundData (MovAvgAccImFine)
);

//Selection and registering of the rounded value matching the estimation mode.
//The registers are updated only while MovAvgEn is set.
always @ (posedge PhyClk or negedge nPhyRst)
   begin: RndMux_Blk
      if (!nPhyRst) begin
         MovAvgRndRe <= CONST_ZERO_CORRWIDTH_P3;
         MovAvgRndIm <= CONST_ZERO_CORRWIDTH_P3;
      end
      else if (MovAvgEn) begin
         if (TDFOEstMode == COARSE) begin
            // Coarse estimation
            MovAvgRndRe <= MovAvgAccReCoarse;
            MovAvgRndIm <= MovAvgAccImCoarse;
         end
         else begin
            // Fine estimation
            MovAvgRndRe <= MovAvgAccReFine;
            MovAvgRndIm <= MovAvgAccImFine;
         end
      end
   end //RndMux_Blk

//Symmetric signed saturation of the rounded moving average,
//from (CORRWIDTH + 3) bits down to CORRWIDTH bits
SatSymSigned #(.INPUT_WIDTH(CORRWIDTH+3), .OUTPUT_WIDTH(CORRWIDTH)) U_SATCOR0 (
   .InputData  (MovAvgRndRe),
   .SatSymData (MovAvgSatRe)
);

SatSymSigned #(.INPUT_WIDTH(CORRWIDTH+3), .OUTPUT_WIDTH(CORRWIDTH)) U_SATCOR1 (
   .InputData  (MovAvgRndIm),
   .SatSymData (MovAvgSatIm)
);

//Register the final output
//The outputs are cleared by reset and by MovAvgClr. While MovAvgEn is set they
//take the saturated moving average, sign-extended from CORRWIDTH to
//TDFOCVWIDTH bits.
//Both clear branches use a TDFOCVWIDTH-sized zero, so that the clear value
//follows the parameter instead of a fixed 16-bit literal.
always @ (posedge PhyClk or negedge nPhyRst)
   begin: Out_Blk
      if (nPhyRst == 1'b0) begin
         CorrValOutRe <= {TDFOCVWIDTH{1'b0}};
         CorrValOutIm <= {TDFOCVWIDTH{1'b0}};
      end
      else if (MovAvgClr == 1'b1) begin
         CorrValOutRe <= {TDFOCVWIDTH{1'b0}};
         CorrValOutIm <= {TDFOCVWIDTH{1'b0}};
      end
      else if (MovAvgEn == 1'b1) begin
         CorrValOutRe <= {{(TDFOCVWIDTH-CORRWIDTH){MovAvgSatRe[CORRWIDTH-1]}},MovAvgSatRe};
         CorrValOutIm <= {{(TDFOCVWIDTH-CORRWIDTH){MovAvgSatIm[CORRWIDTH-1]}},MovAvgSatIm};
      end
   end //Out_Blk

//The bypass of the delay line is tied to BYPASS_PARAM (disabled)
assign ByPass = BYPASS_PARAM;

//CorrValOutValid generation.
//CorrValOut lags CorrValSum by 3 clocks, so CorrValSumValid is delayed by the
//same amount.
DelayLine #(
   .DATAWIDTH (1),  //Width of CorrValSumValid
   .LATENCY   (3)   //3 clocks latency
) U_DelayLineF (
   .nPhyRst (nPhyRst),
   .PhyClk  (PhyClk),
   .Bypass  (ByPass),
   .DataIn  (CorrValSumValid),
   .DataOut (CorrValOutValid)
);

endmodule //TDFOCorrVal

//////////////////////////////////////////////////////////////////////////////
// End of file
//////////////////////////////////////////////////////////////////////////////
