//------------------------------------------------------------------------------
// ldpcDecCtrl.v
// 
// Description:
//   Controller for the LDPC decoder. Control begins by setting the read and
//   write indexes that go to the memory block. The memory block has a read
//   latency and READ_LATENCY defines how many cycles later the read information
//   is available. Considering that flag bits are needed a cycle early in many
//   places (see the _1 postfixes) and that the index counters are registers,
//   the index setting is done READ_LATENCY+2 cycles early.
// 
// Inputs:
//   nReset             : Asynchronous reset.
//   clk                : Master clock.
// (From the regs block)
//   enable            : Master enable. Blip this low between decodes to init.
//   enLostTime        : Enable the lost time recovery mechanism.
//   parityThresh      : Used for early termination if 'poor' progress is made.
//   targetLevel       : Value of the targetLevel register (see spec).
//   llrUnity          : value of the llrUnity register (see spec).
//   nomIterations     : Max number of iterations allowed on average.
//   maxRunningCount   : Max number of iterations (x8) - peak.
//   endRunningCount   : As above but near packet end.
//   iterationAbort    : Pulse to terminate the current iteration.
// (From Bwd)          
//   parityErrsUpdate  : A strobe indicating when the parity is updated.
//   parityErrs        : Number of parity errs, updated at end of row and
//                       accumulated for the iteration.
//   varMetricSum      : Sum of 2 metrics per macro cell along the macro row.
// (From mem)          
//   fwdFlag           : The flag bits for the current fwd cell.
//   bwdFlag           : The flag bits for the current bwd cell.
//   numMacroCells     : Number of cells/clkCycles per iteration.
//   ipLoaded          : Strobe to tell us when the input has been loaded.
//   ipStrobeReg       : Used with rdyToRcv to see if there is ip data
//                       buffered externally.
//                     
// Outputs:            
//   clkEnOut          : Gated clock enable.
// (To Ip)             
//   setRdyToRcv       : Pulse to say input buffer is ready to accept input.
//   firstBlk          : Pulse with timing as above when this is 1st blk.
// (To Fwd)            
//   fwdStall          : Stall signal for the forward processing.
//   fwdNoOp           : Stall just the datapath. The cell is a No Op.
//   fwdFirstIteration : True if this is the first iteration.  
//   fwdRowStart       : Start of row.
//   fwdIterStart      : Start of the  iteration of a decode.
//   pingPong          : selector for ping pong mems.
//   scaleFlag         : True when LLRs are to be scaled back to reduce growth.
//   fwdLlrUnity       : Integer that represents unity (gets scaled each iter).
//   latchChkMetric    : Latch the chk Metric for use in next bwd pass.
// (To Bwd)            
//   bwdStall          : Stall signal for the backward processing.
//   bwdNoOp           : Stall just the datapath. The cell is a No Op.
//   bwdIterStart      : Start of the iteration of a decode.
//   bwdFirstIteration : True if this is the first iteration.   
//   bwdRowStart       : Start of row.
// (To Ctrl)
//   holdFwdCell       : These cause the Cell RAM reading to be disabled so
//   holdBwdCell       : that previous fetched data remains on the RAM
//                       output latches.
// (To mem)            
//   fwdCellIdx        : Index into the cell info memory.
//   bwdCellIdx        : Index into the cell info memory.
//   blkPingPongS      : Determines which Vm buffer is used for input versus
//                       decode. Must be toggled at the moment decode starts
//                       for a block. This is not the same as the moment that
//                       is available.
//   blkPingPongE      : Block toggle (for RAM ping pong).
// (to the regs block) 
//   curIteration      : The current iteration number of the ldpc decode.
//   prevParityErrs    : Number of parity errors in previous iteration.
//   prevDecodeStatus  : 0/1 is fail/pass of previously decoded block.
//
// 26 Apr 2010 M. Rumsey. Created.
//
// (c) Copyright 2010, Blue Rum Consulting Limited, All Rights Reserved.
//------------------------------------------------------------------------------

`include "ldpcDec.vh"

module ldpcDecCtrl # (
  parameter  ITERS8_BITS = numBits(8*`LDEC_MAX_ITER),
  parameter  NUM_USERS = 1
) (
   input                                        nReset,
   input                                        clk, 
   // User control
   input [4:0]                                  ctrlUserIn,
   output reg [4:0]                             ctrlUserDec,
   output reg [4:0]                             ctrlUserOp,
   output wire                                  shrtBlkModDec,
   output wire                                  shrtBlkModOp,
   input                                        enableIp,
   input [NUM_USERS-1:0]                        enabledUsers,
   input                                        ipSkipBlock,
   input                                        blockOpComplete,
   input                                        packetOpComplete,
   output reg                                   enableOp,
   // Master enable from Regs.              
   input                                        enable,
   input                                        enLostTime,
   input                                        beatTimeLine,
   // Code characteristics from Regs.       
   input [numBits(`LDEC_MAX_BLKNUM)-1:0]        packetLenU0, // TODO add per user registers if NUM_USERS>1
   input [15:0]                                 packetBytesLs,
   input [`LDEC_PB_MS_LEFT:0]                   packetBytesMs,
   input [`LDEC_PB_MS_LEFT+16:0]                frameEndByte,
   input [numBits(`LDEC_K_MAX)-1:0]             k,
   input [numBits(`LDEC_K_MAX-1)-1:0]           nShrtFloor,
   input [numBits(`LDEC_MAX_BLKNUM)-1:0]        shrtModU0, // TODO add per user registers if NUM_USERS>1 
   // From Ip block
   input                                        lastBlkOfSym,
   input                                        waitFirstWordOfSym,
   // Shortening etc parameters from Regs.
   input [numBits(`LDEC_K_MAX)-1:0]             parityThresh,
   input [`LDEC_VAR_BITS-1+7:0]                 targetLevel,
   input                                        `LDEC_chkAbsType llrUnity0,
   input                                        `LDEC_chkAbsType llrUnity1,
   input [ITERS8_BITS-1:0]                      nomIterations,
   input [numBits(`LDEC_MAX_ITER)-1:0]          earlyTestIterations,
   input [ITERS8_BITS-1:0]                      maxRunningCount,
   input [ITERS8_BITS-1:0]                      endRunningCount,
   // External override to force current block to stop
   input                                        iterationAbort,
   // From Bwd                              
   input                                        parityErrsUpdate,
   input [numBits(`LDEC_PAR_ERRS_MAX)-1:0]      parityErrs,
   input [`LDEC_VAR_BITS-1+7:0]                 varMetricSum,
   // From Mem                              
   input [`LDEC_FLAG_BITS-1:0]                  fwdFlag,
   input [`LDEC_FLAG_BITS-1:0]                  bwdFlag,
   input [`LDEC_FLAG_BITS-1:0]                  fwdFlag_1,
   input [`LDEC_FLAG_BITS-1:0]                  bwdFlag_1,
   input [numBits(`LDEC_NCOLS_RIGHT)-1:0]       fwdCol_1,
   input [numBits(`LDEC_NCOLS_RIGHT)-1:0]       bwdCol_1,
   input [numBits(`LDEC_NCELLS)-1:0]            numMacroCells,
   // From Ip                               
   input                                        ipLoaded,
   // From Op                               
   input                                        opBufferLocked,
   // To Clocking                           
   output                                       clkEnOut,
   // To Ip                                 
   output                                       setRdyToRcvOut,
   output                                       decodeStartOut,
   output                                       firstBlkOut,
   // To Fwd                                
   output                                       fwdStallOut,
   output                                       fwdStallOut_1,
   output                                       fwdNoOpOut,
   output                                       fwdNoOpOut_1,
   output                                       fwdFirstIterationOut,
   output                                       fwdFirstIterationOut_1,
   output                                       metricVeryFirstUseOut, 
   output                                       metricVeryFirstUseOut_1, 
   output                                       fwdRowStartOut,
   output                                       fwdIterStartOut_1,
   output reg                                   scaleFlagOut,
   output `LDEC_chkAbsType                      fwdLlrUnityOut,
   output                                       latchChkMetricOut,
   // To Bwd                                
   output                                       bwdStallOut,
   output                                       bwdStallOut_1,
   output                                       stallVrOut,
   output                                       bwdNoOpOut,
   output                                       bwdNoOpOut_1,
   output                                       bwdCellPrimeOut,
   output                                       bwdFirstIterationOut_1,
   output reg                                   bwdIterStartOut,
   output                                       bwdRowStartOut,
   output                                       bwdRowStartOut_1,
   output                                       bwdRowStartOut_2,
   output                                       bwdEndIterOut,
   output reg [`LDEC_CHK_BITS-3:0]              bwdLlrUnityDiv2,
   // To Mem                                
   output                                       decodeActiveOut,
   output                                       decodeActiveOut_1,
   output                                       decodeActiveOut_2,
   output                                       decodeActiveOut_2D,
   output                                       disablingOut,
   output [numBits(`LDEC_NCELLS-1)-1:0]         fwdCellIdxOut,
   output [numBits(`LDEC_NCELLS-1)-1:0]         bwdCellIdxOut,
   output                                       holdFwdCellOut_1,
   output                                       holdBwdCellOut_1,
   output                                       blkPingPongSOut,
   output                                       blkPingPongEOut,
   // To Op block.                          
   output                                       decodeCompleteOut,
   output                                       newBlkPulseOut,
   // To Regs for status registers.         
   output [numBits(`LDEC_MAX_BLKNUM)-1:0]       blkErrsOut,
   output reg                                   packetCompleteOut,
   output reg                                   packetStatusOut, 
   output reg [numBits(`LDEC_MAX_ITER)-1:0]     curIterationOut,
   output reg [numBits(`LDEC_PAR_ERRS_MAX)-1:0] prevParityErrsOut,
   output reg [numBits(`LDEC_MAX_ITER)-1:0]     prevIterationsOut,
   output reg                                   prevDecodeStatusOut,
   output [`LDEC_VAR_BITS+5:0]                  varMetricChkSumOut,
   // RW MODIFIED
   output wire [ 7:0]                           dbgBlkNum,
   output wire [15:0]                           dbgIter
   // END RW MODIFIED
   
   );

`include "ldpcDecFuncs.vh"

  // pipeStallStateType;
  localparam [1:0] waitEndRowState = 0;
  localparam [1:0] addStallsState = 1;
  localparam [1:0] fwdStallState = 2;
  localparam [1:0] stallReArmState = 3;

  // lostTimeStateType;
  localparam [1:0] waitingForIoStallState = 0;
  localparam [1:0] addingLostTimeState = 1;
  localparam [1:0] waitingForIpLoadState = 2;

  // extDataStateType;
  localparam [1:0] waitForDecCompleteState = 0;
  localparam [1:0] waitForRdyToRcvState = 1;
  localparam [1:0] chkExtDataState = 2;
  
  localparam NUM_ABC_PIPES = `LDEC_PIPE_A + `LDEC_PIPE_B + `LDEC_PIPE_C;
  localparam RC_BITS = 12;     // 8 for 256 iters && 3 for 8x. 1 for sign. 
  localparam DIFF_BITS = 6;
  localparam NI_HI = numBits(`LDEC_MAX_ITER)-1;
  localparam BN_HI = numBits(`LDEC_MAX_BLKNUM)-1;
  localparam PE_HI = numBits(`LDEC_PAR_ERRS_MAX)-1;
  localparam CI_HI = numBits(`LDEC_NCELLS-1)-1; // fwdCellIdx, bwdCellIdx
  localparam K_HI = numBits(`LDEC_K_MAX)-1;
  localparam PDB_HI = numBits(`LDEC_MAX_PACKET_BITS-1)-1;
  
  localparam [numBits(`LDEC_NCOLS_RIGHT)-1:0] NCOLS_RIGHT = `LDEC_NCOLS_RIGHT;
  localparam [NI_HI+1:0] MAX_ITER1 = `LDEC_MAX_ITER;

  // Counts cycles up to 1/8 iteration.
  localparam integer LOST_CYCLES_BITS = numBits(ceilDiv(`LDEC_NCELLS, 8));
  reg [LOST_CYCLES_BITS-1:0] lostCycles;
  // Lost iterations in units of 1/8 iteration.
  localparam integer MAX_LOST_ITERS = 63;
  localparam integer LOST_ITERS_BITS = 6;
  
  // User control
  reg [4:0]                             ctrlUserInPending;
  reg                                   pendingUser;

  // Packet split between several decoders: skip one block in ip and current counts
  // when another decoder is used
  reg [2:0]                             ipSkipBlockPending;
  reg [4:0]                             ipSkipBlockUser;
  
  // IP controller signals
  reg                                   enableLast;  // For simple edge detection.
  reg                                   enableLastD;
  reg [NUM_USERS-1:0]                   enabledUsersLast;  // For simple edge detection.
  reg [1:0]                             numBuffersFree;
  reg [1:0]                             numBuffersFull;
  reg                                   receiving;
  wire                                  waitingForIo;
  reg                                   waitingForIp;
  reg                                   waitingForOp;
  reg [1:0]                             endDecTime;
  reg                                   decoderFree;
  reg                                   firstRcv;
  
  // Flag for last blk after de-padding. Updates on decodeStart,
  // not newBlk (can be different if loading delays start).
  reg                                   lastActualBlk;
  // flag for last blk after de-padding. Updates on ipLoaded so applies
  // to the block just loaded.
  wire [NUM_USERS-1:0]                  lastActualBlkIpArray; 

  reg                                   setRdyToRcv;
  reg                                   decodeStart;
  reg                                   decodeStartD1;  // decoder is free && ip;
  reg                                   decodeReallyStart;
  reg                                   decodeReallyStartD1;  // as above plus the decoder op
  reg                                   decodeReallyStartD2;  // has been taken into the opbuf
  
  wire                                  iterationAbort2;
  reg                                   fwdEndIter;  
  reg                                   bwdEndIter;
  reg                                   bwdIterStarted;
  reg                                   bwdIterStarted_1;
  wire                                  bwdIterStarted_2;
  reg                                   bwdIterStart_2;
  // Lint: only used in the no pipe configuration.
  reg                                   bwdIterStartedNoPipe_1;
  reg                                   bwdFirstIteration_1;
  wire                                  fwdEndRow;  
  wire                                  bwdEndRow;
  wire                                  fwdEndRow_1;  
  wire                                  bwdEndRow_1;  
  reg                                   newBlk;  // Init sig to prep for a new block.
  reg                                   newBlkPulse;
  reg                                   firstBlk;
  reg [`LDEC_VAR_BITS+5:0]              varMetricChkSum;
  reg [ITERS8_BITS-1:0]                 dynRunningCount;
  reg signed [RC_BITS-1:0]              runningCount;
  reg  [PDB_HI:0] packetDataBitCount;
  reg  [PDB_HI:0] packetDataBitCountIp;
                                        
  // Local version of outputs.          
  reg [numBits(`LDEC_NCELLS-1)-1:0]     fwdCellIdx;
  reg [numBits(`LDEC_NCELLS-1)-1:0]     bwdCellIdx;
  reg                                   fwdStall;
  reg                                   bwdStall;
  wire                                  fwdStall_1;
  wire                                  bwdStall_1;
  reg                                   stallVr;
  reg                                   fwdNoOp;
  reg                                   bwdNoOp;
  wire                                  fwdNoOp_1;
  wire                                  bwdNoOp_1;
  reg                                   fwdStallMis_1;
  reg                                   bwdStallMis_1;
  wire                                  fwdPipeStall_1;
  reg                                   fwdPipeStall_1_r;
  reg                                   bwdPipeStall_1;
  reg                                   bwdRowStart;
  reg                                   bwdRowStart_1;
  reg                                   bwdRowStart_2;
  reg                                   fwdRowStart;
  reg                                   fwdRowStart_1;
  wire                                  fwdEndIter_1;
  wire                                  bwdEndIter_1;
  reg `LDEC_chkAbsType                  fwdLlrUnity;
  reg                                   fwdFirstIteration;
  reg                                   fwdFirstIteration_1;
  reg                                   fwdIterStart_1;
  reg [BN_HI:0]                         curBlkNumArray[0:NUM_USERS-1];
  reg [BN_HI:0]                         ipBlkNumArray[0:NUM_USERS-1];  
  reg                                   blkPingPongE;   // Replaces curBlkNum[0]
  reg [BN_HI:0]                         blkErrs;
  reg                                   decodeComplete;
  reg                                   decodeActive;
  reg                                   decodeActive_1;
  reg                                   decodeActive_2;
  reg                                   decodeActive_2D1;
  reg                                   decodeActive_2D2;
  reg                                   decodeActive_2D3;
  wire                                  holdBwdCell_1;
  wire                                  holdFwdCell_1;
  reg                                   blkPingPongS;
  reg                                   firstBlkOfSym;
  reg                                   waitFirstWordOfSymReg;
  reg                                   ipBufferAlreadyLoaded;
  wire [PDB_HI:0]                       endSubPacket;
  wire [PDB_HI:0]                       numPacketBits;
  reg                                   subPacketFailed;
                                        
  reg                                   metricVeryFirstUse;
  wire                                  metricVeryFirstUse_1;
  reg [`LDEC_NCOLS-1:0]                 metricVeryFirstUseVec;
  wire                                  endRow;
  
  // Recovery of time lost due to IO stalls.
  reg [1:0]                                   lostTimeState; // lostTimeStateType
  reg [LOST_ITERS_BITS-1:0]                    lostIterations;
  wire [LOST_ITERS_BITS:0]                     lostIterationsRoundUp;
  wire [numBits(ceilDiv(`LDEC_NCELLS, 8))-1:0] eighthIteration;
  reg                                          transferLostTime;
  
  // Map registers to MU arrays to avoid sv
  wire [numBits(`LDEC_MAX_BLKNUM)-1:0]  packetLenArray[0:NUM_USERS-1];
  wire [numBits(`LDEC_MAX_BLKNUM)-1:0]  shrtModArray[0:NUM_USERS-1];
  assign packetLenArray[0] = packetLenU0;
  assign shrtModArray[0]   = shrtModU0;
  
  
  // The clock enable is the OR of enable and its two delayed copies
  // (enableLast, enableLastD).
  assign clkEnOut        = enableLastD | enableLast | enable;
  // High while enableLastD is still set but enable has already dropped.
  assign disablingOut    = ~enable & enableLastD;
  // An abort request is only honoured while a decode is active.
  assign iterationAbort2 = decodeActive & iterationAbort;

  // High while the current block number of the selected user is below that
  // user's shrtMod entry (decode-side and output-side user respectively).
  assign shrtBlkModDec = (curBlkNumArray[ctrlUserDec] < shrtModArray[ctrlUserDec]);
  assign shrtBlkModOp  = (curBlkNumArray[ctrlUserOp]  < shrtModArray[ctrlUserOp]);

  //assert ((iterationAbort != 1'b1) || (decodeActive == 1'b1)) else
  //  $error("Attempt to abort decode when no decode in progress.");

  // Aggregate Mode Sub Packet Tracking. Note frameEnd is dynamic.
  generate
    if (`LDEC_SPECIAL_AGG_SUPPORT) begin: gAggMode
      // frameEndByte with three zero LSBs appended (i.e. multiplied by 8).
      assign endSubPacket = {frameEndByte, {3{1'b0}}};
    end
  endgenerate //  gAggMode

  generate
    if (`LDEC_PAD_SUPPORT) begin: gPadMode
      // {packetBytesMs, packetBytesLs} with three zero LSBs appended
      // (i.e. the concatenated byte count multiplied by 8).
      // NOTE(review): nothing in this block drives numPacketBits when
      // LDEC_PAD_SUPPORT is 0, yet gBitCnt reads it whenever
      // LDEC_SPECIAL_AGG_SUPPORT alone is set - confirm it is driven
      // elsewhere for that configuration.
      assign numPacketBits = {packetBytesMs, packetBytesLs, {3{1'b0}}};
    end
  endgenerate //  gPadMode
  
  //---------------------------------------------------------------------------
  // Packet bit counting needed for AGG mode and de-padding
  //---------------------------------------------------------------------------

  generate
    if (`LDEC_SPECIAL_AGG_SUPPORT || `LDEC_PAD_SUPPORT) begin: gBitCnt

      // Maintains the per-user ip block counters and two running data-bit
      // counts: packetDataBitCountIp advances on every ip load / skipped
      // block, and packetDataBitCount is a copy of it taken on decodeStart.
      always @(posedge(clk)  `LDEC_RESET_STR)
      begin : pPktBitCount
        // k2 for the next ip block. Blocking-assigned and deliberately read
        // BEFORE it is refreshed below, so the accumulation uses the value
        // computed on the previous clock.
        reg [K_HI:0] k2V;
        integer usr;

        if (!nReset) begin
          k2V = `LDEC_PAD(2'b0, K_HI);
          packetDataBitCount <= `LDEC_PAD(1'b0, PDB_HI);
          packetDataBitCountIp <= `LDEC_PAD(1'b0, PDB_HI);
          for (usr=0; usr<NUM_USERS; usr=usr+1)
            ipBlkNumArray[usr] <= `LDEC_PAD(1'b0, BN_HI);
        end else if (!enable) begin
          // Synchronous clear between decodes (k2V is left untouched).
          packetDataBitCount <= `LDEC_PAD(1'b0, PDB_HI);
          packetDataBitCountIp <= `LDEC_PAD(1'b0, PDB_HI);
          for (usr=0; usr<NUM_USERS; usr=usr+1)
            ipBlkNumArray[usr] <= `LDEC_PAD(1'b0, BN_HI);
        end else begin

          // Data bit count timed to decode starts.
          if (decodeStart) begin
            packetDataBitCount <= packetDataBitCountIp;
          end

          // Data bit count timed to ip loads (a skipped block counts too).
          // Needed for bit/byte de-padding.
          if (ipLoaded || ipSkipBlock) begin
            ipBlkNumArray[ctrlUserIn] <= ipBlkNumArray[ctrlUserIn] + `LDEC_PAD(1'b1, BN_HI);
            packetDataBitCountIp <= packetDataBitCountIp + `LDEC_PAD(k2V, PDB_HI-K_HI);
          end

          // Refresh k2 for the next ip block: k - nShrtFloor, less one more
          // while the user's ip block number is below its shrtMod entry.
          k2V = k - nShrtFloor;
          if (ipBlkNumArray[ctrlUserIn] < shrtModArray[ctrlUserIn]) begin
            k2V = k2V - `LDEC_PAD(1'b1, K_HI);
          end
        end

      end //pPktBitCount

      // Per-user last-block flag: the ip bit count has reached numPacketBits
      // and either numPacketBits is non-zero or the user's ip block count has
      // reached its packet length.
      genvar usrIdx0;
      for (usrIdx0=0; usrIdx0<NUM_USERS; usrIdx0=usrIdx0+1) begin : gIpBlk0
        assign lastActualBlkIpArray[usrIdx0] =
                 ((packetDataBitCountIp >= numPacketBits) &&
                  ((numPacketBits != `LDEC_PAD(1'b0, PDB_HI)) ||
                   (ipBlkNumArray[usrIdx0] >= packetLenArray[usrIdx0])));
      end

    end
  endgenerate // gBitCnt

  generate
    if (!(`LDEC_SPECIAL_AGG_SUPPORT || `LDEC_PAD_SUPPORT)) begin: gBitCnt0

      // Variant without bit counting: only the per-user ip block counters
      // are maintained.
      always @(posedge(clk) `LDEC_RESET_STR)
      begin : pPktBitCount0
        integer usr;
        if (!nReset) begin
          for (usr=0; usr<NUM_USERS; usr=usr+1)
            ipBlkNumArray[usr] <= `LDEC_PAD(1'b0, BN_HI);
        end else if (!enable) begin
          // Synchronous clear between decodes.
          for (usr=0; usr<NUM_USERS; usr=usr+1)
            ipBlkNumArray[usr] <= `LDEC_PAD(1'b0, BN_HI);
        end else if (ipLoaded || ipSkipBlock) begin
          // One more block loaded (or skipped) for the current input user.
          ipBlkNumArray[ctrlUserIn] <= ipBlkNumArray[ctrlUserIn] + `LDEC_PAD(1'b1, BN_HI);
        end
      end //pPktBitCount0

      // Per-user last-block flag: the user is enabled and its ip block count
      // has reached its packet length.
      genvar usrIdx1;
      for (usrIdx1=0; usrIdx1<NUM_USERS; usrIdx1=usrIdx1+1) begin : gIpBlk1
        assign lastActualBlkIpArray[usrIdx1] =
                 enabledUsers[usrIdx1] &&
                 (ipBlkNumArray[usrIdx1] >= packetLenArray[usrIdx1]);
      end

    end
  endgenerate //  gBitCnt0
     
  //---------------------------------------------------------------------------
  // Input data controller.
  //---------------------------------------------------------------------------
  
  // setRdyToRcvIp: single-cycle pulse generated when enableIp rises while
  // 'receiving' is high. enableIpD is enableIp delayed by one clock and is
  // used for the edge detect. Both registers are held at 0 while enable is low.
  reg setRdyToRcvIp;
  reg enableIpD;
  always @(posedge(clk) `LDEC_RESET_STR)
  begin
    if (!nReset) begin
      enableIpD <= 1'b0;
      setRdyToRcvIp <= 1'b0;
    end else if (!enable) begin
      enableIpD <= 1'b0;
      setRdyToRcvIp <= 1'b0;
    end else begin
      enableIpD <= enableIp;
      if (enableIp && !enableIpD && receiving) begin
        setRdyToRcvIp <= 1'b1;
      end else begin
        setRdyToRcvIp <= 1'b0; // return to zero
      end
    end
  end
  
  // Latches the input user id each time an ip block finishes loading and
  // flags it as pending. The pending flag is dropped on decodeStart unless a
  // new load completes in the same cycle (ipLoaded has priority).
  always @(posedge(clk) `LDEC_RESET_STR)
  begin
    if (!nReset) begin
      pendingUser <= 1'b0;
      ctrlUserInPending <= 5'd0;
    end else if (!enable) begin
      pendingUser <= 1'b0;
      ctrlUserInPending <= 5'd0;
    end else begin
      if (ipLoaded) begin
        ctrlUserInPending <= ctrlUserIn;
      end

      if (ipLoaded) begin
        pendingUser <= 1'b1;
      end else if (decodeStart) begin
        pendingUser <= 1'b0;
      end
    end
  end
  
  always @(posedge(clk) `LDEC_RESET_STR)
  begin : pIp
    
    reg  [numBits(2)-1:0] numBuffersFreeV;
    reg [numBits(2)-1:0]  numBuffersFullV;
    reg                   decoderFreeV;
    reg                   waitACycleV;
    
    if (nReset == 1'b0) begin
      enableLast <= 1'b0;
      enableLastD <= 1'b0;
      enabledUsersLast <= {NUM_USERS{1'b0}};
      setRdyToRcv <= 1'b0;
      decodeStart <= 1'b0;
      decodeStartD1 <= 1'b0;
      firstBlk <= 1'b0;
      numBuffersFree <= 2'd1;
      numBuffersFull <= 2'd1;
      receiving <= 1'b0;
      decodeActive_1 <= 1'b0;  
      decodeActive_2 <= 1'b0;      
      decodeActive_2D1 <= 1'b0;
      decodeActive_2D2 <= 1'b0;
      decodeActive_2D3 <= 1'b0;
      decodeActive <= 1'b0;
      blkPingPongS <= 1'b0;
      decoderFree <= 1'b0;
      waitingForOp <= 1'b0;
      waitingForIp <= 1'b1;
      endDecTime <= 2'b0;
      firstRcv <= 1'b1;
      firstBlkOfSym <= 1'b1;
      waitFirstWordOfSymReg <= 1'b1;
      waitACycleV = 1'b0;        
      ctrlUserDec <= 5'd0;
    end else begin
      if (enable == 1'b0) begin
        enableLast <= 1'b0;
        enableLastD <= enableLast;
        enabledUsersLast <= {NUM_USERS{1'b0}};
        setRdyToRcv <= 1'b0;
        decodeStart <= 1'b0;
        decodeStartD1 <= 1'b0;
        firstBlk <= 1'b0;
        // There are 2 buffers but because of an initialising newBlk signal we
        // set these to 1 rather than 2 and 0.
        numBuffersFree <= 2'd1;
        numBuffersFull <= 2'd1;
        receiving <= 1'b0;
        decodeActive_1 <= 1'b0;      
        decodeActive_2 <= 1'b0;      
        decodeActive_2D1 <= 1'b0;
        decodeActive_2D2 <= 1'b0;
        decodeActive_2D3 <= 1'b0;
        decodeActive <= 1'b0;
        blkPingPongS <= 1'b0;
        decoderFree <= 1'b0;
        waitingForOp <= 1'b0;
        waitingForIp <= 1'b1;
        endDecTime <= 2'b0;
        firstRcv <= 1'b1;
        firstBlkOfSym <= 1'b1;
        waitFirstWordOfSymReg <= 1'b1;
        waitACycleV = 1'b0;        
        ctrlUserDec <= 5'd0;
        
      end
      else begin
        
        enableLastD <= enableLast;
        decodeActive_1 <= decodeActive_2;
        // Hold clock on to allow logging. Enough delay is added for worst case
        // pipelining.
        decodeActive_2D1 <= decodeActive_2;    // logging only.
        decodeActive_2D2 <= decodeActive_2D1;  // logging only.
        decodeActive_2D3 <= decodeActive_2D2;  // logging only.
        decodeActive <= decodeActive_1;
        decoderFreeV = decoderFree;
        numBuffersFreeV = numBuffersFree;
        numBuffersFullV = numBuffersFull;
        enableLast <= enable;
        enabledUsersLast <= enabledUsers;
        setRdyToRcv <= 1'b0;
        decodeStart <= 1'b0;
        decodeStartD1 <= decodeStart;
        
        firstBlk <= ! enableLast;
        
        // 'Receiving' is a key state signal that mirrors the rdyToRcv output.
        if (waitACycleV) begin
          // when moving from receiving state following ipLoaded need to insert
          // a cycle to allow for lastActualBlkIp to be set.
          waitACycleV = 1'b0;
        end
        else if (receiving) begin
          // When a new block of ip is loaded, send 'receiving' low.
          if (ipLoaded) begin
            receiving <= 1'b0;
            waitACycleV = 1'b1;
            //            assert numBuffersFullV < 2
            //              report "Attempt to load new input when no input buffer is free"
            //              severity failure;
            numBuffersFullV = numBuffersFullV + 2'd1;
          end
        end else if (numBuffersFreeV > 2'd0) begin
          // Note lastActualBlkIp will have just been set for the next
          // block (assuming we get here as a result of ipLoaded). Alternatively
          // a decode completes, newBlk is set && numBuffersFreeV is incremented.
          if (!lastActualBlkIpArray[ctrlUserIn] || firstRcv) begin
            setRdyToRcv <= 1'b1;      
            receiving <= 1'b1;
          end
          numBuffersFreeV = numBuffersFreeV - 2'd1;
        end
      
        // After each decode (indicated by NewBlk) a VM buffer becomes freed up.
        if (newBlkPulse == 1'b1) begin
          //          assert numBuffersFreeV < 2
          //            report "Attempt to increment numBuffersFree beyond range"
          //            severity failure;           
          numBuffersFreeV = numBuffersFreeV + 2'd1;
          //          assert numBuffersFullV > 0
          //            report "Attempt to decrement numBuffersFull beyond range"
          //            severity failure;
          if (numBuffersFullV > 2'd0) begin
            numBuffersFullV = numBuffersFullV - 2'd1;
          end
          // The decoder is now free
          decoderFreeV = 1'b1;
          firstRcv <= 1'b0;
          decodeActive_2 <= 1'b0;     
          decodeActive_1 <= 1'b0;  
          decodeActive <= 1'b0;          
          // If a decode completes && opBufferLocked is set begin the data for
          // the previous decode has ! been fetched. Must wait for this to
          // happen before the next decode can begin.
          waitingForOp <= opBufferLocked;
          endDecTime <= 2'b0;
        end
        if (waitingForOp == 1'b1 && opBufferLocked == 1'b0 && endDecTime > 2'd2) begin
          // Output buffer has now been read so a buffer is now free. Note that
          // it takes a few cycles for opBufferLocked to respond to completion
          // of a new decode hence the timer.
          waitingForOp <= 1'b0;
        end
        if (endDecTime < 2'd3) begin
          endDecTime <= endDecTime + 2'd1;
        end        
        // Switch the VM buffers when we have a full input buffer and the decoder
        // is free to accept it. Note that the actual decode could still stall
        // following this if opBufferLocked.
        
        if (decoderFreeV == 1'b1 && numBuffersFullV > 2'd0) begin
          decoderFreeV = 1'b0;
          decodeActive_2 <= 1'b1;
          decodeStart <= 1'b1;          
          blkPingPongS <= !blkPingPongS;
          if (pendingUser==1'b1) begin
            ctrlUserDec <= ctrlUserInPending;
          end else begin
            ctrlUserDec <= ctrlUserIn;
          end
        end
        // Synchronise OFDM boundary signals to the current block

        if (decodeStart) begin
          firstBlkOfSym <= lastBlkOfSym;
          // This signal is used to tell us if decoding may have been delayed
          // due to waiting for the on-air data to arrive. This means the input
          // of the LDPC block was stalled waiting for the first bit of the OFDM
          // symbol. However if the ip buffer was already loaded by the end of
          // the previous decode then there was no actual wait.
          waitFirstWordOfSymReg <= waitFirstWordOfSym & !ipBufferAlreadyLoaded;
        end

        // Track stalls due to lack of input
        if (decoderFreeV == 1'b1 && numBuffersFullV == 2'd0) begin
          waitingForIp <= 1'b1;
        end
        else begin
          waitingForIp <= 1'b0;
        end
        numBuffersFull <= numBuffersFullV;
        numBuffersFree <= numBuffersFreeV;
        decoderFree <= decoderFreeV;
      end
    end   

  end //pIp

  // Combined IO stall indication: high whenever either the input-side stall
  // (waitingForIp) or the output-side stall (waitingForOp) is flagged.
  assign waitingForIo = waitingForOp | waitingForIp;

  //---------------------------------------------------------------------------
  // Detect end of packet
  //---------------------------------------------------------------------------

  // lastActualBlk is timed to the start of a decode.
  always @(posedge(clk) `LDEC_RESET_STR)
  begin : pPkt
    // End-of-packet tracking. Produces lastActualBlk and packetCompleteOut;
    // both are held low in reset and whenever enable is low.
    if (!nReset) begin
      lastActualBlk <= 1'b0;
      packetCompleteOut <= 1'b0;
    end
    else if (!enable) begin
      packetCompleteOut <= 1'b0;
      lastActualBlk <= 1'b0;
    end
    else begin
      // ipBlkNum goes to numBlks on lastActualBlkIp, so the test below fires
      // when the currently decoded blkNum reaches the last blk (numBlks-1).
      // lastActualBlk follows lastActualBlkIp so that it goes back to 0 at a
      // user switch.
      if (ipBlkNumArray[ctrlUserIn] - `LDEC_PAD(1'b1, BN_HI) == curBlkNumArray[ctrlUserIn]) begin
        lastActualBlk <= lastActualBlkIpArray[ctrlUserIn];
      end
      // Packet complete is set when the last decode is transferred to the
      // decoder, which can be delayed relative to the completion of decode
      // if the previous output has not yet been output.
      // packetCompleteOut follows lastActualBlk so that it goes back to 0 at
      // a user switch.
      if (!opBufferLocked && newBlk) begin
        packetCompleteOut <= lastActualBlk;
      end
    end
  end //pPkt
  

  //---------------------------------------------------------------------------
  // Track time lost due to IO stalls
  //---------------------------------------------------------------------------
  
  always @(posedge(clk) `LDEC_RESET_STR)
  begin : pLostTime
    // Scratch value used to size-convert the lost-iteration ceiling.
    // lint: Some bits may be unused.
    reg [31:0] iterCeilV;

    if (!nReset) begin
      lostTimeState <= waitingForIoStallState;
      lostIterations <= `LDEC_PAD(1'b0, LOST_ITERS_BITS-1);
      lostCycles <= `LDEC_PAD(1'b0, LOST_CYCLES_BITS-1);
      transferLostTime <= 1'b0;
    end
    else if (!enLostTime) begin
      // Mechanism switched off: hold everything in the reset state.
      lostTimeState <= waitingForIoStallState;
      lostIterations <= `LDEC_PAD(1'b0, LOST_ITERS_BITS-1);
      lostCycles <= `LDEC_PAD(1'b0, LOST_CYCLES_BITS-1);
      transferLostTime <= 1'b0;
    end
    else begin

      //----------------------
      // The Lost Time counter
      //----------------------

      // On all LDPC blocks other than the first of an OFDM/STBC symbol
      // a stall on the input is not normal and is not factored into the
      // iteration calculations. The stall is 'lost time' which we count
      // so that a correction can be made.
      // The same mechanism can track stalls due to output not being
      // collected.
      case (lostTimeState)

        waitingForIoStallState : begin
          // Idle: start counting as soon as either side reports a stall.
          if (waitingForIp || waitingForOp) begin
            lostTimeState <= addingLostTimeState;
          end
        end

        addingLostTimeState : begin
          // Counting lost time. lostCycles wraps each time it reaches
          // eighthIteration, and each wrap adds one to lostIterations.
          if (lostCycles == eighthIteration) begin
            // Put a limit on lost Iterations. Don't expect stalls to
            // last more than 3 iterations. Allow for a couple of occasions.
            iterCeilV = MAX_LOST_ITERS-1;
            if (lostIterations < iterCeilV[LOST_ITERS_BITS-1:0]) begin
              lostIterations <= lostIterations + {{(LOST_ITERS_BITS-1) {1'b0}}, 1'b1};
            end
            lostCycles <= `LDEC_PAD(1'b0, LOST_CYCLES_BITS-1);
          end
          else begin
            lostCycles <= lostCycles + {{(LOST_CYCLES_BITS-1) {1'b0}} ,1'b1};
          end

          // The assignments below deliberately come after the counting above
          // so that they take precedence on the same clock edge.
          if (waitFirstWordOfSym) begin
            // The decode core is stalled 'waitingForIp' and the input
            // has stalled waiting for the new symbol to arrive. So
            // this stall is 'normal'.
            lostCycles <= `LDEC_PAD(1'b0, LOST_CYCLES_BITS-1);
            lostIterations <= `LDEC_PAD(1'b0, LOST_ITERS_BITS-1);
            // 'sit out' the rest of the load.
            lostTimeState <= waitingForIpLoadState;
          end
          else if (!waitingForIo) begin
            // Stall is over: request transfer of the accumulated count.
            transferLostTime <= 1'b1;
          end

          // Clear lost iterations when we know they have been transferred
          // into the runningCount. This is when we can go back to tracking.
          if (transferLostTime) begin
            lostTimeState <= waitingForIoStallState;
            lostCycles <= `LDEC_PAD(1'b0, LOST_CYCLES_BITS-1);
            lostIterations <= `LDEC_PAD(1'b0, LOST_ITERS_BITS-1);
            transferLostTime <= 1'b0;
          end
        end

        default: begin // waitingForIpLoadState
          // Wait for the input stall to clear before tracking again.
          if (!waitingForIp) begin
            lostTimeState <= waitingForIoStallState;
          end
        end

      endcase

    end
  end

  // lostIterations extended by one bit; one is added whenever lostCycles is
  // non-zero (i.e. a partially counted unit rounds up).
  assign lostIterationsRoundUp = (lostCycles != `LDEC_PAD(1'b0, LOST_CYCLES_BITS-1)) ?
                                 {1'b0, lostIterations} + `LDEC_PAD(1'b1, LOST_ITERS_BITS) :
                                 {1'b0, lostIterations};

  // numMacroCells divided by 8 (right shift by three).
  assign eighthIteration = numMacroCells >> 2'd3;
  
  //---------------------------------------------------------------------------
  // Set the cell indexes.
  //---------------------------------------------------------------------------
  
  // The forward index starts once input data is available. It will stop when
  // it loops back to zero until the next block arrives (generally it should
  // already be there).
  always @(posedge(clk) `LDEC_RESET_STR)
  begin : pFwdCellIdx
    // Forward cell index counter. Counts 0 .. numMacroCells-1 and wraps,
    // pulsing fwdIterStart_1 on the step away from index 0.
    if (!nReset) begin
      fwdIterStart_1 <= 1'b0;
      fwdCellIdx <= `LDEC_PAD(1'b0, CI_HI);
    end
    else if (!enable || newBlk || !decodeActive_2) begin
      // Disabled, between blocks, or no decode in flight: park at index 0.
      fwdIterStart_1 <= 1'b0;
      fwdCellIdx <= `LDEC_PAD(1'b0, CI_HI);
    end
    else if (!holdFwdCell_1) begin
      // The hold signal is fed back from the flags information that we
      // fetch on each cycle. It will be generated if an end of row flag
      // is seen on bwd but not fwd.
      fwdIterStart_1 <= 1'b0;
      // An IO stall only prevents leaving index 0; once the count is under
      // way it keeps running.
      if (fwdCellIdx != `LDEC_PAD(1'b0, CI_HI) || !waitingForIo) begin
        if (fwdCellIdx == numMacroCells-`LDEC_PAD(1'b1, CI_HI)) begin
          // Last cell: wrap back to the start.
          fwdCellIdx <= `LDEC_PAD(1'b0, CI_HI);
        end
        else begin
          fwdCellIdx <= fwdCellIdx + `LDEC_PAD(1'b1, CI_HI);
          if (fwdCellIdx == `LDEC_PAD(1'b0, CI_HI)) begin
            fwdIterStart_1 <= 1'b1;
          end
        end
      end
    end
  end //pFwdCellIdx

  // The bwd index is a row behind the fwd. Both are with T-2 timing. The only
  // info we have about the fwd end of row is with T-1 timing. We can use this
  // to enable the move from count 0 to 1. The count 0 processing downstream
  // will just keep repeating itself until this point.
  always @(posedge(clk) `LDEC_RESET_STR)
  begin : pBwdCellIdx
    // Backward cell index counter plus the bwd iteration start signalling.
    // Drives bwdCellIdx, bwdIterStartOut, bwdIterStarted,
    // bwdIterStartedNoPipe_1 and bwdLlrUnityDiv2.
    
    // Block-local state, updated with blocking assignments so that a change
    // made earlier in this block is seen by the tests further down in the
    // same clock edge. While it is 1 the increment branch below is skipped.
    reg  stopCntV;
    
    if (nReset == 1'b0) begin
      bwdCellIdx <= `LDEC_PAD(1'b0, CI_HI);    
      bwdIterStartOut <= 1'b0;
      bwdIterStarted <= 1'b0;
      bwdIterStartedNoPipe_1 <= 1'b0;
      stopCntV = 1'b1;
      bwdLlrUnityDiv2 <= `LDEC_PAD(1'b0, `LDEC_CHK_BITS-3);
    end else begin
      if (enable == 1'b0 || newBlk == 1'b1) begin
        // Disabled or at a block boundary: back to index 0 with the counter
        // stopped. bwdLlrUnityDiv2 is left unchanged here.
        bwdCellIdx <= `LDEC_PAD(1'b0, CI_HI);
        bwdIterStartOut <= 1'b0;
        bwdIterStarted <= 1'b0;
        bwdIterStartedNoPipe_1 <= 1'b0;
        stopCntV = 1'b1;
      end
      else begin
        // bwdIterStartOut is set further down (when bwdCellIdx is 1) and is
        // cleared here on any cycle where bwdStall is low. The set below is
        // the later assignment, so it wins if both apply on the same edge.
        if (bwdStall == 1'b0) begin
          bwdIterStartOut <= 1'b0;
        end
        // Enable bwd counting on the first end of fwd row. fwdCellIdx has
        // T-2 timing so is already on the new row when fwdEndRow_1 is set.
        // We immediately release the bwd counter so that it will keep in step
        // and set the bwdIterStartedNoPipe_1 signal.
        if (stopCntV == 1'b1 && fwdEndRow_1 == 1'b1) begin
          stopCntV = 1'b0;
          if (bwdCellIdx == `LDEC_PAD(1'b0, CI_HI)) begin
            // The IterStart signal is a pulse except where extended due
            // to fwd/bwd PipeStall. 
            bwdIterStartedNoPipe_1 <= 1'b1;
          end
        end   
        // Advance the index unless held. A bwdIterStart_2 pulse overrides
        // the hold. Note stopCntV here already reflects a release made just
        // above on this same edge.
        if ((stopCntV == 1'b0) &&
            (holdBwdCell_1 == 1'b0 || bwdIterStart_2 == 1'b1)) begin
          if (bwdCellIdx == numMacroCells-`LDEC_PAD(1'b1, CI_HI)) begin
            // At end of bwd iteration wrap to index 0 and stop the counter.
            // It is released again by the stopCntV/fwdEndRow_1 test above on
            // a later cycle.
            bwdCellIdx <= `LDEC_PAD(1'b0, CI_HI);
            stopCntV = 1'b1;         
          end
          else begin
            bwdCellIdx <= bwdCellIdx + `LDEC_PAD(1'b1, CI_HI);
          end       
        end
        // Indicate start with T-0 timing, noting that the cell idx has T-2
        // timing.
        if (bwdCellIdx == `LDEC_PAD(1'b1, CI_HI)) begin
          bwdIterStartOut <= 1'b1;
          // Capture fwdLlrUnity with its LSB dropped.
          bwdLlrUnityDiv2 <= fwdLlrUnity[`LDEC_CHK_BITS-2 : 1];
        end 
      end
       // While enabled bwdIterStarted is bwdIterStarted_1 delayed by one clock.
       // Being the last assignment in the block, this overrides the clear in
       // the newBlk branch above whenever enable is high.
       if (enable) begin
         bwdIterStarted <= bwdIterStarted_1;
       end
    end
  end //pBwdCellIdx
  
  // T-2 view of 'bwd iteration started': either already started (T-1) or the
  // start pulse is present now.
  assign bwdIterStarted_2  = bwdIterStart_2 | bwdIterStarted_1;

  // Allow a pre-fetch of the first bwd cell info.
  assign bwdCellPrimeOut  = !bwdIterStarted & fwdEndRow_1;
  
  //---------------------------------------------------------------------------
  // Generate control signals derived from flags.
  //---------------------------------------------------------------------------
  
  // End-of-row flags, each gated by the activity signal with matching timing
  // so the flag bit is ignored when that side is not running.
  assign fwdEndRow   = decodeActive     ? fwdFlag[`LDEC_FLAG_END_ROW_BIT]   : 1'b0;
  assign fwdEndRow_1 = decodeActive_1   ? fwdFlag_1[`LDEC_FLAG_END_ROW_BIT] : 1'b0;
  assign bwdEndRow   = bwdIterStarted   ? bwdFlag[`LDEC_FLAG_END_ROW_BIT]   : 1'b0;
  assign bwdEndRow_1 = bwdIterStarted_1 ? bwdFlag_1[`LDEC_FLAG_END_ROW_BIT] : 1'b0;
  // Asserted at fwd end of row once bwd is also at end of row, or when bwd
  // has not started.
  assign latchChkMetricOut  = fwdEndRow & (!bwdIterStarted | bwdEndRow);
  
  // Stalls with T-0 timing. If fwd/bwd rows are of unequal length then
  // a stall is required on the shorter row.

  always @(posedge(clk) `LDEC_RESET_STR)
  begin : pStall
    // Registers the T-1 stall / NoOp signals to give their T-0 versions and
    // computes the row-length-mismatch stalls. Stalls come out of reset
    // asserted; NoOps come out of reset de-asserted.
    if (!nReset) begin
      fwdStall <= 1'b1;
      bwdStall <= 1'b1;
      fwdStallMis_1 <= 1'b1;
      bwdStallMis_1 <= 1'b1;
      fwdNoOp <= 1'b0;
      bwdNoOp <= 1'b0;
    end
    else begin
      // Stalls with T-1 timing, due to fwd/bwd row length mismatch.
      // fwd also stalls when no decode is active or on an IO stall.
      fwdStallMis_1 <= waitingForIo | (!decodeActive_2) |
                       (fwdEndRow_1 & bwdIterStarted_1 & (!bwdEndRow_1));
      // bwd also stalls until the bwd iteration has started.
      bwdStallMis_1 <= (!bwdIterStarted_2) | ((!fwdEndRow_1) & bwdEndRow_1);
      // T-1 -> T-0 pipeline stage.
      fwdStall <= fwdStall_1;
      bwdStall <= bwdStall_1;
      fwdNoOp <= fwdNoOp_1;
      bwdNoOp <= bwdNoOp_1;
    end
  end //pStall

  // Combine stalls due to row length mismatches with those for pipeline delay.
  assign fwdStall_1  = fwdPipeStall_1 | fwdStallMis_1;
  assign bwdStall_1  = bwdPipeStall_1 | bwdStallMis_1;
  // Hold signals are exclusively for controlling the cell index and the cell
  // RAM. Each side holds during its pipeline stall, or when it has reached
  // end of row and the other side has not (only once bwd has started).
  assign holdFwdCell_1  = (fwdEndRow_1 & bwdIterStarted_1 & (!bwdEndRow_1)) | fwdPipeStall_1;
  assign holdBwdCell_1  = (bwdEndRow_1 & bwdIterStarted_2 & (!fwdEndRow_1)) | bwdPipeStall_1;
  // NoOps are when we want control logic to work as normal but the content of
  // the cell is a NoOp. This is relevant when `LDEC_NUM_PORTS==1.
  assign fwdNoOp_1 = (fwdCol_1 == NCOLS_RIGHT);
  assign bwdNoOp_1 = (bwdCol_1 == NCOLS_RIGHT);
  
  // Determine  row start.
  always @(posedge(clk) `LDEC_RESET_STR)
  begin : pRowStart
    // Row start indications for fwd and bwd, in T-1 and T-0 versions.
    // All four are held low in reset and while enable is low.
    if (!nReset) begin
      fwdRowStart <= 1'b0;
      fwdRowStart_1 <= 1'b0;
      bwdRowStart <= 1'b0;
      bwdRowStart_1 <= 1'b0;
    end
    else if (!enable) begin
      fwdRowStart <= 1'b0;
      fwdRowStart_1 <= 1'b0;
      bwdRowStart <= 1'b0;
      bwdRowStart_1 <= 1'b0;
    end
    else begin
      // bwd: a plain two stage delay line from the T-2 signal.
      bwdRowStart_1 <= bwdRowStart_2;
      bwdRowStart <= bwdRowStart_1;

      // fwd: set at index 0 or at a fwd end of row where bwdEndRow_1 equals
      // bwdIterStarted_1; once set it stays set until a cycle with no fwd
      // stall. The cellIdx compare takes into account that CellIdx is T-2.
      if ((fwdCellIdx == `LDEC_PAD(1'b0, CI_HI)) ||
          (fwdEndRow_1 && (bwdEndRow_1 == bwdIterStarted_1))) begin
        fwdRowStart_1 <= 1'b1;
      end
      else if (!fwdStall_1) begin
        fwdRowStart_1 <= 1'b0;
      end
      // The T-0 version is only presented on a non-stalled cycle.
      fwdRowStart <= !fwdStall_1 & fwdRowStart_1;
    end
  end //pRowStart

  // Tracking of first iteration. CellIdx has T-2 timing.
  // End of iteration (T-1) is flagged on a non-stalled end of row while the
  // cell index is 0 and a decode is active.
  assign fwdEndIter_1 = ((fwdCellIdx == `LDEC_PAD(1'b0, CI_HI)) && (fwdStall_1 == 1'b0) &&
                         (fwdEndRow_1 == 1'b1) && (decodeActive_1 == 1'b1) ) ? 1'b1 : 1'b0;
  
  // Uses logical && throughout, matching fwdEndIter_1 (a stray bitwise & was
  // previously mixed into this chain).
  assign bwdEndIter_1 = ((bwdCellIdx == `LDEC_PAD(1'b0, CI_HI)) && (bwdStall_1 == 1'b0) &&
                         (bwdEndRow_1 == 1'b1) && (decodeActive_1 == 1'b1) ) ? 1'b1 : 1'b0;

  // First iteration signals, with both T-0 and T-1 timing.
  always @(posedge(clk) `LDEC_RESET_STR)
  begin : pFirstIter
    // First-iteration flags: raised by newBlk and dropped by the matching
    // end-of-iteration signal. Also registers the end-of-iteration signals.
    if (!nReset) begin
      fwdEndIter <= 1'b0;
      bwdEndIter <= 1'b0;
      fwdFirstIteration <= 1'b0;
      fwdFirstIteration_1 <= 1'b0;
      bwdFirstIteration_1 <= 1'b0;
    end
    else if (!enable) begin
      fwdEndIter <= 1'b0;
      bwdEndIter <= 1'b0;
      fwdFirstIteration <= 1'b0;
      fwdFirstIteration_1 <= 1'b0;
      bwdFirstIteration_1 <= 1'b0;
    end
    else begin
      // A new block marks all versions as 'first iteration'.
      if (newBlk) begin
        fwdFirstIteration_1 <= 1'b1;
        bwdFirstIteration_1 <= 1'b1;
        fwdFirstIteration <= 1'b1;
      end
      // The clears come after the set above, so an end of iteration
      // coinciding with newBlk wins.
      if (fwdEndIter) begin
        fwdFirstIteration <= 1'b0;
      end
      if (fwdEndIter_1) begin
        fwdFirstIteration_1 <= 1'b0;
      end
      if (bwdEndIter_1) begin
        bwdFirstIteration_1 <= 1'b0;
      end
      // Pipeline the end iteration signal.
      bwdEndIter <= bwdEndIter_1;
      fwdEndIter <= fwdEndIter_1;
    end
  end //pFirstIter

  //---------------------------------------------------------------------------
  // Additional stall due to pipeline
  //---------------------------------------------------------------------------

  // Extra pipelining in the fwd block means the bwd block has to stall at the
  // start of a row to wait for the checkmetric to get updated. Likewise the
  // forward block needs to stall because it depends on check metrics passed
  // from the bwd section. The extra stalls occur on each and every row except
  // the first forward row of a block.

  // Row boundary (T-1): fwd is at end of row and bwd is either also at end of
  // row or has not started yet.
  assign endRow  = fwdEndRow_1 & ((!bwdIterStarted_1) | bwdEndRow_1);

  generate
    if (NUM_ABC_PIPES > 0) begin: genPipeStall
      // Pipelined case: a small state machine inserts a counted number of
      // stalls on both fwd and bwd after each endRow, optionally followed by
      // further stalls on fwd only.
      always @(posedge(clk) `LDEC_RESET_STR)
      begin : pPipeStall
        // lint: Some bits may be unused.
        reg [31:0] tmp32;        
        reg [1:0] pipeStallStateV; // pipeStallStateType
        // stallCountV is loaded with NUM_ABC_PIPES-1 and later with
        // `LDEC_BWD_FWD_LATENCY+`LDEC_PIPE_X-1, so it is sized to hold the
        // larger of the two. (Sizing from `LDEC_BWD_FWD_LATENCY-1 alone would
        // truncate the second load whenever `LDEC_PIPE_X pushes it past the
        // counter width.)
        localparam SC_BITS = numBits(maximum(1, maximum(NUM_ABC_PIPES-1, `LDEC_BWD_FWD_LATENCY+`LDEC_PIPE_X-1)));
        reg  [SC_BITS-1:0] stallCountV;

        if (nReset == 1'b0) begin
          bwdIterStart_2 <= 1'b0;
          bwdIterStarted_1 <= 1'b0;
          bwdRowStart_2 <= 1'b0;
          fwdPipeStall_1_r <= 1'b0;
          bwdPipeStall_1 <= 1'b1;
          stallVr <= 1'b0;
          pipeStallStateV = waitEndRowState;
          stallCountV = 1'b0;
        end else begin
          // Default: bwdIterStart_2 is a single cycle pulse.
          bwdIterStart_2 <= 1'b0;
          if (enable == 1'b0 || newBlk == 1'b1) begin
            bwdIterStart_2 <= 1'b0;
            fwdPipeStall_1_r <= 1'b0;
            bwdPipeStall_1 <= 1'b1;
            bwdIterStarted_1 <= 1'b0;
            pipeStallStateV = waitEndRowState;
            bwdRowStart_2 <= 1'b0;                 
            stallVr <= 1'b0;
            stallCountV = 1'b0;
          end
          else begin
            // Default: bwdRowStart_2 is a single cycle pulse.
            bwdRowStart_2 <= 1'b0;
            // Once both fwd and bwd are showing end of row throw in a 
            // counted number of stalls.
            case (pipeStallStateV)
              waitEndRowState : begin
                if (endRow == 1'b1) begin
                  fwdPipeStall_1_r <= 1'b1;
                  bwdPipeStall_1 <= 1'b1;
                  stallVr <= 1'b1;
                  tmp32 = NUM_ABC_PIPES - 1;
                  stallCountV = tmp32[SC_BITS-1:0];
                  // case of only one pipeline stage.
                  if (stallCountV == {SC_BITS {1'b0}}) begin
                    bwdIterStart_2 <= !bwdIterStarted_2;
                    bwdRowStart_2 <= 1'b1;
                    // `LDEC_PIPE_VR needs T-2 timing.
                    if (`LDEC_PIPE_VR == 1) begin
                      stallVr <= 1'b0;
                    end
                  end                
                  pipeStallStateV = addStallsState;
                end
              end
              addStallsState : begin
//                if (stallCountV == `LDEC_PAD(1'b1, SC_BITS-1)) begin   // case of >1 pipeline stage.
                if (stallCountV == 1'b1) begin   // case of >1 pipeline stage.
                  bwdIterStart_2 <= !bwdIterStarted_2;
                  bwdRowStart_2 <= 1'b1;
                  if (`LDEC_PIPE_VR == 1) begin
                    stallVr <= 1'b0;
                  end
                end                
                if (stallCountV == {SC_BITS {1'b0}}) begin
                  bwdPipeStall_1 <= 1'b0;
                  // This one releases bwdCellIdx at start of block.
                  bwdIterStarted_1 <= 1'b1;
                  // Release stall to varResp FIFO if not already done.
                  stallVr <= 1'b0;
                  if (`LDEC_BWD_FWD_LATENCY + `LDEC_PIPE_X == 0) begin
                    // No extra fwd-only stalls required.
                    fwdPipeStall_1_r <= 1'b0;
                    pipeStallStateV = stallReArmState;
                  end
                  else begin
                    // Reload the counter for the fwd-only stall phase.
                    pipeStallStateV = fwdStallState;
                    tmp32 = `LDEC_BWD_FWD_LATENCY+`LDEC_PIPE_X-1;
                    stallCountV = tmp32[SC_BITS-1:0];
                  end
                end
                else begin
                  stallCountV = stallCountV - 1'b1;
                end
                // Stalling of FWD processing only
              end
              fwdStallState : begin
                if (stallCountV == {SC_BITS {1'b0}}) begin
                  fwdPipeStall_1_r <= 1'b0;
                  stallVr <= 1'b0;
                  pipeStallStateV = stallReArmState;
                end
                else begin
                  stallCountV = stallCountV - 1'b1;
                end              
              end
              default : begin // stallReArmState
                // One cycle gap before looking for the next endRow.
                pipeStallStateV = waitEndRowState;            
              end
            endcase                
          end
        end
        
      end //pPipeStall
      assign fwdPipeStall_1 = fwdPipeStall_1_r;
    end else begin
      reg bwdIterStartNoPipe_2;
      always @(*) begin
        // The no pipelining case comprises of the following, plus genBypassStall1
        // which we would have embedded here if Verilog allowed nested generates.
        bwdPipeStall_1 = 1'b0;
        bwdIterStarted_1 = bwdIterStartedNoPipe_1;
        // Without additional pipelining the bwd iteration start follows the end
        // of the first row.
        bwdIterStartNoPipe_2  = (fwdEndRow_1 & decodeActive_1) &
                                !bwdIterStarted_1;
        bwdIterStart_2 = bwdIterStartNoPipe_2;
        bwdRowStart_2 = bwdIterStart_2 | (bwdEndRow_1 & fwdEndRow_1);
        stallVr = 1'b0;
      end
    end
  endgenerate //  genPipeStall
  
  // No pipelining
  // When `LDEC_BWD_FWD_LATENCY>0 we need to stall just the fwd block to stagger
  // it relative to bwd block (this allows data written to VM RAM to be
  // written and then become readable for the same location).
  generate
    if (`LDEC_BWD_FWD_LATENCY+`LDEC_PIPE_X > 0 && NUM_ABC_PIPES == 0) begin: genBypassStall1
      // No-pipeline case with a non-zero bwd->fwd latency: after each endRow
      // hold fwdPipeStall_1 high for `LDEC_BWD_FWD_LATENCY+`LDEC_PIPE_X cycles.
      always @(posedge(clk) `LDEC_RESET_STR)
      begin : pBypassStall

        reg [31:0] tmp32;
        // countV is loaded with `LDEC_BWD_FWD_LATENCY+`LDEC_PIPE_X-1, so it is
        // sized from that same expression. (Sizing from
        // `LDEC_BWD_FWD_LATENCY-1 alone would truncate the load whenever
        // `LDEC_PIPE_X pushes it past the counter width.)
        localparam C_BITS = numBits(maximum(1, `LDEC_BWD_FWD_LATENCY+`LDEC_PIPE_X - 1));
        reg [C_BITS-1:0] countV;
        
        if (nReset == 1'b0) begin
          fwdPipeStall_1_r <= 1'b0;
//          if (`LDEC_RESET_ALL) begin
            countV = `LDEC_PAD(1'b0, C_BITS-1);
//          end
        end else begin   
          // Look for endRow then add fwd stalls
          if (endRow == 1'b1) begin
            fwdPipeStall_1_r <= 1'b1;
            tmp32 = `LDEC_BWD_FWD_LATENCY+`LDEC_PIPE_X - 1;
            countV = tmp32[C_BITS-1:0];
          end
          else if (fwdPipeStall_1 == 1'b1) begin
            // Count down while stalled; release the stall at zero.
            if (countV == 0) begin
              fwdPipeStall_1_r <= 1'b0;
            end
            else begin
              countV = countV - `LDEC_PAD(1'b1, C_BITS-1);
            end
          end
        end
      end //pBypassStall
      assign fwdPipeStall_1 = fwdPipeStall_1_r;
    end
  endgenerate //  genBypassStall1

  // No pipelining and no bwd->fwd latency: the fwd pipeline stall is never
  // asserted.
  generate
    if (NUM_ABC_PIPES == 0 && `LDEC_BWD_FWD_LATENCY+`LDEC_PIPE_X == 0) begin: genBypassStall0
      assign fwdPipeStall_1 = 1'b0;
    end
  endgenerate

  // Computes dynRunningCount, the running count limit applied to the current
  // decode, shortly after the true start of each decode.
  // The sensitivity list uses `LDEC_RESET_STR like every other clocked block
  // in this file, so the reset style stays under the control of that macro.
  always @(posedge(clk) `LDEC_RESET_STR)
  begin : pRunningCount
    localparam DRC_BITS = numBits(`LDEC_MAX_ITER*8)+1;
    localparam DE_BITS =  DIFF_BITS + 3;
      
    // The *V variables are block-local and use blocking assignments. Those
    // written in the decodeReallyStartD1 section are read by the
    // decodeReallyStartD2 section on the following clock.
    reg  [2:0]             endCounterV;
    reg  [BN_HI:0]         blkV;
    // Difference in number of iterations (measured in eighths) from one block
    // to the next. 63 seems a reasonable upper limit.
    reg [DIFF_BITS-1:0]    diffV;
    reg [ITERS8_BITS-1:0]  diffV1;
    reg atEndV;
    reg [numBits(`LDEC_MAX_ITER)-1:0] borrowItersV;
    reg [DRC_BITS-1:0]     drcV;
    reg                    decodeReallyStartPendingV;
    reg [DE_BITS-1:0]      deV;

    if (nReset == 1'b0) begin
      decodeReallyStartPendingV = 1'b0;
      decodeReallyStart <= 1'b0;
      decodeReallyStartD1 <= 1'b0;
      decodeReallyStartD2 <= 1'b0;
//      if (`LDEC_RESET_ALL) begin
        borrowItersV = `LDEC_PAD(1'b0, numBits(`LDEC_MAX_ITER)-1);       
        dynRunningCount <= `LDEC_PAD(1'b0, ITERS8_BITS-1);
        diffV = `LDEC_PAD(1'b0, DIFF_BITS-1);
        endCounterV = 3'd0;
        atEndV = 1'b0;
//      end
    end else begin
      // The calc of dynRunningCount is done at the start of decode. We want
      // the true start of decode, which is when ip is available, the op
      // buffer is not locked (prev-prev block is still outputting so prev
      // block is held in the decoder), and of course prev decode has completed.
      decodeReallyStart <= 1'b0;
      if (decodeStart) begin
        if (! waitingForIo) begin
          decodeReallyStart <= 1'b1;
        end else begin
          // Stalled on IO: remember the start and issue it once the stall clears.
          decodeReallyStartPendingV = 1'b1;
        end
      end else if (decodeReallyStartPendingV && !waitingForIo) begin
        decodeReallyStart <= 1'b1;
        decodeReallyStartPendingV = 1'b0;
      end
      decodeReallyStartD1 <= decodeReallyStart; 
      decodeReallyStartD2 <= decodeReallyStartD1; 
        
      if (decodeReallyStartD2 == 1'b1) begin        
        if (atEndV == 1'b1) begin
          drcV = `LDEC_PAD(endRunningCount, DRC_BITS-ITERS8_BITS);
        end
        else begin
          // Step down from maxRunningCount by diffV for each count in
          // endCounterV, then clamp to `LDEC_MAX_ITER iterations (x8).
          // NOTE(review): the deV part-select assumes DE_BITS >= DRC_BITS - confirm.
          deV =  diffV*endCounterV;
          drcV = {1'b0, maxRunningCount} - deV[DRC_BITS-1:0];            
          if (`LDEC_PAD_SUPPORT) begin
            drcV = drcV + {1'b0, borrowItersV, 3'b0};
          end
          if (drcV > {`LDEC_MAX_ITER, 3'd0}) begin
            drcV = {`LDEC_MAX_ITER, 3'd0};
          end
        end
        dynRunningCount <= drcV[ITERS8_BITS-1:0];
      end
      
      // This comes the cycle before decodeReallyStartD2 code above but we put it after
      // so synthesis knows the registered versions of the variables are used.
      if (decodeReallyStartD1) begin

        // Interpolation between max and end running count near packet end.
        atEndV = 1'b0;
        // get number of blocks (+1) to be processed after this one.
        blkV = packetLenArray[ctrlUserDec] - curBlkNumArray[ctrlUserDec];
        if (blkV > `LDEC_PAD(3'd4, BN_HI-2)) begin
          endCounterV = 3'd0;
        end
        else if (blkV <= `LDEC_PAD(1'b1, BN_HI)) begin
          atEndV = 1'b1;
        end else begin
          endCounterV = 3'd5 - blkV[2:0];
        end
        // This is the delta in running count after each decode.
        diffV1 = (maxRunningCount - endRunningCount) >> 2'd2;
        diffV = `LDEC_CLIP_U(diffV1, ITERS8_BITS, DIFF_BITS);

        // If, after padding removal, this is the last block and one or more
        // blocks is being dropped, then borrow iterations. Note lastActualBlk
        // was updated a cycle earlier for the current block.
        borrowItersV = `LDEC_PAD(1'b0,  numBits(`LDEC_MAX_ITER)-1);
        if (`LDEC_PAD_SUPPORT) begin
          if (lastActualBlk) begin
            borrowItersV = `LDEC_MIN(nomIterations >> 2'd3, endRunningCount >> 2'd3);
          end
        end
      end
      // Placed last so that a low enable overrides the start logic above.
      if (enable == 1'b0) begin
        decodeReallyStartPendingV = 1'b0;
        decodeReallyStart <= 1'b0;
        decodeReallyStartD1 <= 1'b0;
        decodeReallyStartD2 <= 1'b0;
      end
    end

  end //pRunningCount

  //---------------------------------------------------------------------------
  // Parity checking  end-condition
  //---------------------------------------------------------------------------

  // Dynamically adjust the maximum running count as the end of packet is
  // approached.

  // parityErrs is updated at T+1 in Bwd block and we use the parity update strobe
  // that is synchronous with it to ensure correct timing.
  // Rising-edge detect on blockOpComplete: high when blockOpComplete is high
  // and its registered copy (blockOpCompleteD) is still low.
  reg  blockOpCompleteD;
  wire blockOpCompleteP;
  assign blockOpCompleteP = !blockOpCompleteD & blockOpComplete;
  
  always @(posedge(clk) `LDEC_RESET_STR)
  begin : pPar
    
    reg                                      armedForEndIterV;
    reg signed [RC_BITS:0]                   runningCountV;
    reg                                      terminateV;
    reg signed [ITERS8_BITS:0]               iterationsV;
    reg [NI_HI:0]                            numIterationsV;
    integer idx;
    
    if (nReset == 1'b0) begin
      packetStatusOut <= 1'b1;
      subPacketFailed <= 1'b0;
      blkErrs <= `LDEC_PAD(1'b0, BN_HI);
      newBlk <= 1'b0;
      newBlkPulse <= 1'b0;
      armedForEndIterV = 1'b0;
      curIterationOut <= `LDEC_PAD(1'b0, NI_HI);     
      ipBufferAlreadyLoaded <= 1'b0;
      runningCount <= `LDEC_PADS(1'sb0, RC_BITS-1);
      prevDecodeStatusOut <= 1'b0;
      prevIterationsOut <= `LDEC_PAD(1'b0, NI_HI);
      prevParityErrsOut <= `LDEC_PAD(1'b0, PE_HI);
      blkPingPongE        <= 1'b0;
      for (idx=0;idx<NUM_USERS;idx=idx+1)
        curBlkNumArray[idx] <= `LDEC_PAD(1'b0, BN_HI);
      decodeComplete <= 1'b0;
      ctrlUserOp <= 5'd0;
      enableOp <= 1'b0;
      ipSkipBlockPending <= 3'd0;
      ipSkipBlockUser <= 5'd0;
      blockOpCompleteD <= 1'b0;
    end else begin
      newBlkPulse <= 1'b0;
      numIterationsV = curIterationOut;
      if (enable == 1'b0) begin
        packetStatusOut <= 1'b1;
        subPacketFailed <= 1'b0;
        blkErrs <= `LDEC_PAD(1'b0, BN_HI);
        newBlk <= 1'b0;
        armedForEndIterV = 1'b0;
        numIterationsV = `LDEC_PAD(1'b0, NI_HI);
        runningCount <= `LDEC_PADS(1'b0, RC_BITS-1);
        prevDecodeStatusOut <= 1'b0;
        prevIterationsOut <= `LDEC_PAD(1'b0, NI_HI);
        prevParityErrsOut <= `LDEC_PAD(1'b0, PE_HI);
        blkPingPongE        <= 1'b0;
        for (idx=0;idx<NUM_USERS;idx=idx+1)
          curBlkNumArray[idx] <= `LDEC_PAD(1'b0, BN_HI);
        decodeComplete <= 1'b0;
        ipBufferAlreadyLoaded <= 1'b0;       
        ctrlUserOp <= 5'd0;
        enableOp <= 1'b0;
        ipSkipBlockPending <= 3'd0;
        ipSkipBlockUser <= 5'd0;
        blockOpCompleteD <= 1'b0;
      end
      else begin
        blockOpCompleteD <= blockOpComplete;
        if ((blockOpCompleteP==1'b1) && (newBlk==1'b0))
          enableOp <= 1'b0;

        // If ipSkipBlock is received during a decode (i.e. the IP would have been stalled if this decoder was selected), 
        // wait end of decode before updating curBlkNumArray.
        if (ipSkipBlock==1'b1) begin
          if ((decodeActive==1'b1) && (ctrlUserIn==ctrlUserDec)) begin
            ipSkipBlockPending <= ipSkipBlockPending + 3'd1;
            ipSkipBlockUser <= ctrlUserIn;
          end else begin
            curBlkNumArray[ctrlUserIn] <= curBlkNumArray[ctrlUserIn] + `LDEC_PAD(1'b1, BN_HI);
          end
        end

        runningCountV = $signed({$signed(runningCount[RC_BITS-1]), runningCount});
        // Cancel negative running count if we had to wait for on-air data.
        if ((runningCountV < `LDEC_PADS(1'sb0, RC_BITS)) && decodeStartD1 &&
            ((beatTimeLine && waitFirstWordOfSymReg) ||
             (! beatTimeLine && firstBlkOfSym)))
        begin
          runningCountV = `LDEC_PADS(1'sb0, RC_BITS);
        end
        // enLostTime mode. Compensate for time lost due to IO stalls.
        if (transferLostTime) begin
          runningCountV = runningCountV + `LDEC_PADS(lostIterationsRoundUp, RC_BITS-LOST_ITERS_BITS);
        end
        runningCount <= `LDEC_CLIP_S(runningCountV, RC_BITS+1, RC_BITS);
        
        // This is the regular end of iteration processing, however if
        // a decode has finished, but the output buffer has not been
        // emptied from the previous decode, then it is bypassed. newBlk remains
        // high, which holds the decoder in a ready to start state until the
        // output buffer becomes free.
        if (! (newBlk & opBufferLocked)) begin
          // Signal that there is new output, but only after the output buffer
          // becomes free.
          decodeComplete <= 1'b0;
          if (newBlk == 1'b1) begin
            newBlk <= 1'b0;
            if (firstBlk == 1'b0) begin
              // This filters out the first newBlk at start of first decode.
              decodeComplete <= 1'b1;
              // If there is a skip block on the same cycle (still unregistered), add it here because this code overrides the ipSkipBlock code above
              // If there is a pending block skip for the current user, add 2, else 1.
              if (ctrlUserDec==ipSkipBlockUser)
                curBlkNumArray[ctrlUserDec] <= curBlkNumArray[ctrlUserDec]
                                               + `LDEC_PAD(1'b1, BN_HI)
                                               + `LDEC_PAD(ipSkipBlockPending, BN_HI-2)
                                               + `LDEC_PAD(ipSkipBlock, BN_HI-1);
              else
                curBlkNumArray[ctrlUserDec] <= curBlkNumArray[ctrlUserDec] + `LDEC_PAD(1'b1, BN_HI) + `LDEC_PAD(ipSkipBlock, BN_HI-1);
              blkPingPongE <= ~blkPingPongE;
              ipSkipBlockPending <= 3'd0;
            end
          end
          // First enable after all disabled
          if (enableLast == 1'b0) begin
            newBlk <= 1'b1;
            newBlkPulse <= 1'b1;
            blkPingPongE <= 1'b0;
          end
          for (idx=0;idx<NUM_USERS;idx=idx+1) begin
            if ((enabledUsersLast[idx]==1'b0) && (enabledUsers[idx] == 1'b1)) begin
              curBlkNumArray[idx] <= `LDEC_PAD(1'b0, BN_HI);
              if (ipSkipBlockUser==idx)
                ipSkipBlockPending <= 3'd0;
            end
          end
          
          if (bwdEndIter == 1'b1) begin
            armedForEndIterV = 1'b1;
          end
          if (parityErrsUpdate == 1'b1 || iterationAbort2 == 1'b1) begin        
            // End of iteration processing.
            if (armedForEndIterV == 1'b1 || iterationAbort2 == 1'b1) begin
              armedForEndIterV = 1'b0;
              if ({1'b0, numIterationsV} < MAX_ITER1) begin
                numIterationsV = numIterationsV + `LDEC_PAD(1'b1, NI_HI);
              end
              runningCountV = $signed({$signed(runningCount[RC_BITS-1]),
                                       runningCount}) + `LDEC_PADS(5'sd8, RC_BITS-4);   
              prevParityErrsOut <= parityErrs;
    
              //-----------------------------------------------------------------
              // The end (of decode) conditions
              //-----------------------------------------------------------------
              
              terminateV = 1'b0;
              if ((parityErrs == `LDEC_PAD(1'b0, PE_HI)) || (iterationAbort2 == 1'b1) ||
                  (runningCountV+`LDEC_PADS(5'sd8, RC_BITS-4) > $signed({5'b00000, dynRunningCount})) ||
                  // this is for when we started with a negative running count.
                  // The above could burn up the 'gained' latency so set an
                  // absolute limit on number of iterations.
                  ({numIterationsV, 3'd0} > maxRunningCount)) begin
                terminateV = 1'b1;
              end
              else if (`LDEC_EARLY_TERM_SUPPORT) begin
                // Terminate early if poor progress is being made. This early 
                // stopping is disabled when (nomIterations === maxRunningCount).
                if ((numIterationsV >= earlyTestIterations) &&
                    (nomIterations < maxRunningCount)) begin
                  if (`LDEC_PAD(parityErrs, numBits(`LDEC_K_MAX) -
                                numBits(`LDEC_PAR_ERRS_MAX)) > parityThresh) begin
                     
                    terminateV = 1'b1;                  
                  end
                end
              end              
              if (subPacketFailed) begin
                terminateV = 1'b1;
              end
              //-----------------------------------------------------------------
              // The end (of decode) processing
              //-----------------------------------------------------------------
              if (terminateV == 1'b1) begin
                // This is the end of this decode. 'prev' status registers report
                // the status of the decode just done.
                if ((parityErrs == `LDEC_PAD(1'b0, PE_HI)) && (iterationAbort2 == 1'b0)) begin
                  prevDecodeStatusOut <= 1'b1;
                end else begin
                  prevDecodeStatusOut <= 1'b0;
                  packetStatusOut <= 1'b0;
                  subPacketFailed <= 1'b1;
                  blkErrs <= blkErrs + `LDEC_PAD(1'b1, BN_HI);
                end
                prevIterationsOut <= numIterationsV;
                prevParityErrsOut <= parityErrs;
                numIterationsV = `LDEC_PAD(1'b0, NI_HI);
                newBlk <= 1'b1;
                newBlkPulse <= 1'b1;
                if (numBuffersFull == 2'd2) begin
                  ipBufferAlreadyLoaded <= 1'b1;
                end else begin
                  ipBufferAlreadyLoaded <= 1'b0;                  
                end
                // Enable output block to catch newBlk, 1cc before decodeComplete
                ctrlUserOp <= ctrlUserDec;
                enableOp <= 1'b1;

                if ((`LDEC_SPECIAL_AGG_SUPPORT==1'b1) && (endSubPacket !=
                      `LDEC_PAD(1'b0, numBits(`LDEC_MAX_PACKET_BITS-1)-1))) begin
                  // Clear subPacketFailed at the start of each sub-packet.
                  if (packetDataBitCount == endSubPacket) begin
                    // Last block of subpacket does not cross into the next.
                    subPacketFailed <= 1'b0;
                  end
                  else if (packetDataBitCount > endSubPacket) begin
                    // Block crosses sub-packet boundaries and a parity fail
                    // affects both sub-packets.                    
                    if ((parityErrs == `LDEC_PAD(1'b0, PE_HI)) && !iterationAbort2) begin
                      subPacketFailed <= 1'b0;
                    end
                    else begin
                      subPacketFailed <= 1'b1;
                    end
                  end
                end else begin
                  subPacketFailed <= 1'b0;
                end

                // On blocks that are not the last of the OFDM symbol we allow
                // the running count to go negative, which allows 'saved'
                // latency to be passed onto the immediately following blocks.
                iterationsV = $signed({1'sb0, $signed(`LDEC_MIN(dynRunningCount, nomIterations))});
                runningCountV = runningCountV - `LDEC_PADS(iterationsV, RC_BITS-ITERS8_BITS);
              end
              runningCount <= `LDEC_CLIP_S(runningCountV, RC_BITS+1, RC_BITS);
            end                     // armedForEndIterV
          end                       // parityErrsUpdate
        end                         // !newBlk
      end                           // enable
      curIterationOut <= numIterationsV;
    end
  end //pPar

  //---------------------------------------------------------------------------
  // Detect first use of a metric
  //---------------------------------------------------------------------------

  // Signal the first use of a metric with T-1 timing. This causes the
  // varMetrics to be fetched (in the vmMem block) from the input RAM instead
  // of the decode RAM.
  always @(posedge(clk) `LDEC_RESET_STR)
  begin : pFirstUse
    // Reset (nReset low): the per-column vector is cleared while the
    // registered first-use flag is raised.
    if (!nReset) begin
      metricVeryFirstUse <= 1'b1;
      metricVeryFirstUseVec <= `LDEC_PAD(1'b0, `LDEC_NCOLS-1);
    end
    // A new block marks every column as not yet used and raises the flag.
    else if (newBlk) begin
      metricVeryFirstUse <= 1'b1;
      metricVeryFirstUseVec <= {`LDEC_NCOLS {1'b1}};
    end
    // Decode running and the fwd path not stalled: register the T-1 flag
    // and, unless this cell is a no-op, mark the addressed column as used.
    else if (decodeActive_1 && !fwdStall_1) begin
      if (!fwdNoOp_1) begin
        metricVeryFirstUseVec[fwdCol_1] <= 1'b0;
      end
      metricVeryFirstUse <= metricVeryFirstUse_1;
    end
  end //pFirstUse

  // T-1 first-use flag: the vector bit addressed by fwdCol_1, forced to zero
  // when fwdCol_1 is not below `LDEC_NCOLS (i.e. outside the vector).
  assign metricVeryFirstUse_1 = (`LDEC_PAD(fwdCol_1, 32-numBits(`LDEC_NCOLS)) < `LDEC_NCOLS) &
                                metricVeryFirstUseVec[fwdCol_1];
  
  //-------------------------------------------------------------------------
  // Scaling support.
  //-------------------------------------------------------------------------

  // Scaling works by comparing varMetricSum with a threshold.
  // varMetricSum is guaranteed to be stable by the end of the fwd
  // iteration. It comes from the bwd block, which is a row behind the fwd
  // block, however the varMetricSum is updated on the 'last' use of a
  // varMetric, where the last row is excluded from consideration.
  // The scaling flag is used in the fwd block and must therefore update
  // on iteration boundaries.
  
  // Scaling control. At each un-stalled start of a fwd iteration this process
  // either (first iteration) loads fwdLlrUnity from the llrUnity register
  // selected by blkPingPongS and clears scaleFlagOut, or (later iterations)
  // updates the varMetricSum check sum and compares varMetricSum against the
  // target level to decide whether to raise scaleFlagOut and scale fwdLlrUnity.
  always @(posedge(clk) `LDEC_RESET_STR)
  begin : pScaling
    
    // firstIterV is written with blocking assignments but is read before it
    // is written on later clock edges, so it holds state between edges: it is
    // set at a first-iteration start and cleared at the next iteration start.
    reg firstIterV;
    // Working copy of the threshold; always assigned before it is read.
    reg [`LDEC_VAR_BITS-1+7:0] targetLevelV;
    
    if (nReset == 1'b0) begin
      varMetricChkSum <= `LDEC_PAD(1'b0, `LDEC_VAR_BITS+5);
      scaleFlagOut <= 1'b0;
      firstIterV = 1'b0;
      // NOTE(review): this reset value is padded with `LDEC_CHK_BITS-1 while
      // the fwdLlrUnity constants further down use `LDEC_CHK_BITS-2 -- confirm
      // which one matches the declared width of fwdLlrUnity.
      fwdLlrUnity <= `LDEC_PAD(1'b0, `LDEC_CHK_BITS-1);
    end else begin
      // Everything below only happens on an un-stalled iteration start (T-1).
      if (fwdIterStart_1 == 1'b1 && fwdStall_1 == 1'b0) begin
        if (fwdFirstIteration_1 == 1'b1) begin
          // First iteration of a decode: pick the llrUnity value according to
          // blkPingPongS, clear the scale flag and arm firstIterV.
          if (blkPingPongS) begin
            fwdLlrUnity <= llrUnity0;
          end else begin
            fwdLlrUnity <= llrUnity1;
          end
          scaleFlagOut <= 1'b0;
          firstIterV = 1'b1;
        end
        else begin
          // Generate a check sum, which is a strong measure of exact matching
          // against the reference model. Unsigned vector is used so that the
          // sum may wrap (this is also modelled in the C reference).
          if (firstIterV == 1'b1) begin
            // First iteration start after the first iteration: load the
            // check sum rather than accumulate.
            firstIterV = 1'b0;
            varMetricChkSum <= varMetricSum[`LDEC_VAR_BITS+5:0];
          end
          else begin
            varMetricChkSum <= varMetricChkSum + varMetricSum[`LDEC_VAR_BITS+5:0];
          end
          // Decide whether to scale back. The threshold is halved while
          // bwdFirstIteration_1 is set.
          if (bwdFirstIteration_1) begin
            targetLevelV = targetLevel >> 1;
          end else begin
            targetLevelV = targetLevel;            
          end          
          if (varMetricSum > targetLevelV) begin
            scaleFlagOut <= 1'b1;
            // Scale by beta. Allow it to eventually go to zero.
            // A value of exactly 1 is forced to 0; any other value is passed
            // through roundNoClipC (defined elsewhere in the file).
            if (fwdLlrUnity == `LDEC_PAD(1'b1, `LDEC_CHK_BITS-2)) begin
              fwdLlrUnity <= `LDEC_PAD(1'b0, `LDEC_CHK_BITS-2);
            end
            else begin
              fwdLlrUnity <= roundNoClipC(fwdLlrUnity);
            end
          end
          else begin
            scaleFlagOut <= 1'b0;
          end
        end
      end                           // fwdIterStart_1
    end
  end //pScaling

  //-------------------------------------------------------------------------
  // Output port drivers, grouped by function. Each output mirrors an
  // internal signal unless a gating term is shown.
  //-------------------------------------------------------------------------

  // Decode activity and start/complete handshakes.
  assign decodeActiveOut         = decodeActive;
  assign decodeActiveOut_1       = decodeActive_1;
  assign decodeActiveOut_2       = decodeActive_2;
  assign decodeActiveOut_2D      = decodeActive_2D3;
  assign decodeStartOut          = decodeStart;
  assign decodeCompleteOut       = decodeComplete;
  assign setRdyToRcvOut          = setRdyToRcv || setRdyToRcvIp;

  // Stalls: the fwd/bwd stall outputs are also asserted whenever the decoder
  // is not active.
  assign fwdStallOut             = fwdStall | !decodeActive;
  assign bwdStallOut             = bwdStall | !decodeActive;
  assign fwdStallOut_1           = fwdStall_1 | !decodeActive_1;
  assign bwdStallOut_1           = bwdStall_1 | !decodeActive_1;
  assign stallVrOut              = stallVr;

  // No-op and hold-cell flags.
  assign fwdNoOpOut              = fwdNoOp;
  assign bwdNoOpOut              = bwdNoOp;
  assign fwdNoOpOut_1            = fwdNoOp_1;
  assign bwdNoOpOut_1            = bwdNoOp_1;
  assign holdFwdCellOut_1        = holdFwdCell_1;
  assign holdBwdCellOut_1        = holdBwdCell_1;

  // Cell indexes and row/iteration markers. Only fwdRowStartOut is gated
  // with decodeActive; the bwd row starts are passed through as they are.
  assign fwdCellIdxOut           = fwdCellIdx;
  assign bwdCellIdxOut           = bwdCellIdx;
  assign fwdRowStartOut          = fwdRowStart & decodeActive;
  assign bwdRowStartOut          = bwdRowStart;
  assign bwdRowStartOut_1        = bwdRowStart_1;
  assign bwdRowStartOut_2        = bwdRowStart_2;
  assign bwdEndIterOut           = bwdEndIter;
  assign fwdIterStartOut_1       = fwdIterStart_1;
  assign fwdFirstIterationOut    = fwdFirstIteration;
  assign fwdFirstIterationOut_1  = fwdFirstIteration_1;
  assign bwdFirstIterationOut_1  = bwdFirstIteration_1;

  // First-use flags; the T-1 version is masked while the decoder is idle.
  assign metricVeryFirstUseOut   = metricVeryFirstUse;
  assign metricVeryFirstUseOut_1 = metricVeryFirstUse_1 & decodeActive_1;

  // Scaling value and check sum.
  assign fwdLlrUnityOut          = fwdLlrUnity;
  assign varMetricChkSumOut      = varMetricChkSum;

  // Block bookkeeping.
  assign newBlkPulseOut          = newBlkPulse;
  assign firstBlkOut             = firstBlk;
  assign blkPingPongEOut         = blkPingPongE;
  assign blkPingPongSOut         = blkPingPongS;
  assign blkErrsOut              = blkErrs;
  
  // RW MODIFIED
  // Debug observation buses.
  // parErrAboveThres is high while the zero-padded parityErrs count is above
  // parityThresh (the same comparison as the early-termination test).
  wire parErrAboveThres = (`LDEC_PAD(parityErrs, numBits(`LDEC_K_MAX) - numBits(`LDEC_PAR_ERRS_MAX)) > parityThresh);

  // Low byte of the block number held in entry 0 of curBlkNumArray.
  assign dbgBlkNum = curBlkNumArray[0][7:0];

  // dbgIter layout, MSB first:
  //   [15]   parErrAboveThres
  //   [14]   decodeActiveOut
  //   [13]   waitingForIp
  //   [12:8] curIterationOut[4:0]
  //   [7:0]  runningCount[7:0]
  assign dbgIter = {parErrAboveThres, decodeActiveOut, waitingForIp,
                    curIterationOut[4:0], runningCount[7:0]};
  // END RW MODIFIED
  
endmodule

  
