//---------------------------------------------------------------------------
// ldpcEncOp.v
// 
// Description:
//   Fetches data from the output buffer and delivers it to the external
//   downstream module. When clrToSend is high data will be presented along
//   with a strobe. Data is fetched from the memory once encDone is
//   received.
//
// Inputs:
//   enable        : The enable register bit.
//   zEnum         : Indicates which Z size is being used (from `Z_SIZES) list.
//   k etc         : Various register values.
//   clrToSend     : Downstream HW ready to accept output.
//   opBufferData  : Read data from HD mem.
//   encDone       : Pulse at the end of an encode. 
// Outputs:
//   opStrobe      : When high a valid output sample is presented. This is
//                   deemed to have been accepted if clrToSend is high on
//                   the following clock edge at which point a new sample
//                   will be presented.
//   opDataWord    : 8 bits of the output data, presented in the same order
//                   that the data was received, lsb of first word being the
//                   first bit in time.
//   opBufferSel   : Select the ENC RAM for read.
//   opBufferAddr  : The address for the ENC RAM.
//   opActive      : Held high while in output mode.
//
// 28 Aug 2010 M. Rumsey. Created.
//
// (c) Copyright 2010, Blue Rum Consulting Limited, All Rights Reserved.
//------------------------------------------------------------------------------

`include "ldpcEnc.vh"

// Output stage: reads encoded data from the encoder RAM, packs it into a
// collection register and presents it word by word together with
// opStrobeOut. All port widths are derived from macros in ldpcEnc.vh.
module ldpcEncOp
  (  
     // Active-low reset: each clocked process takes its reset branch when
     // nReset == 0.
     input                                   nReset,
     // Clock for all clocked processes in this module.
     input                                   opClk,
     // Forces the clock enable (opClkEn) high while asserted.
     input                                   disabling,
     // When low the processes return their state to the idle values.
     input                                   enable,
     // Selects the per-user entry of blkNumVec, colRegBase,
     // collectionRegArray and packetCompleteOut.
     input [`NUM_MU_BITS-1:0]                opUser,  
     // Fm Regs
     // zEnum is passed to MEM_MUX_MAX() to get the last memMux value.
     input [numBits(`Z_ENUM_MAX)-1:0]        zEnum,
     // k less the shortening bits (k2) marks where the fetch jumps to parity.
     input [numBits(`K_MAX)-1:0]             k,
     // n is the starting point for the total number of output bits.
     input [numBits(`N_MAX)-1:0]             n,
     // z limits the number of samples taken from one RAM row.
     input [numBits(`Z_MAX)-1:0]             z, 
     // Shortening: nShrtFloor bits, plus one more for blocks below shrtMod.
     input [numBits(`K_MAX)-1:0]             nShrtFloor,
     input [numBits(`MAX_BLKNUM)-1:0]        shrtMod,
     // Puncturing: nPuncFloor bits, plus one more for blocks below puncMod.
     input [numBits(`M_MAX-1)-1:0]           nPuncFloor,
     input [numBits(`MAX_BLKNUM)-1:0]        puncMod,
     // Repetition: nRepFloor bits, plus one more for blocks below repMod.
     input [numBits(`REP_MAX-1)-1:0]         nRepFloor,
     input [numBits(`MAX_BLKNUM)-1:0]        repMod,
     // The block whose number equals packetLen-1 is treated as the last.
     input [numBits(`MAX_BLKNUM)-1:0]        packetLen,
     // Number of bits removed from the collection register per output word.
     input [numBits(`OP_WIDTH)-1:0]          opWidth,
     // Fm Top
     // RAM address used after the `P1_BASE row has been read.
     input [numBits(`NCOLS)-1:0]             parityStartCol,
     // Downstream ready; fetch and output only advance while this is high.
     input                                   clrToSend, 
     // Fm vmMem
     // RAM read data; memMux picks one MEM_FETCH_SIZE slice of it.
     input [`Z_MAX-1:0]                      opBufferData,
     // Fm Ctrl
     // Moves the controller out of waitForEncode into output mode.
     input                                   opStart,
     output                                  opStrobeOut,
     output reg [`OP_WIDTH-1:0]              opDataWordOut,
     // To encMem
     output                                  pingPongOpOut,
     output                                  opBufferSelOut,
     output [`ENC_RAM_A-1:0]                 opBufferAddrOut,
     // To Regs/top
     // Packed copy of the per-user block counters.
     output `blkNumArrayType                 curBlkNumOut,
     // Bit opUser is set when the last block of a packet completes.
     output reg [`NUM_MU-1:0]                packetCompleteOut,  
     // One-cycle pulse after a block completes that is not the last block.
     output                                  setRdyToRcvOut,
     output                                  lastBlockOpSampleOut,
     output                                  lastOpSampleOut,
     // Number of valid bits in opDataWordOut.
     output reg [numBits(`OP_WIDTH)-1:0]     numOpBitsOut,
     // To Ctrl
     output                                  opClkEnOut,
     output                                  opDoneOut,
     // To Debug
     output [`MEM_MUX_BITS-1:0]              memMuxOut,
     output [`MEM_FETCH_SIZE-1:0]            memDataWordOut); 

`include "ldpcEncFuncs.vh"

  // Number of bits taken from the RAM read data per fetch.
  localparam MFS           = `MEM_FETCH_SIZE;
  // Upper bound used to size the bit counters.
  localparam NUM_OF_BITS   = `REP_MAX + `N_MAX + 2*`OP_WIDTH;
  // Width of the collection register.
  localparam COL_REG_BITS  = `CR_OP_HI+1;
  // Range used to size the collection register base pointers.
  localparam COL_REG_BITS2 = `CR_OP_HI+1 + `OP_WIDTH;
  localparam OP_WIDTH_BITS =  numBits(`OP_WIDTH);
  localparam BLKNUM_BITS   =  numBits(`MAX_BLKNUM);
  // Shorthand bit widths used when padding operands to a common width.
  localparam NN = numBits(`N_MAX);
  localparam NK = numBits(`K_MAX);
  localparam NM = numBits(`M_MAX);
  localparam NR = numBits(`REP_MAX);
  localparam NT = numBits(NUM_OF_BITS); 
  
  // Working collection register and the per-user store for left over bits.
  reg [COL_REG_BITS-1:0]                     collectionReg;
  reg [`OP_WIDTH-1:0]                        collectionRegArray [0:`NUM_MU-1];
  // High on the cycle where fetched data is loaded into collectionReg.
  reg                                        collectEnable;
  // High from reset/opDone until opStart is seen.
  reg                                        waitForEncode;
  reg                                        opStrobe;
  // Per-user position in collectionReg where the next fetch is loaded.
  reg  [numBits(COL_REG_BITS2)-1:0]          colRegBase [0:`NUM_MU-1];
  wire [numBits(COL_REG_BITS2)-1:0]          nextColRegBase;
  // colRegBase captured while firstCollect is high.
  reg  [numBits(COL_REG_BITS2)-1:0]          colRegBaseSnapshot;
  // Selects which MFS-wide slice of opBufferData is used.
  reg  [numBits(ceilDiv(`Z_MAX, MFS)-1)-1:0] memMux;
  reg  [MFS-1:0]                             memDataWord;
  reg  [NN-1:0]                 numSamplesToCollect;
  reg  [numBits(MFS)-1:0]                    numSamplesToFetch;
  reg  [numBits(`Z_MAX)-1:0]                 numSamplesFromRow;
  // numSamplesFromRow + MFS, registered for the end-of-row comparison.
  reg  [numBits(`Z_MAX+MFS)-1:0]             numSamplesFromRowMFS;
  // bit counter as used in pCollect
  reg  [numBits(NUM_OF_BITS)-1:0]            numberOfBits;
  // bit counter and compare value as used in pCtrl (monitors what's fetched
  // from memory, whereas numberOfBits monitors at the output).
  wire [numBits(NUM_OF_BITS)-1:0]            totalNumOpBits; // compare val
  reg  [numBits(NUM_OF_BITS)-1:0]            bitCount;
  reg  [`ENC_RAM_A-1:0]                      opBufferAddr;
  reg                                        opBufferSel;
  // Set once the first collect has been loaded; enables output words.
  reg                                        colRegPrimed;
  // High while reading the data part; cleared on the jump to parity.
  reg                                        fetchingData;
  reg                                        opDone;
  reg                                        lastOutput;
  wire                                       opClkEn;
  reg                                        lastCollectDone;
  wire                                       lastBlk;
  reg                                        firstCollect;
  reg                                        firstOutput;
  wire [NM-1:0]                 puncBits;
  wire [numBits(`REP_MAX)-1:0]               repBits;
  reg                                        jumpToParity;
  // Shortening calcs
  reg `blkNumType                            blkNumVec [0:`NUM_MU-1];  
  // Block counter of the currently selected user (blkNumVec[opUser]).
  wire [numBits(`MAX_BLKNUM)-1:0]            blkNum;
  // k minus the shortening bits of the current block.
  wire [NK-1:0]                 k2;
  reg  [NK-1:0]                 shrtBits;
  wire [NK-1:0]                 shrtBlkMod;
  reg                                        setRdyToRcv;
  reg                                        pingPongOp;

  genvar                                     idx1;
  
  // The clock is enabled on the opStart pulse in order to 'catch' it.
  // During a transfer the clock is stopped if clrToSend is dropped. The
  // opStrobe will still stay high as valid data is presented. The controller
  // can always fetch faster than it can deliver so opStrobe stays high until
  // the final data is delivered.
  // Block counter for the user currently being output.
  assign blkNum = blkNumVec[opUser];

  // Clock enable: any of the control events, or a transfer in progress
  // (clrToSend high while not waiting for the encoder).
  assign opClkEn = opStart || disabling || opDone || setRdyToRcv ||
                   (clrToSend && !waitForEncode);

  // Blocks numbered below puncMod / repMod carry one extra punctured /
  // repeated bit on top of the floor value.
  assign puncBits = (blkNum >= puncMod) ?
                    nPuncFloor : nPuncFloor + `PAD(1'b1, NM-1);
  assign repBits  = (blkNum >= repMod) ?
                    nRepFloor : nRepFloor + `PAD(1'b1, numBits(`REP_MAX)-1);

  // Bits to output for this block: n less shortening and puncturing, plus
  // repetition, all padded to the common counter width NT.
  assign totalNumOpBits = `PAD(n, NT-NN)
                          - `PAD(shrtBits, NT-NK)
                          - `PAD(puncBits, NT-NM)
                          + `PAD(repBits, NT-NR);

  //---------------------------------------------------------------------------
  // Work out shortening parameter
  //---------------------------------------------------------------------------

  // One extra shortening bit for blocks numbered below shrtMod.
  assign shrtBlkMod = `PAD((blkNum < shrtMod ) ? 1'b1 : 1'b0, NK-1);

  // Per-block bookkeeping: shortening count, per-user block counter,
  // ping-pong buffer select, packet complete flag and the ready-to-receive
  // pulse. shrtBits is registered to keep it off the timing critical path.
  always @(posedge(opClk) `RESET_STR)
  begin : pShrtBits
    integer idx1;

    if (!nReset) begin

      `LENC_INITQ(blkNumVec, `PAD(1'b0, BLKNUM_BITS-1), `NUM_MU);
      packetCompleteOut <= {`NUM_MU {1'b0}};
      pingPongOp <= 1'b0;
      shrtBits <= `PAD(1'b0, NK-1);
      setRdyToRcv <= 1'b0;

    end else begin

      // setRdyToRcv is a single cycle pulse, so default it low.
      setRdyToRcv <= 1'b0;

      // Latch the shortening count for the block that is starting.
      if (opStart) begin
        shrtBits <= nShrtFloor+shrtBlkMod;
      end

      // End of block.
      if (opDone) begin
        // Cleared so that k2 cannot go negative when k is changed.
        shrtBits <= `PAD(1'b0, NK-1);
        blkNumVec[opUser] <= blkNumVec[opUser] + `PAD(1'b1, BLKNUM_BITS-1);
        if (`DOUBLE_BUFFER) begin
          pingPongOp <= !pingPongOp;
        end
        // Last block flags the packet complete, any other block asks for
        // more input.
        if (lastBlk) begin
          packetCompleteOut[opUser] <= 1'b1;
        end
        else begin
          setRdyToRcv <= 1'b1;
        end
      end

      // Disabling has the final say and returns everything to idle.
      if (!enable) begin
        `LENC_INITQ(blkNumVec, `PAD(1'b0, BLKNUM_BITS-1), `NUM_MU);
        packetCompleteOut <= {`NUM_MU {1'b0}};
        pingPongOp <= 1'b0;
        shrtBits <= `PAD(1'b0, NK-1);
        setRdyToRcv <= 1'b0;
      end
    end
  end //pShrtBits

  // The current block is the last when its number is packetLen-1.
  assign lastBlk = (blkNum == packetLen-`PAD(1'b1, BLKNUM_BITS-1));

  // Data bits remaining once the shortened bits are removed.
  assign k2 = k - shrtBits;

  
  // pCtrl: fetch controller. Decides each enabled cycle whether another
  // MFS-wide fetch can be loaded into the collection register, tracks how
  // many samples have been fetched, maintains the per-user collection
  // register base pointer and drives the RAM address, select and memMux.
  // The variables with a V suffix are blocking 'next value' temporaries that
  // are registered only when a collect is confirmed.
  // OW_CR is the padding needed to extend opWidth to the width of colRegBase.
  localparam OW_CR = numBits(COL_REG_BITS2) - numBits(`OP_WIDTH);
  always @(posedge(opClk) `RESET_STR)
  begin : pCtrl
    reg [numBits(COL_REG_BITS2)-1:0]  colRegBaseV;
    reg [NN-1:0]         numSamplesToCollectV;
    reg [numBits(MFS)-1:0]            numSamplesToFetchV;
    reg [numBits(`Z_MAX)-1:0]         numSamplesFromRowV;
    reg [numBits(NUM_OF_BITS)-1:0]    bitCountV;
    reg                               jumpToParityV;
    reg                               lastCollectDoneV;
    reg                               collectConfirmedV;
    reg                               incrAddrV;
    integer                           idx1;
    
    if (nReset == 1'b0) begin
      
      waitForEncode <= 1'b1;
      `LENC_INITQ(colRegBase, `PAD(1'b0, numBits(COL_REG_BITS2)-1), `NUM_MU);
      fetchingData <= 1'b1;
      colRegPrimed <=1'b0;
      collectEnable <= 1'b0;
      // NOTE(review): colRegPrimed is already reset two lines above, so one
      // of the two assignments is redundant.
      colRegPrimed <= 1'b0;
      firstCollect <= 1'b1;
      lastCollectDone <= 1'b0;
      // RAM addressing
      memMux <= `PAD(1'b0, `MEM_MUX_BITS-1);     
      opBufferAddr <= `PAD(1'b0, `ENC_RAM_A-1);
      opBufferSel <= 1'b0;
      jumpToParity <= 1'b0;
      bitCount <= `PAD(1'b0, numBits(NUM_OF_BITS)-1);
      //if (`RESET_ALL) begin
      numSamplesToCollect <= `PAD(1'b0, NN-1);
      numSamplesToFetch <=  `PAD(1'b0, numBits(MFS)-1);
      numSamplesFromRow <= `PAD(1'b0, numBits(`Z_MAX)-1);
      numSamplesFromRowMFS <= `PAD(1'b0, numBits(`Z_MAX+MFS)-1);
      //end

    end else begin
      
      // This clear is not qualified by opClkEn.
      if (~enable || (opDone && !`CONCAT_OP)) begin
        // We only reset the pointer in the collection register at the start
        // of packet. This allows a partial output word from the previous block
        // to be concatenated with data from the following decode block.
        `LENC_INITQ(colRegBase, `PAD(1'b0, numBits(COL_REG_BITS2)-1), `NUM_MU);
      end
      
      if (opClkEn) begin       
        // Return to idle at the end of a block or when disabled.
        if (~enable || opDone) begin
          waitForEncode <= 1'b1;
          fetchingData <= 1'b1;
          // Number of bits collected
          bitCount <= `PAD(1'b0, numBits(NUM_OF_BITS)-1);
          // As above, but cycles back to 0 on repetitions.
          numSamplesToCollect <= `PAD(1'b0, NN-1);
          numSamplesToFetch <=  `PAD(1'b0, numBits(MFS)-1);
          numSamplesFromRow <= `PAD(1'b0, numBits(`Z_MAX)-1);
          // Unlike the reset branch, this is preloaded with MFS.
          numSamplesFromRowMFS <= MFS[numBits(`Z_MAX+MFS)-1:0];
          
          colRegPrimed <= 1'b0;
          collectEnable <= 1'b0;
          firstCollect <= 1'b1;
          // RAM addressing
          memMux <= `PAD(1'b0, `MEM_MUX_BITS-1);     
          lastCollectDone <= 1'b0;
          opBufferAddr <= `PAD(1'b0, `ENC_RAM_A-1);
          opBufferSel <= 1'b0;
          jumpToParity <= 1'b0;
        end
        else begin
          
          // Defaults for the 'next value' temporaries.
          numSamplesFromRowV = `PAD(1'b0, numBits(`Z_MAX)-1);
          bitCountV = `PAD(1'b0, numBits(NUM_OF_BITS)-1);
          numSamplesToCollectV = `PAD(1'b0, NN-1);
          jumpToParityV = 1'b0;
          lastCollectDoneV = 1'b0;
          
          if (waitForEncode) begin
            if (opStart) begin
              // Move into output mode
              waitForEncode <= 1'b0; 
              opBufferSel <= 1'b1;
            end
          end
          else if (clrToSend && ~opDone) begin
            
            //------------------------------------------------------------------
            // Whether to enable the collection register.
            //------------------------------------------------------------------
            
            // Work out if there is enough space in the collection register if
            // we enable a collect.
            
            collectConfirmedV = 1'b0;
            numSamplesToFetchV = numSamplesToFetch;
            if (!lastCollectDone) begin
              // Limit num fetched samples at end of a Z row.
              if (numSamplesFromRowMFS >= z) begin
                numSamplesToFetchV = z - numSamplesFromRow;
                numSamplesToCollectV = numSamplesToCollect +
                  `PAD(numSamplesToFetchV, NN - numBits(MFS));
                numSamplesFromRowV = `PAD(1'b0, numBits(`Z_MAX)-1);
              end
              else begin
                numSamplesToFetchV = MFS[numBits(MFS)-1:0];
                numSamplesToCollectV = numSamplesToCollect +
                  `PAD(MFS[numBits(MFS)-1:0], NN-numBits(MFS));
                numSamplesFromRowV = numSamplesFromRowMFS;
              end
              // But if we have read into the shortened region skip shortening.
              if (numSamplesToCollectV >= k2 && fetchingData) begin
                jumpToParityV = 1'b1;   // register later
                numSamplesToCollectV = k2;
                numSamplesToFetchV = k2 - numSamplesToCollect;
                // parity is on new row.
                numSamplesFromRowV = `PAD(1'b0, numBits(`Z_MAX)-1);                
              end
              // Detect the last time that we need to fetch data from the
              // encoder memory.
              bitCountV = bitCount + `PAD(numSamplesToFetchV,
                                          numBits(NUM_OF_BITS)-numBits(MFS));
              if (bitCountV >= totalNumOpBits && !firstCollect) begin
                // Trim the final fetch to the bits still outstanding.
                numSamplesToFetchV = totalNumOpBits - bitCount;
                lastCollectDoneV = 1'b1;   // register later               
              end
              // Then the next step is to check we stop a word early.
              // colRegBase is where the data will be put. Do a test increment
              // of this to see if there is space.
              if (firstCollect) begin
                collectConfirmedV = 1'b1;
                // leave colRegBase at 0.
              end
              else begin
                // Here we assume MFS samples (not numSamplesToCollectV) will
                // be loaded. This simplification reduces the complexity of
                // the collection process.
                colRegBaseV = colRegBase[opUser];
                if (collectEnable) begin
                  colRegBaseV = nextColRegBase; // What's being loaded now.
                end
                if (colRegPrimed) begin  // delays op to 'stock' the buffer
                  colRegBaseV = colRegBaseV -
                    `PAD(opWidth, numBits(COL_REG_BITS2)-numBits(`OP_WIDTH));
                end
                // What if we load more?
                if (colRegBaseV <= COL_REG_BITS[numBits(COL_REG_BITS2)-1:0] -
                                     MFS[numBits(COL_REG_BITS2)-1:0]) begin
                  // Allow collection
                  collectConfirmedV = ~lastCollectDone;                  
                end
              end
            end
            // Register the 'next value' temporaries only when the collect
            // goes ahead.
            if (collectConfirmedV) begin              
              collectEnable <= 1'b1;
              lastCollectDone <= lastCollectDoneV;
              numSamplesToCollect <= numSamplesToCollectV;
              numSamplesToFetch <= numSamplesToFetchV;
              numSamplesFromRow <= numSamplesFromRowV;
              numSamplesFromRowMFS <= numSamplesFromRowV + MFS[numBits(`Z_MAX)-1:0];
              bitCount <= bitCountV;
              if (jumpToParityV) begin
                fetchingData <= 1'b0;
              end
            end
            else begin
              collectEnable <= 1'b0;
              numSamplesToFetch <= `PAD(1'b0, numBits(MFS)-1);
            end
            // firstCollect drops on an active cycle where opBufferSel is high.
            if (opBufferSel) begin            
              firstCollect <= 1'b0;
            end
            //---------------------------------------------------------------
            // Counters synchronous to the fetch
            //---------------------------------------------------------------
            if (collectEnable) begin
              // Allow the collection reg to get more than one sample so it can
              // handle a small amount of data being fetched at the transition
              // past the shortened region.
              colRegPrimed <= 1'b1;
            end
            //-----------------------------------------------------------------
            // Collection register base.
            //------------------------------------------------------------------
            // Where collected data goes on the next cycle (colRegBase)
            // depends on the previous base, how much data was previously
            // collected and how much was output.
            colRegBaseV = colRegBase[opUser];
            if (collectEnable) begin
              colRegBaseV = nextColRegBase;
            end
            // Decrement colRegBase if there is to be an output sample.
            // Generally this is signified by output regEnable but if the LDPC
            // block does not fill a whole number of output words then the
            // output will be held over till the next block.
            if (colRegPrimed && (colRegBaseV >= `PAD(opWidth, OW_CR))) begin 
              colRegBaseV = colRegBaseV - `PAD(opWidth, OW_CR);
            end
            colRegBase[opUser] <= colRegBaseV;
            
            //------------------------------------------------------------------
            // Manage RAM addressing
            //------------------------------------------------------------------
            
            // memMux is used combinatorially while Addr is a cycle earlier.
            // Addr   MemMux (3 section example)
            // 0      0
            // 0      0  (First data now presented)
            // 0      1
            // 1      2
            // memMux can be incremented if the RAM data has been collected,
            // however addr must be incremented only if the RAM data *will be*
            // collected, hence the separation by collectEnable and
            // collectConfirmedV.
            
            if (collectEnable) begin
              
              // Wrap at the last slice, and restart at slice 0 after a jump
              // to parity or after the final collect.
              if (memMux == MEM_MUX_MAX(zEnum) || jumpToParity ||
                  lastCollectDone) begin
                memMux <= `PAD(1'b0, `MEM_MUX_BITS-1);
              end
              else begin
                memMux <= memMux + `PAD(1'b1, `MEM_MUX_BITS-1);
              end                                 
            end                     // collect enable
            // Default: no RAM select unless an address update below sets it.
            opBufferSel <= 1'b0;     
            if (collectConfirmedV) begin
              // Update address. The addressing order is:
              // 1. all data
              // 2. last Z row
              // 3. remaining Z rows, ending on last but one.
              
              if (jumpToParityV) begin // Step 2 - last Z row
                // Due to RAM fetch latency we use the 'variable' to move the
                // address early. memMux gets set to 0 the cycle after.
                opBufferAddr <= `P1_BASE;
                opBufferSel <= 1'b1;
              end
              else if (!jumpToParity ||
                       MEM_MUX_MAX(zEnum) == `PAD(1'b0, `MEM_MUX_BITS-1)) begin
                // Here we increment the address, but note that after we
                // have jumped to the parity part we know for sure that
                // we (normally) want to stay at the P1 address for several
                // cycles so there is no need to run this code. This avoids 
                // being falsely triggered to increment the address a second
                // time based on a spurious value of memMux.
                
                incrAddrV = 1'b0;
                if (MEM_MUX_MAX(zEnum) == `PAD(1'b0, `MEM_MUX_BITS-1)) begin
                  // Special case of MFS>=z. Increment address any time
                  // a collect is confirmed.
                  // collconf  --
                  // collectEn   --
                  // rdData      --
                  // addr      --   (so when collconf addr can be incremented).
                  incrAddrV = 1'b1;
                end
                else begin
                  if  // Normal case, detect cycle before last mem mux value
                    (((memMux == MEM_MUX_MAX(zEnum)-
                       `PAD(1'b1, `MEM_MUX_BITS-1)) && collectEnable) ||
                     // Case where previous cycle was a no collect
                     ((memMux == MEM_MUX_MAX(zEnum)) && !collectEnable)) begin
                      incrAddrV = 1'b1;
                    end
                end
                if (incrAddrV) begin                  
                  // Final row
                  if (opBufferAddr == `NCOLS_LESS2) begin
                    // Final output is fetched. Loop back to the beginning
                    // in case reps are required.
                    fetchingData <= 1'b1;
                    opBufferAddr <= `PAD(1'b0, `ENC_RAM_A-1);
                    opBufferSel <= 1'b1;  
                    numSamplesToCollect <= `PAD(1'b0, NN-1);                    
                  end
                  else begin
                    opBufferSel <= 1'b1;  
                    // last Z row done, jump to step 3.
                    if (opBufferAddr == `P1_BASE) begin             
                      opBufferAddr <= parityStartCol;
                    end
                    else begin
                      // Routine increment of address.
                      opBufferAddr <= opBufferAddr + `PAD(1'b1, `ENC_RAM_A-1);
                    end
                  end
                end
              end        
              lastCollectDone <= lastCollectDoneV;
            end                       // collect confirmed            
            // Registered every active cycle, confirmed or not.
            jumpToParity <= jumpToParityV;
          end                         // else waitForEncode
        end
      end
    end
  end //pCtrl

  //-------------------------------------------------------------------
  // Load collection register
  //-------------------------------------------------------------------

  // pCollect: owns the collection register. Each enabled cycle it shifts
  // out the word that was just accepted, loads newly fetched data at
  // colRegBase[opUser], counts the bits presented so far and generates
  // opStrobe, numOpBitsOut, lastOutput and opDone. In `CONCAT_OP mode a
  // partial final word is saved per user and prepended to the next block.
  always @(posedge(opClk) `RESET_STR)
  begin : pCollect
    
    localparam NUM_OF_BITS_BITS = numBits(NUM_OF_BITS);
    // Working copy of collectionReg, updated with blocking assignments and
    // registered at the end of the process.
    reg [COL_REG_BITS-1:0]          collectionRegV;
    reg [NUM_OF_BITS_BITS-1:0]      numberOfBitsV;
    reg                             lastOutputV;
    integer                         b;
    integer                         idx1;
    
    if (nReset == 1'b0) begin
      opDone <= 1'b0;
      opStrobe <= 1'b0;
      numberOfBits <= `PAD(1'b0, NUM_OF_BITS_BITS-1);
      numOpBitsOut <= `PAD(1'b0, numBits(`OP_WIDTH)-1);
      firstOutput <= 1'b0;
      collectionReg <= `PAD(1'b0, COL_REG_BITS-1);
      `LENC_INITQ(collectionRegArray, `PAD(1'b0, `OP_WIDTH-1), `NUM_MU);
      lastOutput <= 1'b0;
      //if (`RESET_ALL) begin
      colRegBaseSnapshot <= `PAD(1'b0, numBits(COL_REG_BITS2)-1);
      //end
    end else begin
      if (opClkEn == 1'b1) begin

        // opDone is a single (enabled) cycle pulse.
        opDone <= 1'b0;          
        if (opDone) begin
          opStrobe <= 1'b0;
          numOpBitsOut <= `PAD(1'b0, numBits(`OP_WIDTH)-1);
          firstOutput <= 1'b0;
          lastOutput <= 1'b0;
        end

        collectionRegV = collectionReg;
      
        // Initialise the collection reg for the current user.
        if (opStart) begin
          if  (`CONCAT_OP)
            collectionRegV[`OP_WIDTH-1:0] = collectionRegArray[opUser];
          else
            // Sized from `OP_WIDTH rather than a literal 16-bit zero.
            collectionRegV[`OP_WIDTH-1:0] = {`OP_WIDTH {1'b0}};
        end
        
        if (clrToSend && !opDone) begin

          // Defaults for this cycle; overridden further down.
          opDone <= lastOutput;
          opStrobe <= 1'b0;
          lastOutput <= 1'b0;
          numOpBitsOut <= `PAD(1'b0, numBits(`OP_WIDTH)-1);
          if (firstCollect) begin
            firstOutput <= 1'b1;
            colRegBaseSnapshot <= colRegBase[opUser];
          end
          // After an 'output' any remaining data in the collection register
          //  must be shifted down.
          if (opStrobe) begin
            collectionRegV = shiftDownOp(collectionRegV, OP_WIDTH_BITS-1, 0, opWidth);
          end
          // New data is then loaded at colRegBase.
          if (collectEnable == 1'b1) begin
            
            // With the 'if' this is the same as updatePart (so same as VHDL)
            // but this increases gate-count (though gate-count is still
            // better than with update part).
            if (colRegBase[opUser] <= COL_REG_BITS[numBits(COL_REG_BITS2)-1:0] -
                                     MFS[numBits(COL_REG_BITS2)-1:0]) begin
              collectionRegV[colRegBase[opUser] +: MFS] = memDataWord;
            end else begin
              // The slice is MFS bits wide, so write an MFS-wide zero rather
              // than relying on truncation of a COL_REG_BITS-wide value.
              collectionRegV[colRegBase[opUser] +: MFS] = {MFS {1'b0}};
            end
          end
          
          // Track the number of bits that are about to be output. 
          lastOutputV = 1'b0;
          if (colRegPrimed == 1'b1 && lastOutput == 1'b0) begin
            if (firstOutput == 1'b1) begin
              //numberOfBitsV = puncBits + shrtBits + `OP_WIDTH;
              numberOfBitsV = `PAD(opWidth, NUM_OF_BITS_BITS-numBits(`OP_WIDTH));
              if (`CONCAT_OP) begin
                // We may be outputting data from the previous block so there
                // is less than `OP_WIDTH of data from this block.
                numberOfBitsV = numberOfBitsV -
                  `PAD(colRegBaseSnapshot, NUM_OF_BITS_BITS-numBits(COL_REG_BITS2));
              end
            end else begin
              numberOfBitsV = numberOfBits + `PAD(opWidth,  NUM_OF_BITS_BITS-numBits(`OP_WIDTH));
            end
            if (numberOfBitsV >= totalNumOpBits) begin
              lastOutputV = 1'b1;
            end
            numberOfBits <= numberOfBitsV;
          end
          // Generally we create an opStrobe after the outputEnable signal,
          // however there is also special handling required when the last bits
          // of the LDPC block do not fill a whole output word.
          if (colRegPrimed == 1'b1 && lastOutput == 1'b0) begin
            firstOutput <= 1'b0;
            // Generate op strobe except on last output in `CONCAT_OP mode.
            // Unless it is the last block or the last output fully fills the
            // output word.
            if (lastOutputV == 1'b0 || !`CONCAT_OP ||
                (nextColRegBase >= `PAD(opWidth, numBits(COL_REG_BITS2)-numBits(`OP_WIDTH))) ||
                 lastBlk == 1'b1) begin
              opStrobe <= 1'b1;
              numOpBitsOut <= opWidth;
            end
            else begin
              // In this case the partial output word is retained in order to
              // be joined with the next block (except at the end of the
              // packet).
              opStrobe <= 1'b0;
              opDone <= 1'b1;
            end                       // colRegBase
            // Number of valid output bits when less than opWidth.
            if (nextColRegBase < `PAD(opWidth, numBits(COL_REG_BITS2)-numBits(`OP_WIDTH))) begin
              numOpBitsOut <= nextColRegBase[numBits(`OP_WIDTH)-1:0];
            end
            // If this is the last word and it is not CONCAT_OP mode, then
            // zero any unused bits. Do this also for last block in
            // `CONCAT_OP mode.
            if (lastOutputV == 1'b1 && (!`CONCAT_OP || lastBlk == 1'b1)) begin
              // A partial word may be provided on the last block. Zero any
              // unused bits of the output word.
              // NOTE(review): these nonblocking bit writes are overridden by
              // the whole-register 'collectionReg <= collectionRegV' at the
              // end of this process (the last nonblocking assignment wins),
              // so they currently have no effect. Left unchanged here; the
              // unused bits are in practice masked by the numOpBitsOut based
              // mux that drives opDataWordOut. Confirm the intent before
              // moving the zeroing onto collectionRegV.
              for (b=0; b<=`OP_WIDTH-1; b=b+1) begin
                if ($unsigned(b[numBits(COL_REG_BITS2)-1:0]) >= nextColRegBase) begin
                  collectionReg[b[numBits(COL_REG_BITS2)-1:0]] <= 1'b0;
                end                
              end
            end
          end                         // colRegPrimed
          if (lastOutputV && `CONCAT_OP) begin
            // For `CONCAT_OP mode, save any left over bits for concatenation
            // with next block.
            collectionRegArray[opUser] <= collectionRegV[`OP_WIDTH-1:0];
          end          
          lastOutput <= lastOutputV;
        end
        collectionReg <= collectionRegV;
        // Disabling overrides everything above.
        if (!enable) begin
          opStrobe <= 1'b0;
          numOpBitsOut <= `PAD(1'b0, numBits(`OP_WIDTH)-1);
          firstOutput <= 1'b0;
          lastOutput <= 1'b0;
          opDone <= 1'b0;          
        end
      end
    end
  end //pCollect

  // Pointer to the first bit after the data currently being loaded: the
  // present base for this user plus the number of samples in the current
  // fetch. This value is needed in multiple places. Note it is only part of
  // the calculation for the next col reg base.
  assign nextColRegBase =
    `PAD(numSamplesToFetch, numBits(COL_REG_BITS2)-numBits(MFS)) +
    colRegBase[opUser];

  //---------------------------------------------------------------------------
  // Pick a word out of the memory read data.
  //---------------------------------------------------------------------------

  always @(opBufferData, memMux)
  begin : pMemMux
    // An integer is used for the loop index; a MEM_MUX_BITS wide reg would
    // risk wrapping.
    integer sel;
    // Default to zero, then take the MFS-wide slice of the read data whose
    // index matches memMux.
    memDataWord = {MFS {1'b0}};
    for (sel = 0; sel <= MUX_MAX; sel = sel + 1) begin
      if (sel[`MEM_MUX_BITS-1:0] == memMux) begin
        memDataWord = opBufferData[MFS*sel +: MFS];
      end
    end
  end //pMemMux  

  // Strobe and RAM address are driven straight from their registers.
  assign opStrobeOut = opStrobe;
  assign opBufferAddrOut = opBufferAddr;
  // The RAM select is qualified by clrToSend so that the RAM is gated at the
  // same time as the op block, which makes a clrToSend==0 cycle a total NoOp.
  assign opBufferSelOut = clrToSend & opBufferSel;

  // Present the low `OP_WIDTH bits of the collection register, with every
  // bit at or above numOpBitsOut forced to zero. A count of zero leaves the
  // word unmasked. The mask is derived from `OP_WIDTH instead of a fixed
  // 16-entry table, so it stays correct if `OP_WIDTH is changed.
  always @(*)
  begin : pOpDataWord
    integer bitIdx;
    opDataWordOut = collectionReg[`OP_WIDTH-1:0];
    if (numOpBitsOut != 0) begin
      for (bitIdx = 0; bitIdx < `OP_WIDTH; bitIdx = bitIdx + 1) begin
        if (bitIdx >= numOpBitsOut) begin
          opDataWordOut[bitIdx] = 1'b0;
        end
      end
    end
  end

  // Per-user block counters packed onto the output bus.
  `LENC_PACK(gBlk, blkNumVec, curBlkNumOut, BLKNUM_BITS, `NUM_MU)

  // Control and status outputs are direct copies of the internal signals.
  assign pingPongOpOut  = pingPongOp;
  assign setRdyToRcvOut = setRdyToRcv;
  assign opClkEnOut     = opClkEn;
  // opDone is set as the last output word is presented. If clrToSend has
  // been dropped then the output is held until it is raised again; only at
  // that point has the output been sampled.
  assign opDoneOut      = opDone;

  // lastOutput marks the final sample of a block; qualified by lastBlk it
  // marks the final sample of the packet.
  assign lastBlockOpSampleOut = lastOutput;
  assign lastOpSampleOut      = lastBlk && lastOutput;

  // Debug visibility of the read data mux.
  assign memDataWordOut = memDataWord;
  assign memMuxOut      = memMux;
  
endmodule




