//-----------------------------------------------------------------------
// ldpcDecPkg.v
// 
// Description
//   Constants and functions for the LDPC Decoder.        
//
// 5 Apr 2010 M. Rumsey
// 4 Apr 2011 M. Rumsey. Added USE_ALT_RAMS option.
//
// (c) Copyright 2010, Blue Rum Consulting Limited, All Rights Reserved.
//------------------------------------------------------------------------------

// Apply a 1ns time unit / 1ps precision timescale, but only when the build
// defines BRC_TIMESCALE. Without that define no `timescale is issued here.
`ifdef BRC_TIMESCALE
`timescale 1ns/1ps
`endif

`ifndef ldpcDec_vh
`define ldpcDec_vh

// Macro version of minimum and maximum.
// Every use of an argument is parenthesised so that expression arguments
// containing operators of lower precedence than '<' / '>' (e.g. &, |, ?:)
// are compared and returned as a whole.
// Note: each argument is expanded twice, so pass side-effect free expressions.
`define LDEC_MIN(a,b) (((a)<(b))?(a):(b))
`define LDEC_MAX(a,b) (((a)>(b))?(a):(b))

// How to convert seconds to time units when reading vectors
// (1e9 time units per second, i.e. a 1ns time unit).
`define LDEC_TU 1e9
// The Verilog time resolution factor, consistent with the timescale ratios above
// (1ns time unit / 1ps precision = 1000).
`define LDEC_TR 1000
// Width of the debug port. NOTE(review): presumably in bits; the port itself
// is not declared in this file - confirm against its declaration.
`define LDEC_DBG_PORT_WIDTH  (32)

//---------------------------------------------------------------------------
// User configurable.
//---------------------------------------------------------------------------

// This is an easy way to set the pipelining
// for example use 0, 1, 2, 3 for ASIC 160MHz, 240MHz, 320MHz and 480MHz,
// or 4 for FPGA.
// The value is compared against thresholds by the LDEC_PIPE_A/B/X/C macros
// further down (> 0, > 1, > 2 and > 3 respectively). Note that when
// LDEC_FPGA is non-zero those macros take fixed values and this setting
// is not consulted by them.
`define LDEC_NUM_PIPES      (3)

// This ASIC/FPGA switch gives an easy way to switch between what is
// optimal on an ASIC and an FPGA. You may not have to change any other
// options. See the 'sel' functions in constants below for the choices
// made in each case.
// 0 selects the ASIC choices, non-zero selects the FPGA choices in the
// conditional macros of this file.
`define LDEC_FPGA           (0)
//`define LDEC_CLK_GATING     (`LDEC_FPGA ? 0 : 1)
// LDEC_CLK_GATING inserts functional clock gating in the code: if it is set to 1 and the clock is not gated, functionality is not the same
// It is currently fixed at 0; the FPGA-dependent form above is commented out.
`define LDEC_CLK_GATING     0
// RESET_ALL forces power-up resetting of signals that do not strictly need
// resetting.
// RESET_ALL=0. Control logic 'manages' these signals and ensures that they
//              are synchronously initialised before use, so power up
//              resetting is not required and gate-count is reduced.
// RESET_ALL=1. Forces these signals to be reset, perhaps because this is
//             your design policy.
// Note : RESET_STR_B also needs changing if you change RESET_ALL
// code updated to remove use of this define as not supported by SpyGlass
//`define LDEC_RESET_ALL    (1'b1)

// Uncomment the negedge line to get async resets, otherwise use the
// blank RESET_STR line option to get synchronous resets.
// The expansion starts with a comma and names the signal nReset directly,
// so it only fits after another event in a sensitivity list and requires
// the reset signal in the using module to be called nReset.
`define LDEC_RESET_STR      , negedge(nReset)
//`define LDEC_RESET_STR

// If RESET_ALL=false removes the need for a reset to an always block then
// the reset string (in the sensitivity list) needs to be removed.
// RESET_STR_B is the reset string used on such always blocks.
// If  RESET_ALL=0 use this
//`define LDEC_RESET_STR_B
// If  RESET_ALL=1 use this
// (currently selected: RESET_STR_B is identical to RESET_STR).
`define LDEC_RESET_STR_B `LDEC_RESET_STR

// If your synthesis tool does not support recursive functions then comment this out
// (it is currently commented out, i.e. LDEC_ALLOW_RECURSIVE is not defined).
// `define LDEC_ALLOW_RECURSIVE

// Select 32 bit or 16 bit wide accesses (BUS 32 = 1/0 respectively).
// In 32 bit mode WRITE_EN and READ_EN become 2 bits, which allows you to
// set the 16 bit words within the 32 bit word individually or together.
// LDEC_NUM_DATA_BITS is derived from this value as (1+BUS32)*16.
`define LDEC_BUS32         (1'b1)
  
// Number of ports on the varResp and chkResp memories. Single port is less
// area but the technique requires that all code macro rows are the same
// length. Different code tables will be loaded (in the testbench) and
// there is some performance loss due to NoOp cycles introduced to
// make the code macro row weights the same.
`define LDEC_NUM_PORTS      (2)
// Comment or uncomment this to match. This allows cleaner removal of
// unused code.
// LDEC_2_PORTS carries no value; it must be kept in agreement with
// LDEC_NUM_PORTS by hand (defined when LDEC_NUM_PORTS is 2).
`define LDEC_2_PORTS

// For the CELL RAM it is possible to use 2 SP RAMs instead of
// 1 dual port RAM (where 1 wr port and 2 rd ports are used). For the
// 2 SP RAMs set the following to 1.
`define LDEC_USE_ALT_RAMS   (0)
// The cell Ram can also be replaced by a 'ROM'. The cell Ram defines the 
// codes to be used, so use of a Rom is fine if the block is only used for  
// the standard defined by the code (e.g 802.11n).
// Value-less flag: comment the line out to disable it.
`define LDEC_USE_CELL_ROM
// RAMs may be located at the top of the design. May be useful for some ASIC
// design methodologies, however location with the functional logic
// may help grouping in implementation.
// Value-less flag: comment the line out to disable it.
`define LDEC_RAMS_AT_TOP
 
// Pipelining options. 0/1 are used for disable/enable.
// For MINSUM_ALG=1 enable in order A, AB, XAB, XABC
// (or try as below, as there is often not much difference when MINSUM_ALG=1)
// For MINSUM_ALG=3 enable in order A, AB, ABC, XABC
//
// As coded below: when LDEC_FPGA is non-zero, PIPE_A, PIPE_B and PIPE_X are
// 1 and PIPE_C is 0 regardless of LDEC_NUM_PIPES. Otherwise PIPE_A, PIPE_B,
// PIPE_X and PIPE_C are 1 when LDEC_NUM_PIPES exceeds 0, 1, 2 and 3
// respectively, which gives the order A, AB, XAB, XABC.

// PIPE X is in the bwd block after the compare-select of the check metric.
// Note that if MINSUM_ALG=1 there is also an offset subtraction and
// PIPE X then becomes more worth doing.
`define LDEC_PIPE_X     (`LDEC_FPGA ? 1 : (`LDEC_NUM_PIPES > 2) ? 1 : 0)
    
// In ldpcDecFwd, PIPE A registering falls in the middle of the RotateDownZ
// function. This is a 'barrel shifter' where the registering can be moved
// to optimize timing. The registering is only relevant if PIPE_A=1.
// If PIPE_B is not set (as typical for ASIC) then the best timing split is
// likely to be roughly in the middle of the rotator. RDZ_LEVEL can have
// values from -1 to 6. Level 0 is the first and level 6 is the 7th,
// corresponding to the number of bits in the cyclical shift. -1 causes
// the register to precede the rotator.
`define LDEC_PIPE_A    (`LDEC_FPGA ? 1 : (`LDEC_NUM_PIPES > 0) ? 1 : 0) 

// PIPE B may be set to 0, 1. PipeB is equivalent to PIPE A with RDZ_LEVEL =
// 6 as far as varMetric processing is concerned, but PipeB also pipelines
// checkResponses.
`define LDEC_PIPE_B     (`LDEC_FPGA ? 1 : (`LDEC_NUM_PIPES > 1) ? 1 : 0)

// PIPE C may be set to 0, 1. Note that when MINSUM_ALG = 0|1 there is
// very little logic in the stage. It is more appropriate to use this when
// MINSUM_ALG = 2|3.
`define LDEC_PIPE_C     (`LDEC_FPGA ? 0 : (`LDEC_NUM_PIPES > 3) ? 1 : 0)

// Control location of PIPE_A. See PIPE_A comments. Here we pick a reasonable
// default value depending on what pipelining is selected. The signal at PIPE_A
// is varMetricMid. If the critical path is before this signal then decrease
//  RDZ_LEVEL. If it is after then increase it.
// Selection: 0 when PIPE_X is set, otherwise -1 when PIPE_B is set,
// otherwise 3.
`define LDEC_RDZ_LEVEL  (`LDEC_PIPE_X ? 0 : `LDEC_PIPE_B ? -1 : 3)

// BWD_FWD_LATENCY refers to the process of passing data from bwd to fwd blocks.
// Latency 0: fwd & bwd blocks are in sync and may be processing the same
//            code columns. Data is passed combinatorially (see ldpcDecVmMem).
// Latency N: There is no combinatorial bypass. fwd processing is staggered
//            N cycles after bwd processing to allow time for data to be
//            written into the VM RAM and then be read. Latency of 1 means
//            a RAM location can be addressed for write and read in the same
//            cycle and the new data will be what's read.
`define LDEC_BWD_FWD_LATENCY  (0)

// Intermediate constant for extra latency between completion of bwd row
// and start of fwd row.
// PIPE_ABCX is the number of enabled pipeline stages (A, B, C, X);
// PIPE_ABCXL adds BWD_FWD_LATENCY to that count.
`define LDEC_PIPE_ABCX   (`LDEC_PIPE_A + `LDEC_PIPE_B + `LDEC_PIPE_C + `LDEC_PIPE_X)
`define LDEC_PIPE_ABCXL  (`LDEC_PIPE_ABCX + `LDEC_BWD_FWD_LATENCY)
// Pipelining on varMetricSum. Only possible if other pipelining adds
// latency (1 whenever PIPE_ABCXL is non-zero, otherwise 0).
`define LDEC_PIPE_VMS   (`LDEC_PIPE_ABCXL ? 1: 0)
// Pipelining on parity check. Not dependent on any other pipelining.
`define LDEC_PIPE_PAR   (1)
// Register varResp FIFO output. Register just becomes last stage of FIFO
// so no performance impact. Less useful if PIPE_X is set, otherwise enabled
// if 2 or more of PIPE A/B/C set (this is not a requirement - it just
// implies timing is difficult).
`define LDEC_PIPE_VR    (`LDEC_PIPE_X ? 0 : (`LDEC_PIPE_A+`LDEC_PIPE_B+`LDEC_PIPE_C > 1) ? 1 : 0)

// Z_SLICE2 is used in the countBits function that adds parity fails across
// Z check-sums. We break the processing into a two level tree and Z_SLICE2
// determines the size of the adders in the first level. For an ASIC the
// sqrt(Z) is suggested (9 for Z_MAX = 81). For an FPGA the size of the LUT
// plays a role.
// Experiment with this setting if you have timing issues from parityVec to
// parityErrs.
// Only relevant if EARLY_TERM_SUPPORT 1.
`define LDEC_Z_SLICE2   (`LDEC_FPGA ? 6 : 9)

// The input pre-processor handles re-mapping of IP_WIDTH samples to the
// DEC_RAM_IP_WIDTH samples needed for the RAM. Bridging of shortened data is
// handled. Without this option you must set IP_WIDTH=DEC_RAM_IP_WIDTH.
`define LDEC_IP_PREPROC  (1)
// Max Input Interface width (each input sample is IP_BITS wide). The actual
// width may be less than or equal to this as set by the ipWidth input port.
// IP_WIDTH is often a multiple of the number of bits in the QAM symbol.
// The value is 20 when the build defines RW_NX_LDPC_LDECX2, otherwise 10.
`ifdef RW_NX_LDPC_LDECX2
`define LDEC_IP_WIDTH    (20)
`else
`define LDEC_IP_WIDTH    (10)
`endif //  RW_NX_LDPC_LDECX2
// Indicate whether it is OK for the input interface to drop rdyToRcv for
// a cycle at the transition from input data to parity when there is
// shortening (Set this to 1 if SHRT_SUPPORT is 0). This saves gates
// in ldpcDecIp at the expense of a single cycle on the input interface.
`define LDEC_IP_STALL_OK  (1)

// Width of the actual write into the input buffer RAM. For FPGA we
// compensate for reduced clock speed by a wider input bandwidth. This number
// is usually 9 or, for IP_WIDTH>9, 27.
`define LDEC_DEC_RAM_IP_WIDTH  (27)

// Output Interface width and width used to fetch data from RAM for output
// interface. Ideally MEM_FETCH_SIZE is divisible into Z but must be > OP_WIDTH.
// Tested values are (respectively) 8/9, 16/27, 32/54.
// The 8/9 pair is active; the 16/27 pair is kept commented out below.
`define LDEC_OP_WIDTH        (8)
`define LDEC_MEM_FETCH_SIZE  (9)
//`define LDEC_OP_WIDTH        (16)
//`define LDEC_MEM_FETCH_SIZE  (27)
 
// Concatenation relates to the amount of input data for a block not being a
// multiple of the input width or the amount of output data not being a
// multiple of the output width. One option is for the decoder to ignore
// spare input bits and zero spare output bits, another is to allow the
// blocks of a packet to be concatenated such that an input or output word
// may straddle block boundaries.
// Both options are 1'b0 when RW_NX_LDPC_LDECX2 is defined, otherwise 1'b1.
// NOTE(review): presumably 1'b1 selects the "concatenated" behaviour
// described above - confirm in the input/output blocks.
`ifdef RW_NX_LDPC_LDECX2
`define LDEC_CONCAT_OP      1'b0
`define LDEC_CONCAT_IP      1'b0
`else
`define LDEC_CONCAT_OP      1'b1
`define LDEC_CONCAT_IP      1'b1
`endif //  RW_NX_LDPC_LDECX2

// Number of Spatial streams.
// This is referred to 80MHz channels so use double for 160MHz.
// NOTE(review): LDEC_NSS is only defined here when the build defines
// RW_TXRX_1X1 or RW_TXRX_2X2. If neither is defined, every macro below that
// uses `LDEC_NSS expands to an undefined macro; if both are defined,
// LDEC_NSS is defined twice. There is no fallback or check in this file.
//`define LDEC_NSS            (2)
`ifdef RW_TXRX_1X1
  `define LDEC_NSS            (1)
`endif  
`ifdef RW_TXRX_2X2
  `define LDEC_NSS            (2)
`endif  

// Max number of blks in pkt. To avoid floats aMaxPPDUTime=5.484 ms becomes 54840 and the OFDM symbol period in us is x10.
// (So the divisors 136 and 36 below stand for 13.6us and 3.6us, and
// 54840/136, 54840/36 are integer-truncated symbol counts.)
// LDEC_NCBPSMAX_AX / _AC depend on which RW_NX_DERIV_CHBW* macro the build
// defines. NOTE(review): as with LDEC_NSS there is no fallback when none of
// the three is defined.
`ifdef RW_NX_DERIV_CHBW20ONLY
  `define LDEC_NCBPSMAX_AX (2340)
  `define LDEC_NCBPSMAX_AC (416)
`endif
`ifdef RW_NX_DERIV_CHBW4020ONLY
  `define LDEC_NCBPSMAX_AX (4680)
  `define LDEC_NCBPSMAX_AC (864)
`endif
`ifdef RW_NX_DERIV_CHBW804020ONLY
  `define LDEC_NCBPSMAX_AX (9800)
  `define LDEC_NCBPSMAX_AC (1872)
`endif
// Block counts: symbols * NSS * bits-per-symbol / 1944, where 1944 equals
// LDEC_N_MAX (LDEC_Z_MAX * LDEC_NCOLS = 81 * 24). All arithmetic is integer
// and evaluated left to right.
// NOTE(review): LDEC_MAX_BLKNUM calls a function named 'maximum' that is not
// defined in this file, whereas LDEC_MAX_PACKET_BITS below uses the
// `LDEC_MAX macro for the same purpose - 'maximum' must be visible wherever
// LDEC_MAX_BLKNUM is expanded.
`define LDEC_MAX_BLKNUM_AX (54840/136 * `LDEC_NSS * `LDEC_NCBPSMAX_AX / 1944)
`define LDEC_MAX_BLKNUM_AC (54840/36 * `LDEC_NSS * `LDEC_NCBPSMAX_AC / 1944)
`define LDEC_MAX_BLKNUM    (maximum(`LDEC_MAX_BLKNUM_AX, `LDEC_MAX_BLKNUM_AC))
//`define LDEC_MAX_BLKNUM    (54840/136 * `LDEC_NSS * `LDEC_NCBPSMAX_AX / 1944)

// Max number of bytes in pkt converted to left limit of upper word
// of a 2 word pair. Assumes 5.484ms aPpduMaxTime.
// MAX_PACKET_BITS_* scale the symbol bit count by 5/6; MAX_PACKET_BITS is
// the larger of the AX and AC values.
// PB_MS_LEFT uses 'numBits', which is not defined in this file.
// NOTE(review): the "- 17" presumably removes the 16 bits of the lower word
// plus one to convert a bit count into a left index - confirm.
`define LDEC_MAX_PACKET_BITS_AX (54840/136 * `LDEC_NSS * `LDEC_NCBPSMAX_AX * 5/6)
`define LDEC_MAX_PACKET_BITS_AC (54840/36 * `LDEC_NSS * `LDEC_NCBPSMAX_AC * 5/6)
`define LDEC_MAX_PACKET_BITS (`LDEC_MAX(`LDEC_MAX_PACKET_BITS_AX,`LDEC_MAX_PACKET_BITS_AC))
`define LDEC_PB_MS_LEFT      (numBits(`LDEC_MAX_PACKET_BITS/8) - 17)

//---------------------------------------------------------------------------
// Changes not recommended - new vectors required.
//---------------------------------------------------------------------------

// Early termination support, as described by the PARITY_THRESH and
// EARLY_TEST_ITERATIONS registers, may be disabled. PREV_PARITY_ERRS then
// reports only number of code macro rows with errors, not total number
// of rows.
// This switch also selects the value of LDEC_PAR_ERRS_MAX.
`define LDEC_EARLY_TERM_SUPPORT 1'b1
  
// Signal processing (affects performance)
// CHK_BITS and IP_BITS are derived from VAR_BITS (VAR_BITS-2 and VAR_BITS-3,
// i.e. 6 and 5 for VAR_BITS = 8).
`define LDEC_VAR_BITS       (8)
`define LDEC_CHK_BITS       (`LDEC_VAR_BITS-2)
`define LDEC_IP_BITS        (`LDEC_VAR_BITS-3)

// Set the min-sum algorithm type
// 0: no corrections. Expect >0.5dB loss.
// 1: offset. A fixed offset is subtracted from check metrics at row end.
//    This is done in the bwd block so timing is decreased after PIPE_C in
//    fwd block but increased in bwd block. Lower gate-count & performance
//    than (2). About the same as (3).
// 2: corrected. Fixed offset is applied conditionally after each macro cell.
//    This is done in the fwd block after PIPE_C. Only the 1st (lowest)
//    chkMetric is processed.
// 3: corrected as above, but marginalised (2nd lowest) chkMetric also done.
// 1 & 3 are the most viable options.
// LDEC_CHK_METRIC_RIGHT depends on this value (2 when > 1, otherwise 1).
`define LDEC_MINSUM_ALG     (1)

// Repeat support is a feature of IP_PREPROC. Repeats (as defined in 802.11n)
// are a repeat of LLRs already sent such that they are added (in the
// decoder) to the previous LLRs. They are used when the LDPC block size is
// smaller than what the channel can support.
`define LDEC_REP_SUPPORT    1'b1
// Shortening/Puncturing is a feature for 802.11n but can be disabled for non 11n/ac
// applications.
`define LDEC_SHRT_SUPPORT   1'b1
`define LDEC_PUNC_SUPPORT   1'b1

// 802.11ac de-padding support. Padded bits are not output and any whole
// blocks that are all padding are not decoded.
`define LDEC_PAD_SUPPORT    1'b1
// Special Aggregate packet support. See FRAME_END_BYTE reg in userguide.
`define LDEC_SPECIAL_AGG_SUPPORT 1'b1

//---------------------------------------------------------------------------
// Not User Configurable
//---------------------------------------------------------------------------
  
// Register interface dimensions.
// NUM_ADDR_BITS: number of address bits of the register interface.
`define LDEC_NUM_ADDR_BITS  (5)
// NUM_DATA_BITS: 16 when LDEC_BUS32 is 0, 32 when LDEC_BUS32 is 1.
// The whole expression is parenthesised so that the macro behaves as a
// single operand wherever it is expanded (e.g. as a divisor or after '%').
`define LDEC_NUM_DATA_BITS  ((1+`LDEC_BUS32)*16)

// Block version information.
// BLK_ID is 8'hB1 for the dual-port build (LDEC_NUM_PORTS == 2) and 8'hB0
// otherwise. The conditional is parenthesised explicitly because '+' binds
// tighter than '?:': without the inner parentheses the expression parses as
// (8'hB0 + (NUM_PORTS==2)) ? 8'h1 : 8'h0 and always yields 8'h1.
// NOTE(review): this changes the ID byte of LDEC_BLK_ID_VER from 8'h01 to
// 8'hB0/8'hB1; anything that compares the version readback against the
// old value needs updating.
`define LDEC_BLK_ID         (8'hB0 + ((`LDEC_NUM_PORTS==2) ? 8'h1 : 8'h0))
// Major version (stored minus one) and minor version, 4 bits each.
`define LDEC_MAJ_VER_LESS1  4'h4
`define LDEC_MIN_VER        4'h4
// BLK_VER is {major-1, minor} (8 bits); BLK_ID_VER is {BLK_ID, BLK_VER} (16 bits).
`define LDEC_BLK_VER        {`LDEC_MAJ_VER_LESS1, `LDEC_MIN_VER}
`define LDEC_BLK_ID_VER     {`LDEC_BLK_ID, `LDEC_BLK_VER}
                         
// Z is the number of rows and cols in the macro cell. There are
// several sizes of Z and we enumerate the possibilities.
// Z_ENUM_MAX is the highest enumeration value (ranges below run 0..Z_ENUM_MAX).
`define LDEC_Z_ENUM_MAX     (2)
// R is the code rate and rEnum is the enumeration number for lookups.
// R_ENUM_MAX is the highest enumeration value (ranges below run 0..R_ENUM_MAX).
`define LDEC_R_ENUM_MAX     (3)

// Lookup table types built by text concatenation: a [0:MAX] range followed
// by the element type. kSizeType nests the rate range inside the Z range.
`define LDEC_intArrayType integer  
`define LDEC_zSizeType [0:`LDEC_Z_ENUM_MAX]`LDEC_intArrayType
`define LDEC_rSizeType [0:`LDEC_R_ENUM_MAX]`LDEC_intArrayType                         
`define LDEC_kSizeType [0:`LDEC_Z_ENUM_MAX]`LDEC_rSizeType                     
   
// Largest macro cell size.
`define LDEC_Z_MAX          (81)
   
// There are up to 88 cells used in 802.11n and this is what NCELLS gets set
// to for dual port mem. But for single port this number increased by the 
// number of NoOp cells required to make all code macro rows the same length.
//  Data comes from ldpcHmAnalyse.m.
// NOTE(review): 'sel2' is not defined in this file; from the comment above it
// presumably returns 88 when LDEC_NUM_PORTS is 2 and 96 when it is 1 -
// confirm its argument order.
`define LDEC_NCELLS         (sel2(`LDEC_NUM_PORTS-1, 88, 96))
`define LDEC_NCOLS          (24)
`define LDEC_NROWS          (12)
`define LDEC_NCOLS_RIGHT    (31)
`define LDEC_ROW_WEIGHT     (22)
// Derived maxima: N_MAX = Z_MAX*NCOLS (1944), K_MAX = N_MAX*5/6 (1620),
// M_MAX = N_MAX/2 (972).
`define LDEC_N_MAX          (`LDEC_Z_MAX*`LDEC_NCOLS)
`define LDEC_K_MAX          (`LDEC_N_MAX*5/6)
`define LDEC_M_MAX          (`LDEC_N_MAX/2)
// Max Number of Bits Per Symbol assumes 80MHz channels. NCBPS is then
// NCBPSMAX*NSS for non STBC, double for STBC, but we assume STBC is only
// supported on NSS_MAX-1 channels.
// TODO the formula does not work for Nss=1!!!
// (The TODO refers to the commented-out formula below, which evaluates to 0
// for NSS = 1; the active definition uses 2*NSS instead.)
//`define LDEC_BPS_MAX        (LDEC_NCBPSMAX_AX*2*(`LDEC_NSS-1))
`define LDEC_BPS_MAX        (`LDEC_NCBPSMAX_AX * 2 *`LDEC_NSS)
// NREPS_MAX = BPS_MAX - 324 (1944*1/6).
`define LDEC_NREPS_MAX      (`LDEC_BPS_MAX - 1944*1/6)
`define LDEC_MAX_IPBITS     (`LDEC_BPS_MAX + `LDEC_IP_WIDTH - 1)
// Largest iteration count, as a 6-bit literal.
`define LDEC_MAX_ITER       (6'd31)
// Largest parity error count: M_MAX-1 with early termination support,
// otherwise NROWS+1.
`define LDEC_PAR_ERRS_MAX   (`LDEC_EARLY_TERM_SUPPORT ? `LDEC_M_MAX-1 : `LDEC_NROWS+1)
// Number of flag bits per cell (see the LDEC_FLAG_*_BIT positions).
`define LDEC_FLAG_BITS      (3)

// Address map: control registers.
// Register addresses are word indices; the highest one used (27) fits in the
// LDEC_NUM_ADDR_BITS (5) address bits. Address 1 is not assigned in this map.
`define LDEC_BLK_VERSION_ADDR            (0)
`define LDEC_ENABLE_ADDR                 (2)
`define LDEC_PACKET_LEN_ADDR             (3)
`define LDEC_PACKET_BYTES_LS_ADDR        (4)
`define LDEC_PACKET_BYTES_MS_ADDR        (5)
`define LDEC_FRAME_END_BYTE_LS_ADDR      (6)
`define LDEC_FRAME_END_BYTE_MS_ADDR      (7)
`define LDEC_CODE_ENUM_ADDR              (8)
`define LDEC_BITS_PER_SYMBOL_ADDR        (9)
`define LDEC_SHRT_FLOOR_ADDR             (10)
`define LDEC_SHRT_MOD_ADDR               (11)
`define LDEC_PUNC_FLOOR_ADDR             (12)
`define LDEC_PUNC_MOD_ADDR               (13)
`define LDEC_REP_FLOOR_ADDR              (14)
`define LDEC_REP_MOD_ADDR                (15)
`define LDEC_TARGET_LEVEL_ADDR           (16)
`define LDEC_PARITY_THRESH_ADDR          (17)
`define LDEC_ITERATIONS1_ADDR            (18)
`define LDEC_ITERATIONS2_ADDR            (19)
// Address map: status registers.   
`define LDEC_CUR_BLK_NUM_ADDR            (20)
`define LDEC_BLK_ERRS_ADDR               (21)
`define LDEC_PREV_ITERATIONS_ADDR        (22)
`define LDEC_PREV_PARITY_ERRS_ADDR       (23)
`define LDEC_PREV_DECODE_STATUS_ADDR     (24)
`define LDEC_CUR_ITERATION_ADDR          (25)
`define LDEC_VAR_METRIC_CHKSUM_ADDR      (26)
                                                   
// Input / Output ports (what you write may not be what you read!)
`define LDEC_CODE_ADDR                   (27)
                                      
// Meanings of bits in the flag memory and flag file.
// These are bit positions within the LDEC_FLAG_BITS (3) wide flag field.
`define LDEC_FLAG_END_ROW_BIT            (0)
`define LDEC_FLAG_FIRST_VAR_USE_BIT      (1)
`define LDEC_FLAG_LAST_VAR_USE_BIT       (2)
                                      
// This constant relates to the IP block.
// LoadBase is where we put new data. Consider that the collectionReg has
// DEC_RAM_IP_WIDTH-1 bits (not enough to output) then on the next cycle
// we load IP_WIDTH but only remove 1 bit (eg due to shortening quirk)
// then loadBase goes to DEC_RAM_IP_WIDTH-1 + (IP_WIDTH-1).
// LB_HI is therefore numBits(max loadBase) - 1.
// NOTE(review): 'numBits' is not defined in this file; it is presumably the
// number of bits needed to represent its argument, making LB_HI the left
// index of a vector holding loadBase - confirm.
`define LDEC_LB_HI  (numBits(`LDEC_DEC_RAM_IP_WIDTH + `LDEC_IP_WIDTH - 2)-1)

// Looking for memory sizes? Look after the function declarations. Also try
// running the testbench for 10ns. Sizes are reported in the transcript.
  
//---------------------------------------------------------------------------
// Other dimensions
//---------------------------------------------------------------------------

// Left (most significant) index of the target level value: VAR_BITS-1+7,
// i.e. 14 for VAR_BITS = 8.
`define LDEC_TARGET_LEVEL_LEFT (`LDEC_VAR_BITS-1+7)
// numBits applied to the maximum block number. 'numBits' must be visible
// wherever this macro is expanded.
`define LDEC_B_BITS numBits(`LDEC_MAX_BLKNUM)

//-------------------------------------------------------------------------
// Functions implemented with macros
//-------------------------------------------------------------------------

// Conventions for the macros below:
//  - 'a' is used with bit/part selects in most macros, so it must be a plain
//    vector name, not an expression.
//  - Width, index and value arguments are parenthesised at every use so that
//    expression arguments (e.g. W+1, i+1, x+y) keep their intended grouping.
//  - Arguments are expanded several times; pass side-effect free expressions.

// Prepend 'bits' zeros before a. The result is an unsigned concatenation.
`define LDEC_PAD(a, bits) {{(bits) {1'b0}}, a}
// Pad with signed 0's - for padding (with 0) signed numbers.
// Same bits as LDEC_PAD, but the result is cast to signed.
`define LDEC_PADS(a, bits) $signed({{(bits) {1'sb0}}, a})

// Signed clip of the ipBits wide value 'a' to 'bits' bits.
// If bits (bits)-1 and above are not all equal to the sign bit then we have
// a clipping situation: the result msb is the sign bit and all other bits
// are the inverse of the sign bit (most positive / most negative value).
// Otherwise the low 'bits' bits of a are returned. The result is signed.
`define LDEC_CLIP_S(a, ipBits, bits) ((a[(ipBits)-1:(bits)-1] != { ((ipBits)-1-(bits)+2) {a[(ipBits)-1]}}) ? $signed({a[(ipBits)-1], { ((bits)-1) {!a[(ipBits)-1]}}}) : $signed(a[(bits)-1:0]))

// Unsigned clip of the ipBits wide value 'a' to 'bits' bits.
// If any bit at position 'bits' or above is set the result is all ones,
// otherwise it is the low 'bits' bits of a.
//`define LDEC_CLIP_U(a, bits) ((a > 2**(bits)-1) ? 2**(bits)-1 : a)
`define LDEC_CLIP_U(a, ipBits, bits) ((a[(ipBits)-1:(bits)] != { ((ipBits) - (bits)) {1'b0}}) ? {(bits) {1'b1}} : a[(bits)-1:0])

// Absolute value. Crude - just does ones complement. Drops a bit.
// The result is (width)-1 bits wide and unsigned.
`define LDEC_ABS1(a, width) $unsigned((a[(width)-1]) ? ~a[(width)-2:0] : a[(width)-2:0])

// Absolute value. Retains length
// Two's complement negation (invert and add one) when the sign bit is set.
// The result is 'width' bits wide and unsigned.
`define LDEC_ABS2(a, width) $unsigned((a[(width)-1]) ? ~a[(width)-1:0]+`LDEC_PAD(1'b1, (width)-1) : a[(width)-1:0])

// Extract a slice of a vector. Only ever use this if sliceIdx is a constant.
// Selects sliceLen bits starting at bit sliceIdx*sliceLen.
`define LDEC_DEMUX(a, sliceIdx, sliceLen) a[(sliceIdx)*(sliceLen) +: (sliceLen)]

// Convert sign magnitude format to one's complement
// Negative: {1, ~magnitude}; positive: {0, magnitude}. The result is signed
// and one bit wider than aAbs.
`define LDEC_SGNMAG_TO_ONES(aSgn, aAbs) ((aSgn) ?  $signed({1'sb1, ~(aAbs)}) : $signed({1'sb0, (aAbs)}))

//---------------------------------------------------------------------------
// Types
//---------------------------------------------------------------------------

// Helpers that move data between a flat (packed) vector and an array of
// w0-bit elements, plus array copy and initialisation loops.
//
// w0 is the packed width. w1 is the left most of the unpacked widths and the
// outermost loop for packing.
// For the two-dimensional forms w2 is the inner dimension: element
// [i/(w2)][i % (w2)] corresponds to bits (1+i)*w0-1 down to i*w0 of the
// flat vector, for i = 0 .. w1*w2-1.
//
// All macros use a loop variable named idx1 that they do not declare. The
// scope that expands them must provide it (a genvar for the generate forms,
// a procedural loop variable for the others).
// 
// Generate form:
// Each expands to a complete generate ... endgenerate region containing a
// for-generate whose block is named 'label' and drives 'dest' with
// continuous assignments. The PACK forms apply $unsigned to each element.
`define LDEC_UNPACK2(label, src, dest, w0, w1, w2) generate for (idx1 = 0; idx1<(w1)*(w2); idx1=idx1+1) begin:label assign dest[idx1/(w2)][idx1 % (w2)] = src[(1+idx1)*(w0)-1 -: w0]; end  endgenerate
`define LDEC_UNPACK(label, src, dest, w0, w1) generate for (idx1=0; idx1<(w1); idx1=idx1+1) begin:label assign dest[idx1] = src[(1+idx1)*(w0)-1 -: w0]; end  endgenerate
`define LDEC_PACK2(label, src, dest, w0, w1, w2) generate for (idx1=0; idx1<(w1)*(w2); idx1=idx1+1) begin:label assign dest[(1+idx1)*(w0)-1 -: w0] = $unsigned(src[idx1/(w2)][idx1 % (w2)]); end  endgenerate
`define LDEC_PACK(label, src, dest, w0, w1) generate for (idx1=0; idx1<(w1); idx1=idx1+1) begin: label assign dest[(1+idx1)*(w0)-1 : idx1*(w0)] = $unsigned(src[idx1]); end endgenerate
// version for use in functions and combinatorial always blocks.
// Each expands to a single 'for' statement using blocking assignment and
// without a trailing semicolon - the user writes the ';' after the macro.
`define LDEC_UNPACK2F(src, dest, w0, w1, w2) for (idx1 = 0; idx1<(w1)*(w2); idx1=idx1+1) dest[idx1/(w2)][idx1 % (w2)] = src[(1+idx1)*(w0)-1 -: w0]
`define LDEC_UNPACKF(src, dest, w0, w1) for (idx1=0; idx1<(w1); idx1=idx1+1) dest[idx1] = src[(1+idx1)*(w0)-1 -: w0]
`define LDEC_PACK2F(src, dest, w0, w1, w2) for (idx1=0; idx1<(w1)*(w2); idx1=idx1+1) dest[(1+idx1)*(w0)-1 -: w0] = $unsigned(src[idx1/(w2)][idx1 % (w2)])
`define LDEC_PACKF(src, dest, w0, w1) for (idx1=0; idx1<(w1); idx1=idx1+1) dest[(1+idx1)*(w0)-1 -: w0] = $unsigned(src[idx1])
// version for use in clocked always blocks.
// Same as the F forms but with non-blocking assignment.
`define LDEC_UNPACK2Q(src, dest, w0, w1, w2) for (idx1 = 0; idx1<(w1)*(w2); idx1=idx1+1) dest[idx1/(w2)][idx1 % (w2)] <= src[(1+idx1)*(w0)-1 -: w0]
`define LDEC_UNPACKQ(src, dest, w0, w1) for (idx1=0; idx1<(w1); idx1=idx1+1) dest[idx1] <= src[(1+idx1)*(w0)-1 -: w0]
`define LDEC_PACK2Q(src, dest, w0, w1, w2) for (idx1=0; idx1<(w1)*(w2); idx1=idx1+1) dest[(1+idx1)*(w0)-1 -: w0] <= $unsigned(src[idx1/(w2)][idx1 % (w2)])
`define LDEC_PACKQ(src, dest, w0, w1) for (idx1=0; idx1<(w1); idx1=idx1+1) dest[(1+idx1)*(w0)-1 -: w0] <= $unsigned(src[idx1])
// For copying unpacked arrays in functions and combinatorial always blocks.
// Element-by-element copy with blocking assignment (w1, or w1*w2 elements).
`define LDEC_COPY(src, dest, w1) for (idx1=0; idx1<(w1); idx1=idx1+1) dest[idx1] = src[idx1]
`define LDEC_COPY2(src, dest, w1, w2) for (idx1 = 0; idx1<(w1)*(w2); idx1=idx1+1) dest[idx1/(w2)][idx1 % (w2)] = src[idx1/(w2)][idx1 % (w2)]
// For copying unpacked arrays in clocked  blocks.
// Element-by-element copy with non-blocking assignment.
`define LDEC_COPYQ(src, dest, w1) for (idx1=0; idx1<(w1); idx1=idx1+1) dest[idx1] <= src[idx1]
`define LDEC_COPY2Q(src, dest, w1, w2) for (idx1 = 0; idx1<(w1)*(w2); idx1=idx1+1) dest[idx1/(w2)][idx1 % (w2)] <= src[idx1/(w2)][idx1 % (w2)]
// Copy an initialisation value to each element of an array
// (blocking assignment; 'init' is expanded inside the loop body).
`define LDEC_INIT(dest, init, w1) for (idx1=0; idx1<(w1); idx1=idx1+1) dest[idx1] = init
`define LDEC_INIT2(dest, init, w1, w2) for (idx1 = 0; idx1<(w1)*(w2); idx1=idx1+1) dest[idx1/(w2)][idx1 % (w2)] = init
// Sequential process form
// (non-blocking assignment).
`define LDEC_INITQ(dest, init, w1) for (idx1=0; idx1<(w1); idx1=idx1+1) dest[idx1] <= init
`define LDEC_INIT2Q(dest, init, w1, w2) for (idx1 = 0; idx1<(w1)*(w2); idx1=idx1+1) dest[idx1/(w2)][idx1 % (w2)] <= init


// Range / type macros. Naming pattern visible in the definitions below:
//   xxxType : element range, optionally preceded by an array range and/or
//             the 'signed' keyword.
//   xxx_R   : the array index range(s) only, e.g. [0:N-1].
//   xxx_P   : a single flat range whose width is the product of all
//             dimensions (the packed form used with the PACK/UNPACK macros).
//   xxx_L   : the element range only.
// 'numBits' used in several ranges is not defined in this file.

// Scalar metric types. The Abs types are one bit narrower than the signed
// types (no sign bit); varP1Type is one bit wider than varType.
`define LDEC_chkAbsType [`LDEC_CHK_BITS-2:0]
`define LDEC_chkType signed [`LDEC_CHK_BITS-1:0]
`define LDEC_varType signed [`LDEC_VAR_BITS-1:0]
`define LDEC_varP1Type signed [`LDEC_VAR_BITS:0]
`define LDEC_varAbsType [`LDEC_VAR_BITS-2:0]

// Right index of the check metric array: 2 when MINSUM_ALG > 1, otherwise 1.
// chkAbs2 always has 2 entries; chkAbs3 has CHK_METRIC_RIGHT+1 entries.
`define LDEC_CHK_METRIC_RIGHT ((`LDEC_MINSUM_ALG> 1) ? 2 : 1)
`define LDEC_chkAbs2Type [0:1]`LDEC_chkAbsType
`define LDEC_chkAbs2_R [0:1]
`define LDEC_chkAbs2_P [2*(`LDEC_CHK_BITS-1)-1:0]
`define LDEC_chkAbs3Type [0:`LDEC_CHK_METRIC_RIGHT]`LDEC_chkAbsType
`define LDEC_chkAbs3_R [0:`LDEC_CHK_METRIC_RIGHT]
`define LDEC_chkAbs3_P [(`LDEC_CHK_METRIC_RIGHT+1)*(`LDEC_CHK_BITS-1)-1:0]

// Cyclic shift values: one numBits(Z_MAX-1) wide value per code column.
`define LDEC_cycShiftType [numBits(`LDEC_Z_MAX-1)-1:0]
`define LDEC_cycShift_L [numBits(`LDEC_Z_MAX-1)-1:0]
`define LDEC_cycShift_R [0:`LDEC_NCOLS-1]
`define LDEC_cycShift_P [`LDEC_NCOLS*numBits(`LDEC_Z_MAX-1)-1:0]

// Type for organising the input into an array of samples.
// NOTE(review): LDEC_CR_HI is not defined in this file; it must be defined
// before these two macros are expanded.
`define LDEC_ip2DType [`LDEC_IP_BITS-1:0]
`define LDEC_ip2DCr_R [`LDEC_CR_HI:0]
`define LDEC_ip2DCr_P [(`LDEC_CR_HI+1)*`LDEC_IP_BITS-1:0]

// Z wide arrays of metrics and responses. All Z are used concurrently.  
// sgnZType holds one bit per Z; the others hold Z_MAX entries of the
// corresponding chkAbs type.
`define LDEC_sgnZType [`LDEC_Z_MAX-1:0]
`define LDEC_chkMetricAbsZType [0:`LDEC_Z_MAX-1]`LDEC_chkAbs3Type
`define LDEC_chkMetricAbsZ_R [0:`LDEC_Z_MAX-1]`LDEC_chkAbs3_R
`define LDEC_chkMetricAbsZ_P [`LDEC_Z_MAX*(`LDEC_CHK_METRIC_RIGHT+1)*(`LDEC_CHK_BITS-1)-1:0]
`define LDEC_chkRespAbsZType [0:`LDEC_Z_MAX-1]`LDEC_chkAbsType
`define LDEC_chkRespAbsZ_R [0:`LDEC_Z_MAX-1]
`define LDEC_chkRespAbsZ_P [`LDEC_Z_MAX*(`LDEC_CHK_BITS-1)-1:0]

// Z_MAX signed check values of CHK_BITS each.
`define LDEC_chkZType signed [0:`LDEC_Z_MAX-1][`LDEC_CHK_BITS-1:0]
`define LDEC_chkZ_R [0:`LDEC_Z_MAX-1]
`define LDEC_chkZ_P [`LDEC_CHK_BITS*`LDEC_Z_MAX-1:0]


// Z_MAX signed variable values of VAR_BITS each, and the VAR_BITS+1 wide
// ("P1") variant.
`define LDEC_varZType signed [0:`LDEC_Z_MAX-1][`LDEC_VAR_BITS-1:0]
`define LDEC_varZ_R [0:`LDEC_Z_MAX-1]
`define LDEC_varZ_P [`LDEC_VAR_BITS*`LDEC_Z_MAX-1:0]
`define LDEC_varP1ZType signed [0:`LDEC_Z_MAX-1][`LDEC_VAR_BITS:0]
`define LDEC_varP1Z_R [0:`LDEC_Z_MAX-1]
`define LDEC_varP1Z_P [`LDEC_Z_MAX*(`LDEC_VAR_BITS+1)-1:0]

// One Z_MAX wide mask per Z enumeration (Z_ENUM_MAX+1 masks).
`define LDEC_zMaskType [0:`LDEC_Z_ENUM_MAX]`LDEC_sgnZType
`define LDEC_zMask_R [0:`LDEC_Z_ENUM_MAX]
`define LDEC_zMask_P [(`LDEC_Z_ENUM_MAX+1)*`LDEC_Z_MAX-1:0]

// Slv versions concatenate all Z Sgn and all Z Abs bits for RAM storage.
`define LDEC_varSlvZType [`LDEC_Z_MAX*`LDEC_VAR_BITS-1:0]
`define LDEC_chkRespSlvZType [`LDEC_Z_MAX*`LDEC_CHK_BITS-1:0]
// Types for debug visualisation
// (arrays of magnitude-width values: IP_WIDTH entries of IP_BITS-1 bits and
// DEC_RAM_IP_WIDTH entries of VAR_BITS-1 bits).
`define LDEC_ipDbgAbsType [0:`LDEC_IP_WIDTH-1][`LDEC_IP_BITS-2:0]
`define LDEC_opDbgAbsType [0:`LDEC_DEC_RAM_IP_WIDTH-1][`LDEC_VAR_BITS-2:0]

// Column index types. COL_BITS is numBits(NCOLS_RIGHT); shrtCol holds two
// column indices.
`define LDEC_COL_BITS numBits(`LDEC_NCOLS_RIGHT)
`define LDEC_colType [`LDEC_COL_BITS-1:0]
`define LDEC_shrtColType [0:1]`LDEC_colType
`define LDEC_shrtCol_R [0:1]
`define LDEC_shrtCol_P [2*`LDEC_COL_BITS-1:0]


// Types used in ldpcDecIp for getSamplesAdded function.
// NOTE(review): LDEC_OFFSET_LEN is not defined in this file; it must be
// defined before LDEC_sampleAddedType is expanded.
`define LDEC_sampleAddedType3  [numBits(`LDEC_Z_MAX)-1:0]
`define LDEC_sampleAddedType2 [0:`LDEC_Z_ENUM_MAX]`LDEC_sampleAddedType3
`define LDEC_sampleAddedType  [0:`LDEC_OFFSET_LEN-1]`LDEC_sampleAddedType2

//---------------------------------------------------------------------------
// Memory sizes.
//---------------------------------------------------------------------------
  
// Memories. Sizes D, A and W refer to depth, number
// of address bits, and data bus width. I is the input width of all inputs
// concatenated together (for the bus that brings RAM inputs into the core
// from the top level). Likewise, O is for the output.
// In every group A is numBits(D-1); 'numBits' and 'ceilDiv' are not defined
// in this file.
// NOTE(review): the small constants added in the I widths (+1, +2, +3, +4)
// presumably count single-bit control inputs such as enables - confirm
// against the RAM wrappers.

// Code Cell memory  
`define LDEC_CELL_RAM_D  (`LDEC_NCELLS)
`define LDEC_CELL_RAM_A  (numBits(`LDEC_CELL_RAM_D-1))
// Width is sum of widths needed to store cols, the cyc shift and flags.
`define LDEC_CELL_RAM_W  (numBits(`LDEC_NCOLS-1) + numBits(`LDEC_Z_MAX-1) + `LDEC_FLAG_BITS)
// I: two addresses, one data word and 3 further bits. O: two data words.
`define LDEC_CELL_RAM_I  (2*`LDEC_CELL_RAM_A + `LDEC_CELL_RAM_W + 3)
`define LDEC_CELL_RAM_O  (2*`LDEC_CELL_RAM_W)

// Variable Metrics Memory, applies to input and decode buffers.
// One VAR_BITS*Z_MAX wide word per code column.
`define LDEC_VM_RAM_D    (`LDEC_NCOLS)
`define LDEC_VM_RAM_A    (numBits(`LDEC_VM_RAM_D-1))
`define LDEC_VM_RAM_W    (`LDEC_VAR_BITS * `LDEC_Z_MAX)
// WE: ceilDiv(Z_MAX, DEC_RAM_IP_WIDTH). NOTE(review): presumably the number
// of write-enable slices of DEC_RAM_IP_WIDTH samples per word - confirm.
`define LDEC_VM_RAM_WE   (ceilDiv(`LDEC_Z_MAX, `LDEC_DEC_RAM_IP_WIDTH))
// I: two WE fields, four addresses, two data words and 2 further bits.
// O: two data words.
`define LDEC_VM_RAM_I    (2*`LDEC_VM_RAM_WE + 4*`LDEC_VM_RAM_A + 2*`LDEC_VM_RAM_W + 2)
`define LDEC_VM_RAM_O    (2*`LDEC_VM_RAM_W)
                                   
// Variable Response Memory.
// Depth is ROW_WEIGHT for the dual-port build, otherwise
// ROW_WEIGHT-1+PIPE_C-PIPE_VR.
`define LDEC_VR_RAM_D    (`LDEC_NUM_PORTS==2 ? `LDEC_ROW_WEIGHT : `LDEC_ROW_WEIGHT-1+`LDEC_PIPE_C-`LDEC_PIPE_VR)
`define LDEC_VR_RAM_A    (numBits(`LDEC_VR_RAM_D-1))
`define LDEC_VR_RAM_W    (`LDEC_VAR_BITS * `LDEC_Z_MAX)
// VR_RAM_I/O: two addresses, one data word and 2 further bits in; one data
// word out. VR1_RAM_I/O: the one-address variant (one address, one data
// word and 1 further bit in).
`define LDEC_VR_RAM_I    (2*`LDEC_VR_RAM_A + `LDEC_VR_RAM_W + 2)
`define LDEC_VR_RAM_O    (`LDEC_VR_RAM_W)
`define LDEC_VR1_RAM_I   (`LDEC_VR_RAM_A + `LDEC_VR_RAM_W + 1)
`define LDEC_VR1_RAM_O   (`LDEC_VR_RAM_W)

// Check Response Memory. For dual port lengthens if PIPE_A. For single port
// memory is shortened by a row but increased by the number of stalls due
// to pipelining.
// As coded: NCELLS + (PIPE_A + BWD_FWD_LATENCY) for the dual-port build,
// otherwise NCELLS + NROWS*(PIPE_X+PIPE_A+PIPE_B+PIPE_C+BWD_FWD_LATENCY) - 8.
`define LDEC_CR_RAM_D    (`LDEC_NCELLS + (`LDEC_NUM_PORTS==2 ? `LDEC_PIPE_A+`LDEC_BWD_FWD_LATENCY : `LDEC_NROWS*(`LDEC_PIPE_X+`LDEC_PIPE_A+`LDEC_PIPE_B+`LDEC_PIPE_C+`LDEC_BWD_FWD_LATENCY)-8))
`define LDEC_CR_RAM_A    (numBits(`LDEC_CR_RAM_D-1))
`define LDEC_CR_RAM_W    (`LDEC_CHK_BITS * `LDEC_Z_MAX)
// CR_RAM_I/O and CR1_RAM_I/O follow the same layout as the VR memory above.
`define LDEC_CR_RAM_I    (2*`LDEC_CR_RAM_A + `LDEC_CR_RAM_W + 2)
`define LDEC_CR_RAM_O    (`LDEC_CR_RAM_W)
`define LDEC_CR1_RAM_I   (`LDEC_CR_RAM_A + `LDEC_CR_RAM_W + 1)
`define LDEC_CR1_RAM_O   (`LDEC_CR_RAM_W)

// Hard Decision Memories (there are two of these to make ping pong buffers)
// One Z_MAX wide word (one bit per Z) per code column.
`define LDEC_HD_RAM_D    (`LDEC_NCOLS)
`define LDEC_HD_RAM_A    (numBits(`LDEC_HD_RAM_D-1))
`define LDEC_HD_RAM_W    (`LDEC_Z_MAX)
// I: three addresses, one data word and 4 further bits. O: two data words.
`define LDEC_HD_RAM_I    (3*`LDEC_HD_RAM_A + `LDEC_HD_RAM_W + 4)
`define LDEC_HD_RAM_O    (2*`LDEC_HD_RAM_W)
  
`endif

