gps/GPSResources/tcpmp 0.73/amr/26204/dec_dtx.c

/*
 *===================================================================
 *  3GPP AMR Wideband Floating-point Speech Codec
 *===================================================================
 */
#include <stdlib.h>
#include <memory.h>
#include <math.h>
#include "typedef.h"
#include "dec_dtx.h"
#include "dec_lpc.h"
#include "dec_util.h"

#define MAX_31                      (Word32)0x3FFFFFFF
#define L_FRAME                     256   /* Frame size                          */
#define RX_SPEECH_LOST              2
#define RX_SPEECH_BAD               3
#define RX_SID_FIRST                4
#define RX_SID_UPDATE               5
#define RX_SID_BAD                  6
#define RX_NO_DATA                  7
#define ISF_GAP                     128   /* 50                                  */
#define D_DTX_MAX_EMPTY_THRESH      50
#define GAIN_FACTOR                 75
#define ISF_FACTOR_LOW              256
#define ISF_FACTOR_STEP             2
#define ISF_DITH_GAP                448
#define D_DTX_HANG_CONST            7     /* yields eight frames of SP HANGOVER  */
#define D_DTX_ELAPSED_FRAMES_THRESH (24 + 7 - 1)
#define RANDOM_INITSEED             21845 /* own random init value               */


/*
 * D_DTX_reset
 *
 * Parameters:
 *    st             O: state struct
 *    isf_init       I: M initial ISF values, copied into the current,
 *                      previous and history ISF memories
 *
 * Function:
 *    Initializes state memory
 *
 * Returns:
 *    non-zero with error (st or isf_init is NULL), zero for ok
 */
int D_DTX_reset(D_DTX_State *st, const Word16 *isf_init)
{
   Word32 i;

   /* isf_init is read by memcpy below, so it must be valid as well */
   if((st == (D_DTX_State*)NULL) || (isf_init == (const Word16*)NULL))
   {
      return(-1);
   }
   st->mem_since_last_sid = 0;
   st->mem_true_sid_period_inv = (1 << 13);   /* 0.25 in Q15 */
   st->mem_log_en = 3500;
   st->mem_log_en_prev = 3500;

   /* low level noise for better performance in  DTX handover cases */
   st->mem_cng_seed = RANDOM_INITSEED;
   st->mem_hist_ptr = 0;

   /* Init isf_hist[] and decoder log frame energy */
   memcpy(st->mem_isf, isf_init, M * sizeof(Word16));
   memcpy(st->mem_isf_prev, isf_init, M * sizeof(Word16));

   /* every history slot starts from the same ISF vector and energy */
   for(i = 0; i < D_DTX_HIST_SIZE; i++)
   {
      memcpy(&st->mem_isf_buf[i * M], isf_init, M * sizeof(Word16));
      st->mem_log_en_buf[i] = 3500;
   }
   st->mem_dtx_hangover_count = D_DTX_HANG_CONST;
   st->mem_dec_ana_elapsed_count = 127;   /* the value rx_handler saturates at */
   st->mem_sid_frame = 0;
   st->mem_valid_data = 0;
   st->mem_dtx_hangover_added = 0;
   st->mem_dtx_global_state = SPEECH;
   st->mem_data_updated = 0;
   st->mem_dither_seed = RANDOM_INITSEED;
   st->mem_cn_dith = 0;

   return(0);
}


/*
 * D_DTX_init
 *
 * Parameters:
 *    st           I/O: address of the state struct pointer; set to the
 *                      new state on success and to NULL on failure
 *    isf_init       I: initial ISF values handed to D_DTX_reset
 *
 * Function:
 *    Allocates state memory and initializes state memory.
 *    The state is released with D_DTX_exit.
 *
 * Returns:
 *    non-zero with error, zero for ok
 */
int D_DTX_init(D_DTX_State **st, const Word16 *isf_init)
{
   D_DTX_State *s;

   if(st == (D_DTX_State**)NULL)
   {
      return(-1);
   }

   *st = NULL;

   /* nothing to initialize from: fail before allocating */
   if(isf_init == (const Word16*)NULL)
   {
      return(-1);
   }

   /* allocate memory */
   s = (D_DTX_State*)malloc(sizeof(*s));

   if(s == NULL)
   {
      return(-1);
   }

   /* never hand out a state whose initialization failed */
   if(D_DTX_reset(s, isf_init) != 0)
   {
      free(s);
      return(-1);
   }

   *st = s;

   return(0);
}


/*
 * D_DTX_exit
 *
 * Parameters:
 *    st           I/O: address of the state struct pointer
 *
 * Function:
 *    Frees the state memory and clears the caller's pointer.
 *    Does nothing when st or *st is NULL.
 *
 * Returns:
 *    void
 */
void D_DTX_exit(D_DTX_State **st)
{
   if((st != NULL) && (*st != NULL))
   {
      /* deallocate memory and leave no dangling pointer behind */
      free(*st);
      *st = NULL;
   }
}


/*
 * D_DTX_rx_handler
 *
 * Parameters:
 *    st              I/O: State struct
 *    frame_type        I: Frame type
 *
 * Function:
 *    Analyze received frame: select the synthesis state for this frame
 *    and update the DTX bookkeeping kept in the state struct
 *    (mem_since_last_sid, mem_dec_ana_elapsed_count,
 *    mem_dtx_hangover_count, mem_dtx_hangover_added and, for a
 *    non-SPEECH result, mem_sid_frame and mem_valid_data).
 *
 *    New synthesis state as a function of st->mem_dtx_global_state
 *    (columns) and frame_type (rows):
 *
 *    Incoming              |
 *    frame_type            | SPEECH       | DTX           | D_DTX_MUTE
 *    ----------------------------------------------------------------
 *    RX_SID_UPDATE         | DTX          | DTX           | DTX
 *    ----------------------------------------------------------------
 *    RX_SID_FIRST,         |              |               |
 *    RX_SID_BAD            | DTX          | DTX           | D_DTX_MUTE
 *    ----------------------------------------------------------------
 *    RX_SPEECH_LOST,       |              |               |
 *    RX_NO_DATA            | SPEECH       | DTX           | D_DTX_MUTE
 *    ----------------------------------------------------------------
 *    RX_SPEECH_BAD         | SPEECH       | DTX           | DTX
 *    ----------------------------------------------------------------
 *    any other type        | SPEECH       | SPEECH        | SPEECH
 *    ----------------------------------------------------------------
 *
 *    Every DTX entry turns into D_DTX_MUTE once mem_since_last_sid,
 *    which is incremented here for each non-SPEECH result, exceeds
 *    D_DTX_MAX_EMPTY_THRESH.
 *
 * Returns:
 *    new state (used by both SPEECH and DTX synthesis routines)
 */
UWord8 D_DTX_rx_handler(D_DTX_State *st, UWord8 frame_type)
{
   UWord8 next_state;
   int got_sid_first, got_sid_update, got_sid_bad;
   int got_no_data, got_speech_lost, got_any_sid;
   int was_muted, was_generating_cn;

   /* classify the frame and the previous state once */
   got_sid_first = (frame_type == RX_SID_FIRST);
   got_sid_update = (frame_type == RX_SID_UPDATE);
   got_sid_bad = (frame_type == RX_SID_BAD);
   got_no_data = (frame_type == RX_NO_DATA);
   got_speech_lost = (frame_type == RX_SPEECH_LOST);
   got_any_sid = got_sid_first || got_sid_update || got_sid_bad;
   was_muted = (st->mem_dtx_global_state == D_DTX_MUTE);
   was_generating_cn = was_muted || (st->mem_dtx_global_state == DTX);

   /*
    * Comfort noise state if this is a SID frame, or if the decoder was
    * already in DTX / D_DTX_MUTE and the frame carries no usable speech
    */
   if(got_any_sid || (was_generating_cn && (got_no_data ||
      got_speech_lost || (frame_type == RX_SPEECH_BAD))))
   {
      /* a muted decoder stays muted for these input types */
      if(was_muted && (got_sid_bad || got_sid_first || got_speech_lost ||
         got_no_data))
      {
         next_state = D_DTX_MUTE;
      }
      else
      {
         next_state = DTX;
      }

      /*
       * age of the noise parameters; the counter is cleared again
       * when CN parameters have been updated
       */
      st->mem_since_last_sid = D_UTIL_saturate(st->mem_since_last_sid + 1);

      /* SID parameters have not been refreshed for too long */
      if(st->mem_since_last_sid > D_DTX_MAX_EMPTY_THRESH)
      {
         next_state = D_DTX_MUTE;
      }
   }
   else
   {
      next_state = SPEECH;
      st->mem_since_last_sid = 0;
   }

   /*
    * Restart the elapsed-frames counter when CNI data arrives for the
    * first time, to make the counter robust against a mismatch after
    * handover. This may slightly delay the backward CNI analysis.
    */
   if((st->mem_data_updated == 0) && got_sid_update)
   {
      st->mem_dec_ana_elapsed_count = 0;
   }

   /*
    * Track the SPE-SPD DTX hangover synchronization, to know when the
    * SPE has added a dtx hangover. The counter saturates at 127.
    */
   st->mem_dec_ana_elapsed_count++;

   if(st->mem_dec_ana_elapsed_count > 127)
   {
      st->mem_dec_ana_elapsed_count = 127;
   }

   st->mem_dtx_hangover_added = 0;

   if(!(got_any_sid || got_no_data))
   {
      /* encoder side is regarded as sending speech: re-arm the hangover */
      st->mem_dtx_hangover_count = D_DTX_HANG_CONST;
   }
   else if(st->mem_dec_ana_elapsed_count > D_DTX_ELAPSED_FRAMES_THRESH)
   {
      st->mem_dtx_hangover_added = 1;
      st->mem_dec_ana_elapsed_count = 0;
      st->mem_dtx_hangover_count = 0;
   }
   else if(st->mem_dtx_hangover_count == 0)
   {
      st->mem_dec_ana_elapsed_count = 0;
   }
   else
   {
      st->mem_dtx_hangover_count--;
   }

   if(next_state != SPEECH)
   {
      /*
       * DTX or D_DTX_MUTE
       * Only a SID update carries CN data; a first SID or a bad SID is
       * flagged as a SID frame without valid data. Backward analysis is
       * still done for them when a hangover period has been added
       * according to the state machine above.
       */
      st->mem_sid_frame = got_any_sid ? 1 : 0;
      st->mem_valid_data = got_sid_update ? 1 : 0;

      if(got_sid_bad)
      {
         st->mem_dtx_hangover_added = 0;   /* use old data */
      }
   }

   return next_state;
}


/*
 * D_DTX_cn_dithering
 *
 * Parameters:
 *    isf             I/O: CN ISF vector; entries 0 .. M-2 are dithered,
 *                         isf[M-1] is not touched
 *    L_log_en_int    I/O: energy parameter, never left below zero
 *    dither_seed     I/O: random seed, advanced by D_UTIL_random
 *
 * Function:
 *    Comfort noise dithering: adds a random offset to the energy
 *    parameter and to the ISFs while keeping the ISFs ordered.
 *
 * Returns:
 *    void
 */
static void D_DTX_cn_dithering(Word16 isf[M], Word32 *L_log_en_int,
                               Word16 *dither_seed)
{
   Word32 temp, temp1, i, dither_fac, rand_dith, rand_dith2;

   /*
    * Insert comfort noise dithering for energy parameter.
    * Each dither value is the sum of two halved random numbers.
    */
   rand_dith = D_UTIL_random(dither_seed) >> 1;
   rand_dith2 = D_UTIL_random(dither_seed) >> 1;
   rand_dith = rand_dith + rand_dith2;

   /*
    * Doubled with "* 2" rather than "<< 1": the dither can be negative
    * (the clamp below expects that) and left-shifting a negative value
    * is undefined behaviour in C. The result is the same.
    */
   *L_log_en_int = *L_log_en_int + ((rand_dith * GAIN_FACTOR) * 2);

   if(*L_log_en_int < 0)
   {
      *L_log_en_int = 0;
   }

   /* Insert comfort noise dithering for spectral parameters (ISF-vector) */
   dither_fac = ISF_FACTOR_LOW;
   rand_dith = D_UTIL_random(dither_seed) >> 1;
   rand_dith2 = D_UTIL_random(dither_seed) >> 1;
   rand_dith = rand_dith + rand_dith2;

   /* scale the dither by dither_fac, rounding before the shift by 15 */
   temp = isf[0] + (((rand_dith * dither_fac) + 0x4000) >> 15);

   /* Make sure that isf[0] will not get negative values */
   if(temp < ISF_GAP)
   {
      isf[0] = ISF_GAP;
   }
   else
   {
      isf[0] = (Word16)temp;
   }

   for(i = 1; i < M - 1; i++)
   {
      /* the dither factor grows by ISF_FACTOR_STEP for every ISF */
      dither_fac = dither_fac + ISF_FACTOR_STEP;
      rand_dith = D_UTIL_random(dither_seed) >> 1;
      rand_dith2 = D_UTIL_random(dither_seed) >> 1;
      rand_dith = rand_dith + rand_dith2;
      temp = isf[i] + (((rand_dith * dither_fac) + 0x4000) >> 15);

      /* distance to the already dithered lower neighbour */
      temp1 = temp - isf[i - 1];

      /* Make sure that isf spacing remains at least ISF_DITH_GAP Hz */
      if(temp1 < ISF_DITH_GAP)
      {
         isf[i] = (Word16)(isf[i - 1] + ISF_DITH_GAP);
      }
      else
      {
         isf[i] = (Word16)temp;
      }
   }

   /* Make sure that isf[M-2] will not get values above 16384 */
   if(isf[M - 2] > 16384)
   {
      isf[M - 2] = 16384;
   }

   return;
}


/*
 * D_DTX_exe
 *
 * Parameters:
 *    st           I/O: state struct
 *    exc2           O: CN excitation (L_FRAME samples)
 *    new_state      I: New DTX state
 *    isf            O: CN ISF vector (M values)
 *    prms         I/O: pointer to the vector of synthesis parameters;
 *                      advanced past the seven values consumed when a
 *                      SID frame with valid data is decoded
 *
 * Function:
 *    Comfort noise generation:
 *      1. after a DTX hangover, average the ISF / log energy history
 *      2. on a SID frame, shift the SID parameters and decode new ones
 *      3. interpolate between previous and current SID parameters
 *      4. optionally dither, then generate gain-scaled white noise
 *      5. in D_DTX_MUTE, lower the stored energy for the next frame
 *
 * Returns:
 *    void
 */
void D_DTX_exe(D_DTX_State *st, Word16 *exc2, Word16 new_state, Word16 isf[],
               Word16 **prms)
{

   Word32 i, j, L_tmp, ptr;
   Word32 exp0, int_fac;
   Word32 gain;
   Word32 L_isf[M], L_log_en_int, level32, ener32;
   Word16 log_en_index;
   Word16 tmp_int_length;
   Word16 exp, log_en_int_e, log_en_int_m, level;


   /*
    * This function is called if synthesis state is not SPEECH.
    * The globally passed inputs to this function are
    *    st->mem_sid_frame
    *    st->mem_valid_data
    *    st->mem_dtx_hangover_added
    *    new_state (SPEECH, DTX, D_DTX_MUTE)
    */
   if((st->mem_dtx_hangover_added != 0) && (st->mem_sid_frame != 0))
   {
      /* sid_first after dtx hangover period
       * or sid_upd after dtxhangover
       * consider twice the last frame
       */
      ptr = st->mem_hist_ptr + 1;

      if(ptr == D_DTX_HIST_SIZE)
      {
         ptr = 0;
      }

      /* duplicate the newest history entry into the next slot */
      memcpy(&st->mem_isf_buf[ptr * M], &st->mem_isf_buf[st->mem_hist_ptr * M],
         M * sizeof(Word16));

      st->mem_log_en_buf[ptr] = st->mem_log_en_buf[st->mem_hist_ptr];

      /* compute mean log energy and isf from decoded signal (SID_FIRST) */
      st->mem_log_en = 0;
      memset(L_isf, 0, M * sizeof(Word32));

      /* average energy and isf */
      for(i = 0; i < D_DTX_HIST_SIZE; i++)
      {
         /*
          * Division by D_DTX_HIST_SIZE = 8 has been done in dtx_buffer log_en
          * is in Q10
          */
         st->mem_log_en = (Word16)(st->mem_log_en + st->mem_log_en_buf[i]);

         for(j = 0; j < M; j++)
         {
            L_isf[j] = L_isf[j] + st->mem_isf_buf[i * M + j];
         }
      }

      /* st->mem_log_en in Q9 */
      st->mem_log_en = (Word16)(st->mem_log_en >> 1);

      /*
       * Add 2 in Q9, in order to have only positive values for Pow2
       * this value is subtracted back after Pow2 function
       */
      st->mem_log_en = (Word16)(st->mem_log_en + 1024);

      if(st->mem_log_en < 0)
      {
         st->mem_log_en = 0;
      }

      for(j = 0; j < M; j++)
      {
         st->mem_isf[j] = (Word16)(L_isf[j]>>3);   /* divide by 8 */
      }
   }

   if(st->mem_sid_frame != 0)
   {
      /*
       * Set old SID parameters, always shift
       * even if there is no new valid_data
       */
      memcpy(st->mem_isf_prev, st->mem_isf, M * sizeof(Word16));
      st->mem_log_en_prev = st->mem_log_en;

      if(st->mem_valid_data != 0) /* new data available (no CRC) */
      {
         /* st->true_sid_period_inv = 1.0f/st->since_last_sid; */

         /*
          * Compute interpolation factor, since the division only works
          * for values of since_last_sid < 32 we have to limit
          * the interpolation to 32 frames
          */
         tmp_int_length = st->mem_since_last_sid;

         if(tmp_int_length > 32)
         {
            tmp_int_length = 32;
         }

         if(tmp_int_length >= 2)
         {
            st->mem_true_sid_period_inv =
               (Word16)(0x2000000 / (tmp_int_length << 10));
         }
         else
         {
            st->mem_true_sid_period_inv = 1 << 14;   /* 0.5 it Q15 */
         }

         /* decode the ISFs, then step over the values that were consumed */
         D_LPC_isf_noise_d(*prms, st->mem_isf);
         (*prms) += 5;
         log_en_index = *(*prms)++;

         /* read background noise stationarity information */
         st->mem_cn_dith = *(*prms)++;

         /*
          * st->log_en = (Float32)log_en_index / 2.625 - 2.0;
          * log2(E) in Q9 (log2(E) lies in between -2:22)
          */
         st->mem_log_en = (Word16)(log_en_index << (15 - 6));

         /* Divide by 2.625  */
         st->mem_log_en = (Word16)((st->mem_log_en * 12483) >> 15);

         /*
          * Subtract 2 in Q9 is done later, after Pow2 function
          * no interpolation at startup after coder reset
          * or when SID_UPD has been received right after SPEECH
          */
         if((st->mem_data_updated == 0) ||
            (st->mem_dtx_global_state == SPEECH))
         {
            memcpy(st->mem_isf_prev, st->mem_isf, M * sizeof(Word16));
            st->mem_log_en_prev = st->mem_log_en;
         }
      }   /* endif valid_data */
   }   /* endif sid_frame */

   if((st->mem_sid_frame != 0) && (st->mem_valid_data != 0))
   {
      st->mem_since_last_sid = 0;
   }

   /* Interpolate SID info */
   if(st->mem_since_last_sid < 32)
   {
      int_fac = st->mem_since_last_sid << 10;   /* Q10 */
   }
   else
   {
      int_fac = 32767;
   }
   /* Q10 * Q15 -> Q10 */
   int_fac = (int_fac * st->mem_true_sid_period_inv) >> 15;

   /* Maximize to 1.0 in Q10 */
   if(int_fac > 1024)
   {
      int_fac = 1024;
   }
   int_fac = int_fac << 4;   /* Q10 -> Q14 */

   /*
    * Q14 * Q9 -> Q24. Doubled with "* 2" rather than "<< 1": the mute
    * branch at the end of this function keeps lowering mem_log_en with
    * no lower clamp, so the product can be negative, and left-shifting
    * a negative value is undefined behaviour. The result is the same.
    */
   L_log_en_int = (int_fac * st->mem_log_en) * 2;

   for(i = 0; i < M; i++)
   {
      /* Q14 * Q15 -> Q14 */
      isf[i] = (Word16)((int_fac * st->mem_isf[i]) >> 15);
   }
   int_fac = 16384 - int_fac;   /* 1-k in Q14 */

   /* ( Q14 * Q9 -> Q24 ) + Q24 -> Q24 */
   L_log_en_int = L_log_en_int + ((int_fac * st->mem_log_en_prev) * 2);

   for(i = 0; i < M; i++)
   {
      /* Q14 + (Q14 * Q15 -> Q14) -> Q14 */
      L_tmp = isf[i] + ((int_fac * st->mem_isf_prev[i]) >> 15);
      isf[i] = (Word16)(L_tmp << 1);   /* Q14 -> Q15 */
   }

   /* If background noise is non-stationary, insert comfort noise dithering */
   if(st->mem_cn_dith != 0)
   {
      D_DTX_cn_dithering(isf, &L_log_en_int, &st->mem_dither_seed);
   }

   /* L_log_en_int corresponds to log2(E)+2 in Q24, i.e log2(gain)+1 in Q25 */
   L_log_en_int = (L_log_en_int >> 9); /* Q25 -> Q16 */

   /* Find integer part  */
   log_en_int_e = (Word16)((L_log_en_int)>>16);

   /*
    * Find fractional part. The integer part is scaled back with
    * "* 65536" rather than "<< 16" because it can be negative.
    */
   log_en_int_m = (Word16)((L_log_en_int - (log_en_int_e * 65536)) >> 1);

   /*
    * Subtract 2 from L_log_en_int in Q9,
    * i.e divide the gain by 2 (energy by 4)
    * Add 16 in order to have the result of pow2 in Q16
    */
   log_en_int_e = (Word16)(log_en_int_e + (16 - 1));

   /* level = (Float32)( pow( 2.0f, log_en ) );  */
   level32 = D_UTIL_pow2(log_en_int_e, log_en_int_m);   /* Q16 */
   exp0 = D_UTIL_norm_l(level32);
   level32 = (level32 << exp0);   /* level in Q31 */
   exp0 = (15 - exp0);
   level = (Word16)(level32 >> 16);   /* level in Q15 */

   /* generate white noise vector */
   for(i = 0; i < L_FRAME; i++)
   {
      exc2[i] = (Word16)((D_UTIL_random(&(st->mem_cng_seed)) >> 4));
   }

   /* gain = level / sqrt(ener) * sqrt(L_FRAME) */
   /* energy of generated excitation */
   ener32 = D_UTIL_dot_product12(exc2, exc2, L_FRAME, &exp);
   D_UTIL_normalised_inverse_sqrt(&ener32, &exp);
   gain = ener32 >>16;
   gain = (level * gain) >> 15;   /* gain in Q15 */

   /* Multiply by sqrt(L_FRAME)=16, i.e. shift left by 4 */
   exp = (Word16)(exp0 + exp  + 4);

   if(exp >= 0)
   {
      for(i = 0; i < L_FRAME; i++)
      {
         L_tmp = (exc2[i] * gain) >> 15;   /* Q0 * Q15 */

         /*
          * Scale up by 2^exp with a multiplication: noise samples are
          * signed and left-shifting a negative value is undefined.
          */
         exc2[i] = (Word16)(L_tmp * ((Word32)1 << exp));
      }
   }
   else
   {
      exp = (Word16)-exp;

      for(i = 0; i < L_FRAME; i++)
      {
         L_tmp = (exc2[i] * gain) >> 15;   /* Q0 * Q15 */
         exc2[i] = (Word16)(L_tmp >> exp);
      }
   }

   if(new_state == D_DTX_MUTE)
   {
      /*
       * mute comfort noise as it has been quite a long time since
       * last SID update was performed
       */
      tmp_int_length = st->mem_since_last_sid;

      if(tmp_int_length > 32)
      {
         tmp_int_length = 32;
      }

      /*
       * Safety guard against division by zero: mem_since_last_sid has
       * already been cleared above when this frame carried a valid SID
       * update, and D_DTX_rx_handler can still report D_DTX_MUTE for
       * such a frame. Fall back to an 8 frame interpolation period
       * (the guard value recalled from the AMR-NB reference decoder -
       * TODO confirm against 3GPP TS 26.073).
       */
      if(tmp_int_length <= 0)
      {
         tmp_int_length = 8;
      }

      st->mem_true_sid_period_inv = D_UTIL_saturate((0x02000000 / (tmp_int_length << 10)));
      st->mem_since_last_sid = 0;
      st->mem_log_en_prev = st->mem_log_en;

      /* subtract 1/8 in Q9 (energy), i.e -3/8 dB */
      st->mem_log_en = D_UTIL_saturate(st->mem_log_en - 64);
   }

   /*
    * reset interpolation length timer if data has been updated, i.e.
    * a SID frame brought valid data or followed an added hangover
    */
   if((st->mem_sid_frame != 0) && ((st->mem_valid_data != 0) ||
      (st->mem_dtx_hangover_added != 0)))
   {
      st->mem_since_last_sid = 0;
      st->mem_data_updated = 1;
   }

   return;
}


/*
 * D_DTX_activity_update
 *
 * Parameters:
 *    st           I/O: state struct
 *    isf            I: ISF vector of the current frame (M values)
 *    exc            I: excitation of the current frame (L_FRAME samples)
 *
 * Function:
 *    Advances the circular history pointer and stores the frame's ISF
 *    vector and log frame energy in the history buffers.
 *
 * Returns:
 *    void
 */
void D_DTX_activity_update(D_DTX_State *st, Word16 isf[], Word16 exc[])
{
   Word32 energy, log_energy, slot, n;
   Word16 log_exponent, log_mantissa;

   /* step to the next history slot, wrapping at the end of the buffer */
   slot = st->mem_hist_ptr + 1;

   if(slot == D_DTX_HIST_SIZE)
   {
      slot = 0;
   }

   st->mem_hist_ptr = (Word16)slot;

   memcpy(&st->mem_isf_buf[st->mem_hist_ptr * M], isf, M * sizeof(Word16));

   /*
    * Excitation frame energy in Q0. Accumulation stops as soon as the
    * sum passes MAX_31, and the energy is then pinned to MAX_31.
    */
   energy = 0;
   n = 0;

   while(n < L_FRAME)
   {
      energy += exc[n] * exc[n];

      if(energy > MAX_31)
      {
         energy = MAX_31;
         break;
      }

      n++;
   }

   /*
    * log_en =
    * (Float32)log10(L_frame_en/(Float32)L_FRAME)/(Float32)log10(2.0f);
    */
   D_UTIL_log2(energy, &log_exponent, &log_mantissa);

   /*
    * Exponent and mantissa are merged into one Q7 value (Q7 is used to
    * simplify averaging in dtx_enc). The division by L_FRAME = 256 is
    * done in the log domain: subtract 8, which is 1024 in Q7.
    */
   log_energy = (log_exponent << 7) + (log_mantissa >> (15 - 7)) - 1024;

   /* insert into log energy buffer */
   st->mem_log_en_buf[st->mem_hist_ptr] = (Word16)log_energy;
}