//
// JL_vad.h
// Test
//
// Created by DFung on 2018/2/28.
// Copyright © 2018 DFung. All rights reserved.
//

#ifndef JL_vad_h
#define JL_vad_h
/*
 * Build-time constants for the VAD front end.
 */

/* NOTE(review): presumably the A/D frame size used by the detector; the
 * unit is not visible in this header - confirm against the implementation. */
#define CONT_AD_ADFRMSIZE 16

/* NOTE(review): presumably the number of fractional bits in a fixed32
 * value (i.e. a 2^12 scale) - confirm against the implementation. */
#define DEFAULT_RADIX 12

/* NOTE(review): presumably the half-width of a smoothing window; its users
 * are not visible in this header - confirm. */
#define SMOOTH_WINDOW 4

/*
 * Smallest fixed-point logarithm value.
 * The replacement list is parenthesized so the leading minus sign always
 * stays attached to the literal, whatever expression the macro is pasted
 * into.
 * NOTE(review): numerically this is about ln(1e-300) * 2^12, which suggests
 * a natural log scaled by DEFAULT_RADIX bits - confirm.
 */
#define MIN_FIXLOG (-2829416)

/* NOTE(review): the original comment was mis-encoded and unreadable;
 * judging by the name this is a keyword count - confirm. */
#define KWSNUM 1
/*
 * Project-local integer and floating-point aliases.
 *
 * Each alias is a plain built-in C type, so the width its name promises is
 * an assumption about the target ABI (8-bit char, 16-bit short, 32-bit int,
 * 64-bit long long) and is not enforced by this header.
 */
//typedef long long __int64;

/* Signed integers. */
typedef signed char int8;
typedef short int16;
typedef int int32;
typedef long long int64;

/* Unsigned integers. */
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;

/* Fixed-point value stored in an int32.
 * NOTE(review): presumably scaled by DEFAULT_RADIX fractional bits - confirm. */
typedef int32 fixed32;

/* Single-precision floating point. */
typedef float float32;
struct noise_stats_s {
|
|
|
|
|
/* Smoothed power */
|
|
|
|
|
int power[40];
|
|
|
|
|
int max_power[40];
|
|
|
|
|
/* Noise estimate */
|
|
|
|
|
int noise[40];
|
|
|
|
|
int max_noise[40];
|
|
|
|
|
/* Signal floor estimate */
|
|
|
|
|
int floor[40];
|
|
|
|
|
int max_floor[40];
|
|
|
|
|
/* Peak for temporal masking */
|
|
|
|
|
int peak[40];
|
|
|
|
|
int max_peak[40];
|
|
|
|
|
|
|
|
|
|
int signal[40];
|
|
|
|
|
int gain[40];
|
|
|
|
|
char remove_noi;
|
|
|
|
|
/* Initialize it next time */
|
|
|
|
|
uint8 undefined;
|
|
|
|
|
/* Number of items to process */
|
|
|
|
|
uint32 num_filters;
|
|
|
|
|
|
|
|
|
|
/* Sum of slow peaks for VAD */
|
|
|
|
|
int64 slow_peak_sum;
|
|
|
|
|
int64 max_slow_peak_sum;
|
|
|
|
|
|
|
|
|
|
/* Precomputed constants */
|
|
|
|
|
int lambda_power;
|
|
|
|
|
int comp_lambda_power;
|
|
|
|
|
int lambda_a;
|
|
|
|
|
int comp_lambda_a;
|
|
|
|
|
int lambda_b;
|
|
|
|
|
int comp_lambda_b;
|
|
|
|
|
int lambda_t;
|
|
|
|
|
int mu_t;
|
|
|
|
|
int max_gain;
|
|
|
|
|
int inv_max_gain;
|
|
|
|
|
|
|
|
|
|
int smooth_scaling[2 * 4 + 3];
|
|
|
|
|
};
|
|
|
|
|
typedef struct noise_stats_s noise_stats_t;
struct htk_fe_s
|
|
|
|
|
{
|
|
|
|
|
unsigned short pow_hist[98];
|
|
|
|
|
noise_stats_t *noise_stats;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int fe_prior;
|
|
|
|
|
uint32 int_frame;
|
|
|
|
|
char int_flag;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
unsigned char sum_cnt;
|
|
|
|
|
|
|
|
|
|
uint8 voice_state;
|
|
|
|
|
uint8 in_speech;
|
|
|
|
|
int16 pre_speech_frames;
|
|
|
|
|
int16 post_speech_frames;
|
|
|
|
|
|
|
|
|
|
int16 inout_spch[410];
|
|
|
|
|
int fe_tmp_spch[250];
|
|
|
|
|
int fe_fft_buf[512];
|
|
|
|
|
int fe_mfspec[40];
|
|
|
|
|
int cmn_sum[13];
|
|
|
|
|
int mfcc_fifo[9][13];
|
|
|
|
|
int *cmn_mfcc_fifo[9];
|
|
|
|
|
//int fe_prior;
|
|
|
|
|
char speech_fifo[16];
|
|
|
|
|
char init_speech_n;
|
|
|
|
|
char vad_cnt;
|
|
|
|
|
int buf_cep[21][13];
|
|
|
|
|
int buf_pow[21];
|
|
|
|
|
uint8 speech_indexw;
|
|
|
|
|
uint8 speech_indexr;
|
|
|
|
|
uint8 speech_cnt;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int16 sp_threshold;
|
|
|
|
|
int16 nsp_threshold;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
typedef struct htk_fe_s htk_fe_t;
/*
 * How to use? See below...
 *
 * 1. Before starting a recording:
 *      /--- start VAD ---/
 *      int need_buf_size = vad_get_need_buf_size();
 *      vad_hdl = malloc(need_buf_size);
 *      vad_init(vad_hdl, 10, 51);
 *
 * 2. After the recording has ended:
 *      /--- stop VAD ---/
 *      free(vad_hdl);
 *
 * 3. In the recorded-audio data callback:
 *      buf: recorded audio data, must be 320 bytes!
 *      ret: 0 -> silence, 1 -> speaking, 2 -> finished speaking
 *      int ret = vad_main(vad_hdl, (int*)buf);
 */

/*
 * Returns the size, in bytes, of the work buffer the caller must allocate
 * and pass as `p` to vad_init and vad_main.
 */
int vad_get_need_buf_size(void);

/*
 * Prepares a caller-allocated work buffer for use by vad_main.
 *
 * p              buffer of at least vad_get_need_buf_size() bytes; the
 *                caller owns it and releases it when recording is over.
 * sp_threshold   NOTE(review): presumably the speech threshold; the usage
 *                example above passes 10 - confirm.
 * nsp_threshold  NOTE(review): presumably the non-speech threshold; the
 *                usage example above passes 51 - confirm.
 *
 * The meaning of the return value is not documented in this header.
 */
int vad_init(void *p, short sp_threshold, short nsp_threshold);

/*
 * Processes one block of recorded audio.
 *
 * p           work buffer previously passed to vad_init.
 * inout_spch  audio block; per the usage notes above it must hold 320
 *             bytes. NOTE(review): the example casts the recording buffer
 *             to int *, so the sample format actually expected is not
 *             visible here - confirm.
 *
 * Returns, per the usage notes above: 0 for silence, 1 while speech is in
 * progress, 2 when speech has finished.
 */
int vad_main(void *p, int *inout_spch);
fixed32 fe_log_add(fixed32 x, fixed32 y);
|
|
|
|
|
fixed32 fe_log_sub(fixed32 x, fixed32 y);
|
|
|
|
|
int32 fixlog2(uint32 x);
|
|
|
|
|
int fixlog(uint32 x);
#endif /* JL_vad_h */