//
//  JL_vad.h
//  Test
//
//  Created by DFung on 2018/2/28.
//  Copyright © 2018年 DFung. All rights reserved.
//

/* Include guard; the matching #endif is the last directive of this header. */
#ifndef JL_vad_h
#define JL_vad_h

/*
 * Compile-time constants exported by this header.
 * Each replacement list is a single self-contained integer expression so the
 * macros can be used safely inside larger expressions.
 */
#define CONT_AD_ADFRMSIZE 16
#define DEFAULT_RADIX     12
#define SMOOTH_WINDOW     4
#define MIN_FIXLOG        (-2829416) /* parenthesized: negative literal */
#define KWSNUM            1          /* number of keywords to recognize */

/*
 * Sized integer / float aliases used throughout the VAD code.
 *
 * These are plain aliases of the built-in types, so the width in each name
 * only holds on platforms where char is 8 bits, short is 16 bits, int is
 * 32 bits and long long is 64 bits.
 */
typedef int            int32;
typedef short          int16;
typedef signed char    int8;
typedef unsigned int   uint32;
typedef unsigned short uint16;
typedef unsigned char  uint8;
typedef long long      int64;

/* fixed32 shares the representation of int32. */
typedef int32          fixed32;
typedef float          float32;

struct noise_stats_s { |
|
|
/* Smoothed power */ |
|
|
int power[40]; |
|
|
int max_power[40]; |
|
|
/* Noise estimate */ |
|
|
int noise[40]; |
|
|
int max_noise[40]; |
|
|
/* Signal floor estimate */ |
|
|
int floor[40]; |
|
|
int max_floor[40]; |
|
|
/* Peak for temporal masking */ |
|
|
int peak[40]; |
|
|
int max_peak[40]; |
|
|
|
|
|
int signal[40]; |
|
|
int gain[40]; |
|
|
char remove_noi; |
|
|
/* Initialize it next time */ |
|
|
uint8 undefined; |
|
|
/* Number of items to process */ |
|
|
uint32 num_filters; |
|
|
|
|
|
/* Sum of slow peaks for VAD */ |
|
|
int64 slow_peak_sum; |
|
|
int64 max_slow_peak_sum; |
|
|
|
|
|
/* Precomputed constants */ |
|
|
int lambda_power; |
|
|
int comp_lambda_power; |
|
|
int lambda_a; |
|
|
int comp_lambda_a; |
|
|
int lambda_b; |
|
|
int comp_lambda_b; |
|
|
int lambda_t; |
|
|
int mu_t; |
|
|
int max_gain; |
|
|
int inv_max_gain; |
|
|
|
|
|
int smooth_scaling[2 * 4 + 3]; |
|
|
}; |
|
|
typedef struct noise_stats_s noise_stats_t; |
|
|
struct htk_fe_s |
|
|
{ |
|
|
unsigned short pow_hist[98]; |
|
|
noise_stats_t *noise_stats; |
|
|
|
|
|
|
|
|
int fe_prior; |
|
|
uint32 int_frame; |
|
|
char int_flag; |
|
|
|
|
|
|
|
|
unsigned char sum_cnt; |
|
|
|
|
|
uint8 voice_state; |
|
|
uint8 in_speech; |
|
|
int16 pre_speech_frames; |
|
|
int16 post_speech_frames; |
|
|
|
|
|
int16 inout_spch[410]; |
|
|
int fe_tmp_spch[250]; |
|
|
int fe_fft_buf[512]; |
|
|
int fe_mfspec[40]; |
|
|
int cmn_sum[13]; |
|
|
int mfcc_fifo[9][13]; |
|
|
int *cmn_mfcc_fifo[9]; |
|
|
//int fe_prior; |
|
|
char speech_fifo[16]; |
|
|
char init_speech_n; |
|
|
char vad_cnt; |
|
|
int buf_cep[21][13]; |
|
|
int buf_pow[21]; |
|
|
uint8 speech_indexw; |
|
|
uint8 speech_indexr; |
|
|
uint8 speech_cnt; |
|
|
|
|
|
|
|
|
int16 sp_threshold; |
|
|
int16 nsp_threshold; |
|
|
}; |
|
|
|
|
|
typedef struct htk_fe_s htk_fe_t; |
|
|
/*
 * How to use:
 *
 * 1. Before recording starts (enable the VAD):
 *        int need_buf_size = vad_get_need_buf_size();
 *        vad_hdl = malloc(need_buf_size);
 *        vad_init(vad_hdl, 10, 51);
 *
 * 2. After recording ends (disable the VAD):
 *        free(vad_hdl);
 *
 * 3. In the recording data callback:
 *        int ret = vad_main(vad_hdl, (int *)buf);
 *    buf: recorded audio data; must be 320 bytes.
 *    ret: 0 -> silence, 1 -> speech in progress, 2 -> speech ended.
 */

/* Returns the size, in bytes, of the buffer the caller must allocate for the VAD. */
int vad_get_need_buf_size(void);

/*
 * Initializes the VAD inside the caller-allocated buffer `p`, which must be
 * at least vad_get_need_buf_size() bytes.
 * sp_threshold / nsp_threshold: thresholds (the usage example passes 10 and 51).
 * The meaning of the return value is not documented in this header.
 */
int vad_init(void *p, short sp_threshold, short nsp_threshold);

/*
 * Processes one block of recorded audio.
 * p:          buffer previously set up with vad_init().
 * inout_spch: audio data; 320 bytes per call.
 * Returns 0 for silence, 1 while speech is in progress, 2 when speech ended.
 */
int vad_main(void *p,int *inout_spch);

fixed32 fe_log_add(fixed32 x, fixed32 y); |
|
|
fixed32 fe_log_sub(fixed32 x, fixed32 y); |
|
|
int32 fixlog2(uint32 x); |
|
|
int fixlog(uint32 x); |
|
|
|
|
|
#endif /* JL_vad_h */