You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

144 lines
3.0 KiB

//
// JL_vad.h
// Test
//
// Created by DFung on 2018/2/28.
// Copyright © 2018年 DFung. All rights reserved.
//
#ifndef JL_vad_h
#define JL_vad_h
/*
 * Compile-time constants of the VAD / feature-extraction front end.
 * NOTE(review): the meanings below are inferred from the macro names only;
 * confirm against the implementation before relying on them.
 */
#define CONT_AD_ADFRMSIZE 16 /* presumably a frame-size setting -- confirm */
#define DEFAULT_RADIX 12     /* presumably the fixed-point radix of fixed32 -- confirm */
#define SMOOTH_WINDOW 4      /* presumably the smoothing window half-width -- confirm */
/* Parenthesized so the negative literal always expands as a single operand. */
#define MIN_FIXLOG (-2829416)
#define KWSNUM 1 /* number of keywords to be recognized (recovered from a mis-encoded comment) */
//typedef long long __int64;
/*
 * Project-local numeric aliases.
 * The widths suggested by the names are not enforced here: each alias is
 * simply the built-in type shown, so the names are accurate only on targets
 * where char/short/int/long long are 8/16/32/64 bits wide.
 */

/* Signed integers, narrowest to widest. */
typedef signed char int8;
typedef short int16;
typedef int int32;
typedef long long int64;

/* Unsigned integers, narrowest to widest. */
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;

/* fixed32 is a plain alias of int32 (the name suggests fixed-point use). */
typedef int32 fixed32;
typedef float float32;
/*
 * Running statistics kept by the noise-estimation stage.
 *
 * Each per-band array has room for 40 entries; num_filters holds the number
 * of items to process. Member order and sizes are kept exactly as declared
 * originally.
 * NOTE(review): the layout is presumably shared with the implementation that
 * sizes the buffer returned by vad_get_need_buf_size() -- do not reorder
 * members without checking.
 */
typedef struct noise_stats_s {
    /* Smoothed power */
    int power[40], max_power[40];
    /* Noise estimate */
    int noise[40], max_noise[40];
    /* Signal floor estimate */
    int floor[40], max_floor[40];
    /* Peak for temporal masking */
    int peak[40], max_peak[40];

    int signal[40], gain[40];

    char remove_noi; /* presumably a noise-removal flag -- confirm */

    /* Initialize it next time */
    uint8 undefined;
    /* Number of items to process */
    uint32 num_filters;

    /* Sum of slow peaks for VAD */
    int64 slow_peak_sum, max_slow_peak_sum;

    /* Precomputed constants */
    int lambda_power, comp_lambda_power;
    int lambda_a, comp_lambda_a;
    int lambda_b, comp_lambda_b;
    int lambda_t;
    int mu_t;
    int max_gain, inv_max_gain;

    /* 11 entries; the 4 presumably mirrors SMOOTH_WINDOW -- confirm */
    int smooth_scaling[2 * 4 + 3];
} noise_stats_t;
/*
 * State of the feature-extraction / VAD front end.
 *
 * The groupings below follow the member names only; the exact semantics are
 * defined by the implementation, which is not part of this header. Member
 * order and sizes are kept exactly as declared originally.
 * NOTE(review): presumably this is the object that lives in the buffer whose
 * size vad_get_need_buf_size() reports -- confirm before changing the layout.
 */
typedef struct htk_fe_s {
    unsigned short pow_hist[98];
    noise_stats_t *noise_stats; /* pointer to the noise statistics */
    int fe_prior;

    /* Frame counters and flags */
    uint32 int_frame;
    char int_flag;
    unsigned char sum_cnt;

    /* Speech / non-speech tracking */
    uint8 voice_state;
    uint8 in_speech;
    int16 pre_speech_frames;
    int16 post_speech_frames;

    /* Sample and spectral working buffers */
    int16 inout_spch[410];
    int fe_tmp_spch[250];
    int fe_fft_buf[512];
    int fe_mfspec[40];

    /* 13-wide coefficient buffers, with a 9-row FIFO and per-row pointers */
    int cmn_sum[13];
    int mfcc_fifo[9][13];
    int *cmn_mfcc_fifo[9];

    char speech_fifo[16];
    char init_speech_n;
    char vad_cnt;

    /* 21-entry buffers */
    int buf_cep[21][13];
    int buf_pow[21];

    /* Write index, read index and count */
    uint8 speech_indexw;
    uint8 speech_indexr;
    uint8 speech_cnt;

    /* Same names as the vad_init() threshold parameters */
    int16 sp_threshold;
    int16 nsp_threshold;
} htk_fe_t;
/*
 * How to use:
 *
 * 1. Before starting to record:
 *      /--- enable VAD ---/
 *      int need_buf_size = vad_get_need_buf_size();
 *      vad_hdl = malloc(need_buf_size);
 *      vad_init(vad_hdl, 10, 51);
 *
 * 2. After recording has finished:
 *      /--- disable VAD ---/
 *      free(vad_hdl);
 *
 * 3. In the recorded-data callback:
 *      buf: recorded data, must be 320 bytes!
 *      ret: 0 -> silence, 1 -> speaking, 2 -> speech ended
 *      int ret = vad_main(vad_hdl, (int*)buf);
 */

/* Returns the size of the buffer the caller must allocate for the VAD handle. */
int vad_get_need_buf_size(void);

/*
 * Initializes the VAD state in the caller-allocated buffer p.
 * sp_threshold / nsp_threshold: thresholds (the usage example passes 10 and 51).
 * NOTE(review): the meaning of the return value is not documented here -- confirm.
 */
int vad_init(void *p, short sp_threshold, short nsp_threshold);

/*
 * Processes one block of recorded data (320 bytes per the usage notes).
 * Returns 0 for silence, 1 while speech is in progress, 2 when speech has ended.
 */
int vad_main(void *p,int *inout_spch);

/*
 * Fixed-point helpers.
 * NOTE(review): judging by the names these are log-domain add/subtract and
 * fixed-point logarithms -- confirm against the implementation.
 */
fixed32 fe_log_add(fixed32 x, fixed32 y);
fixed32 fe_log_sub(fixed32 x, fixed32 y);
int32 fixlog2(uint32 x);
int fixlog(uint32 x);
#endif /* JL_vad_h */