You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
225 lines
7.9 KiB
225 lines
7.9 KiB
/*************************************************************************/ |
|
/* */ |
|
/* Language Technologies Institute */ |
|
/* Carnegie Mellon University */ |
|
/* Copyright (c) 2001 */ |
|
/* All Rights Reserved. */ |
|
/* */ |
|
/* Permission is hereby granted, free of charge, to use and distribute */ |
|
/* this software and its documentation without restriction, including */ |
|
/* without limitation the rights to use, copy, modify, merge, publish, */ |
|
/* distribute, sublicense, and/or sell copies of this work, and to */ |
|
/* permit persons to whom this work is furnished to do so, subject to */ |
|
/* the following conditions: */ |
|
/* 1. The code must retain the above copyright notice, this list of */ |
|
/* conditions and the following disclaimer. */ |
|
/* 2. Any modifications must be clearly marked as such. */ |
|
/* 3. Original authors' names are not deleted. */ |
|
/* 4. The authors' names are not used to endorse or promote products */ |
|
/* derived from this software without specific prior written */ |
|
/* permission. */ |
|
/* */ |
|
/* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */ |
|
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ |
|
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ |
|
/* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */ |
|
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ |
|
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ |
|
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ |
|
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ |
|
/* THIS SOFTWARE. */ |
|
/* */ |
|
/*************************************************************************/ |
|
/* Author: Alan W Black (awb@cs.cmu.edu) */ |
|
/* Date: January 2001 */ |
|
/*************************************************************************/ |
|
/* */ |
|
/* An F0 model */ |
|
/* This is derived from the f2b model freely distributed in Festival */
|
/* */ |
|
/*************************************************************************/ |
|
|
|
#include "cst_hrg.h" |
|
#include "cst_phoneset.h" |
|
#include "us_f0.h" |
|
|
|
static void apply_lr_model(cst_item *s, |
|
const us_f0_lr_term *f0_lr_terms, |
|
float *start, |
|
float *mid, |
|
float *end) |
|
{ |
|
int i; |
|
const cst_val *v=0; |
|
float fv; |
|
|
|
/* Interceptors */ |
|
*start = f0_lr_terms[0].start; |
|
*mid = f0_lr_terms[0].mid; |
|
*end = f0_lr_terms[0].end; |
|
for (i=1; f0_lr_terms[i].feature; i++) |
|
{ |
|
if (!cst_streq(f0_lr_terms[i].feature,f0_lr_terms[i-1].feature)) |
|
v = ffeature(s,f0_lr_terms[i].feature); |
|
if (f0_lr_terms[i].type) |
|
{ |
|
if (cst_streq(val_string(v),f0_lr_terms[i].type)) |
|
fv = 1.0; |
|
else |
|
fv = 0.0; |
|
} |
|
else |
|
fv = val_float(v); |
|
(*start) += fv*f0_lr_terms[i].start; |
|
(*mid) += fv*f0_lr_terms[i].mid; |
|
(*end) += fv*f0_lr_terms[i].end; |
|
/* printf("f %s start %f mid %f end %f\n", |
|
f0_lr_terms[i].feature, |
|
*start,*mid,*end); */ |
|
} |
|
} |
|
|
|
/*
 * Append a new item to relation targ and set its "pos" and "f0" features.
 * The stored f0 is limited to the range [50, 500]: the model can
 * occasionally produce implausible values, so they are clamped here.
 */
static void add_target_point(cst_relation *targ,float pos, float f0)
{
    cst_item *point = relation_append(targ,NULL);
    float clamped = f0;

    if (f0 > 500.0)
        clamped = 500.0;
    else if (f0 < 50.0)
        clamped = 50.0;

    item_set_float(point,"pos",pos);
    item_set_float(point,"f0",clamped);
}
|
|
|
/* Model mean and stddev, taken from f2b/kal_diphone */
#define model_mean 170.0
#define model_stddev 34

/*
 * Map a value v predicted in the model's F0 space (mean model_mean,
 * standard deviation model_stddev) onto a target distribution with
 * mean m and standard deviation s:
 *
 *     ((v - model_mean) / model_stddev) * s + m
 *
 * Every parameter is parenthesized so that expression arguments
 * (e.g. a + b) expand with the intended precedence.
 */
#define map_f0(v,m,s) (((((v)-model_mean)/model_stddev)*(s))+(m))
|
|
|
/*
 * Return TRUE when syl has no previous item, or when the segment before
 * the syllable's first segment is named "pau"; FALSE otherwise.
 */
static int post_break(cst_item *syl)
{
    /* No previous item: treated as following a break */
    if (item_prev(syl) == 0)
        return TRUE;

    if (cst_streq("pau",
                  ffeature_string(syl,
                                  "R:SylStructure.daughter.R:Segment.p.name")))
        return TRUE;

    return FALSE;
}
|
|
|
/*
 * Return TRUE when syl has no next item, or when the segment after the
 * syllable's last segment is named "pau"; FALSE otherwise.
 */
static int pre_break(cst_item *syl)
{
    /* No next item: treated as preceding a break */
    if (item_next(syl) == 0)
        return TRUE;

    if (cst_streq("pau",
                  ffeature_string(syl,
                                  "R:SylStructure.daughtern.R:Segment.n.name")))
        return TRUE;

    return FALSE;
}
|
|
|
/*
 * Return the time point midway through the vowel of syllable syl.
 *
 * The syllable's segments are scanned in order for the first one whose
 * "vc" phone feature is "+"; the result is the average of that segment's
 * "end" and the "end" of the segment preceding it.  If no such segment
 * exists (shouldn't happen) the first segment is used instead, and if the
 * syllable has no segments at all (shouldn't happen) 0 is returned.
 */
static float vowel_mid(cst_item *syl)
{
    const cst_phoneset *phones = item_phoneset(syl);
    cst_item *first_seg = item_daughter(item_as(syl,"SylStructure"));
    cst_item *seg;

    /* no segments, shouldn't happen */
    if (first_seg == 0)
        return 0;

    for (seg = first_seg; seg; seg = item_next(seg))
    {
        if (cst_streq("+", phone_feature_string(phones,
                                                item_feat_string(seg,"name"),
                                                "vc")))
            break;
    }

    /* no vowel in syllable, shouldn't happen: fall back to first segment */
    if (seg == 0)
        seg = first_seg;

    return (item_feat_float(seg,"end")+
            ffeature_float(seg,"R:Segment.p.end"))/2.0;
}
|
|
|
/*
 * F0 target model: Black and Hunt ICSLP96, three points per syllable.
 *
 * Creates the "Target" relation on u and fills it with ("pos", "f0")
 * points derived from the linear-regression model in f0_lr_terms:
 * one at the start of each syllable, one at its vowel midpoint and,
 * when the syllable precedes a break, one at its end.  Syllables with
 * no segments are skipped.  Finally the targets are extended so that
 * they span from time 0 to the end of the last segment.
 *
 * Utterance features read:
 *   no_f0_target_model    if present, u is returned untouched
 *   int_f0_target_mean    target mean (default 100.0)
 *   f0_shift              multiplier applied to the mean (default 1.0)
 *   int_f0_target_stddev  target standard deviation (default 12.0)
 *
 * Per-token overrides local_f0_shift / local_f0_range are honoured when
 * they are non-zero.
 *
 * Returns u.
 */
cst_utterance *us_f0_model(cst_utterance *u)
{
    cst_relation *targets;
    cst_item *syl, *first, *last, *lead;
    float base_mean, base_stddev;
    float syl_mean, syl_stddev;
    float start, mid, end;
    float lend = 0;    /* mapped end value carried over from previous syllable */
    float utt_end;

    if (feat_present(u->features,"no_f0_target_model"))
        return u;

    targets = utt_relation_create(u,"Target");
    base_mean = get_param_float(u->features,"int_f0_target_mean", 100.0);
    base_mean *= get_param_float(u->features,"f0_shift", 1.0);
    base_stddev = get_param_float(u->features,"int_f0_target_stddev", 12.0);

    for (syl = relation_head(utt_relation(u,"Syllable"));
         syl;
         syl = item_next(syl))
    {
        if (!item_daughter(item_as(syl,"SylStructure")))
            continue;  /* no segs in syl */

        /* A non-zero local shift scales the global mean */
        syl_mean = ffeature_float(syl,"R:SylStructure.parent.R:Token.parent.local_f0_shift");
        syl_mean = syl_mean ? syl_mean * base_mean : base_mean;

        /* A non-zero local range replaces the global stddev */
        syl_stddev = ffeature_float(syl,"R:SylStructure.parent.R:Token.parent.local_f0_range");
        if (syl_stddev == 0.0)
            syl_stddev = base_stddev;

        apply_lr_model(syl,f0_lr_terms,&start,&mid,&end);

        /* After a break there is no previous syllable end to blend with */
        if (post_break(syl))
            lend = map_f0(start,syl_mean,syl_stddev);

        /* NOTE(review): lend holds an already-mapped value while start is
           raw model output, so this average mixes two scales before it is
           mapped again.  Behaviour is preserved as-is -- confirm whether
           this is intended. */
        add_target_point(targets,
                         ffeature_float(syl,
                                        "R:SylStructure.daughter.R:Segment.p.end"),
                         map_f0((start+lend)/2.0,syl_mean,syl_stddev));

        add_target_point(targets,
                         vowel_mid(syl),
                         map_f0(mid,syl_mean,syl_stddev));

        lend = map_f0(end,syl_mean,syl_stddev);

        if (pre_break(syl))
            add_target_point(targets,
                             ffeature_float(syl,"R:SylStructure.daughtern.end"),
                             map_f0(end,syl_mean,syl_stddev));
    }

    /* Guarantee targets go from start to end of utterance */
    first = relation_head(targets);
    if (first == 0)
    {
        add_target_point(targets,0,base_mean);
    }
    else if (item_feat_float(first,"pos") > 0)
    {
        /* Duplicate the first target's f0 at time 0 */
        lead = item_prepend(first,NULL);
        item_set_float(lead,"pos",0.0);
        item_set_float(lead,"f0",item_feat_float(first,"f0"));
    }

    last = relation_tail(targets);
    utt_end = item_feat_float(relation_tail(utt_relation(u,"Segment")),"end");
    if (item_feat_float(last,"pos") < utt_end)
        add_target_point(targets,utt_end,item_feat_float(last,"f0"));

    return u;
}
|
|
|