37#include "EST_speech_class.h"
38#include "sigpr/EST_sigpr_utt.h"
39#include "sigpr/EST_filter.h"
42#include "EST_string_aux.h"
63 EST_error(
"Unknown pda %s\n", (
const char *)
method);
80 EST_error(
"Unknown pda %s\n", (
const char *)
method);
92 if (op.
I(
"do_low_pass",0))
93 FIRlowpass_filter(
sig, op.
I(
"lpf_cutoff"),op.
I(
"lpf_order"));
127 fz.set_equal_space(
true);
129 fz.set_channel_name(
"F0", 0);
133 if (!
fz.equal_space())
134 EST_error(
"Pitch tracking algorithm must have equal spaced track\n");
140 for (i = 0; i <
cc.size;
cc.coeff[i++] = 0.0);
184 srpd->L = DEFAULT_DECIMATION;
185 srpd->min_pitch = DEFAULT_MIN_PITCH;
186 srpd->max_pitch = DEFAULT_MAX_PITCH;
187 srpd->shift = DEFAULT_SHIFT;
188 srpd->length = DEFAULT_LENGTH;
189 srpd->Tsilent = DEFAULT_TSILENT;
190 srpd->Tmin = DEFAULT_TMIN;
191 srpd->Tmax_ratio = DEFAULT_TMAX_RATIO;
192 srpd->Thigh = DEFAULT_THIGH;
193 srpd->Tdh = DEFAULT_TDH;
194 srpd->make_ascii = 0;
195 srpd->peak_tracking = 0;
196 srpd->sample_freq = DEFAULT_SF;
203 if (
al.present(
"decimation"))
204 srpd->L =
al.I(
"decimation");
205 if (
al.present(
"min_pitch"))
206 srpd->min_pitch =
al.F(
"min_pitch");
207 if (
al.present(
"max_pitch"))
208 srpd->max_pitch =
al.F(
"max_pitch");
209 if (
al.present(
"pda_frame_shift"))
210 srpd->shift =
al.F(
"pda_frame_shift") * 1000.0;
211 if (
al.present(
"pda_frame_length"))
212 srpd->length =
al.F(
"pda_frame_length") * 1000.0;
213 if (
al.present(
"noise_floor"))
214 srpd->Tsilent =
al.I(
"noise_floor");
215 if (
al.present(
"v2uv_coeff_thresh"))
216 srpd->Thigh =
al.F(
"v2uv_coef_thresh");
217 if (
al.present(
"min_v2uv_coef_thresh"))
218 srpd->Tmin =
al.F(
"min_v2uv_coef_thresh");
219 if (
al.present(
"v2uv_coef_thresh_ratio"))
220 srpd->Tmax_ratio =
al.F(
"v2uv_coef_thresh_ratio");
221 if (
al.present(
"anti_doubling_thresh"))
222 srpd->Tdh =
al.F(
"anti_doubling_thresh");
223 if (
al.present(
"peak_tracking"))
224 srpd->peak_tracking =
al.I(
"peak_tracking");
225 if (
al.present(
"sample_frequency"))
226 srpd->sample_freq =
al.I(
"sample_frequency");
231 al.set(
"min_pitch",
"40.0");
232 al.set(
"max_pitch",
"400.0");
233 al.set(
"pda_frame_shift",
"0.005");
234 al.set(
"pda_frame_length", DEFAULT_LENGTH / 1000.0);
235 al.set(
"lpf_cutoff",
"600");
236 al.set(
"lpf_order",
"49");
237 al.set(
"f0_file_type",
"esps");
238 al.set(
"decimation", DEFAULT_DECIMATION);
239 al.set(
"noise_floor", DEFAULT_TSILENT);
240 al.set(
"min_v2uv_coef_thresh", DEFAULT_TMIN);
241 al.set(
"v2uv_coef_thresh_ratio", DEFAULT_TMAX_RATIO);
242 al.set(
"v2uv_coef_thresh", DEFAULT_THIGH);
243 al.set(
"anti_doubling_thresh", DEFAULT_TDH);
244 al.set(
"peak_tracking", 0);
252 "-L Perform low pass filtering on input. This option should always \n"
253 " be used in normal processing as it usually increases \n"
254 " performance considerably\n\n"
255 "-P perform peak tracking\n\n"
256 "-fmin <float> miniumum F0 value. Sets the minimum allowed F0 in \n"
257 " output track. Default is "+ftoString(DEFAULT_MIN_PITCH)+
".\n "
258 " Changing this to suit the speaker usually increases \n"
259 " performance. Typical recommended values are 60-90Hz for\n"
260 " males and 120-150Hz for females\n\n"
261 "-fmax <float> maxiumum F0 value. Sets the maximum allowed F0 in \n"
262 " output track. Default is "+ftoString(DEFAULT_MAX_PITCH)+
". \n"
263 " Changing this to suit the speaker usually increases \n"
264 " performance. Typical recommended values are 200Hz for \n"
265 " males and 300-400Hz for females\n\n"
266 "-shift <float> frame spacing in seconds for fixed frame analysis. \n"
267 " This doesn't have to be the same as the output file spacing - \n"
268 " the -S option can be used to resample the track before saving \n"
269 " default: "+ftoString(DEFAULT_SHIFT/1000.0) +
"\n\n"
270 "-length <float> analysis frame length in seconds.\n"
271 " default: "+ftoString(DEFAULT_LENGTH/1000.0) +
"\n\n"
272 "-lpfilter <int> Low pass filter, with cutoff frequency in Hz \n"
273 " Filtering is performed by a FIR filter which is built at run \n"
274 " time. The order of the filter can be given by -forder. The \n"
275 " default value is 199\n\n"
276 "-forder <int> Order of FIR filter used for lpfilter and \n"
277 " hpfilter. This must be ODD. Sensible values range \n"
278 " from 19 (quick but with a shallow rolloff) to 199 \n"
279 " (slow but with a steep rolloff). The default is 199.\n\n";
287 "-d <float> decimation factor\n"
288 " set down-sampling for quicker computation so that only one in \n"
289 " <parameter>decimation factor</parameter> samples are used in the first instance. \n"
290 " Must be in the range of one to ten inclusive. Default is four. \n"
291 " For data sampled at 10kHz, it is advised that a decimation \n"
292 " factor of two isselected.\n\n"
294 "-n <float> Inoise floor.\n"
295 " Set the maximum absolute signal amplitude that represents \n"
296 " silence to <parameter>Inoise floor</parameter>. If the absolute amplitude of \n"
297 " the first segment in a given frame is below this level at all \n"
298 " times, then the frame is classified as representing silence. \n"
299 " Must be a positive number. Default is 120 ADC units.\n\n"
301 "-H <float> unvoiced to voiced coeff threshold\n"
302 " set the correlation coefficient threshold which must be \n"
303 " exceeded in a transition from an unvoiced classified frame \n"
304 " of speech to a voiced frame as the unvoiced to voiced coeff \n"
305 " threshold. Must be in the range zero to one inclusive. \n"
306 " Default is 0.88.\n\n"
308 "-m <float> min voiced to unvoiced coeff threshold \n"
309 " set the minimum allowed correlation coefficient threshold \n"
310 " which must not be exceeded in a transition from a voiced \n"
311 " classified frame of speech to an unvoiced frame, as \n"
312 " <parameter>min voiced to unvoiced coeff threshold</parameter>. Must be in the \n"
313 " range zero to <parameter>unvoiced to voiced coeff threshold</parameter> \n"
314 " inclusive. Default is 0.75.\n\n"
316 "-R <float> voiced to unvoiced coeff threshold-ratio \n"
317 " set the scaling factor used in determining the correlation\n"
318 " coefficient threshold which must not be exceeded in a voiced \n"
319 " frame to unvoiced frame transition, as <parameter>voiced to unvoiced</parameter> \n"
320 " coeff threshold -ratio. The voiced to unvoiced coefficient \n"
321 " threshold is determined by multiplying this scaling factor \n"
322 " with the maximum cross-correlation coefficient of the \n"
323 " previously voiced frame. If this product is less than \n"
324 " <parameter>min voiced to unvoiced coeff threshold</parameter> then this is used \n"
325 " instead. Must be in the range zero to one inclusive. \n"
326 " Default is 0.85.\n\n"
328 "-t <float> anti pitch doubling/halving threshold\n"
329 " set the threshold used in eliminating (as far as possible) \n"
330 " pitch doubling and pitch halving errors as <parameter>anti pitch \n"
331 " double/halving threshold</parameter>. Must be in the range zero to \n"
332 " one inclusive. Default is 0.77.\n\n";
const EST_String S(const EST_String &path) const
int present(const EST_String &name) const
const int I(const EST_String &path) const