597 lines
16 KiB
C
597 lines
16 KiB
C
/* atsa.h
|
|
* atsa: ATS analysis implementation (with re-synthesis tools)
|
|
* Oscar Pablo Di Liscia / Pete Moss / Juan Pampin
|
|
*/
|
|
|
|
#ifndef ATSA_H
|
|
#define ATSA_H
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif /* __cplusplus */
|
|
|
|
/* Identifiers for the analysis window types (used as win_type values). */
#define BLACKMAN   0
#define BLACKMAN_H 1
#define HANNING    2
#define VONHANN    3
|
|
|
|
/*
 * analysis parameters
 */
#define ATSA_START      0.0         /* start time */
#define ATSA_DUR        0.0         /* duration */
#define ATSA_LFREQ      20.0        /* lowest frequency (hertz) */
#define ATSA_HFREQ      20000.0     /* highest frequency (hertz) */
#define ATSA_FREQDEV    0.1         /* frequency deviation (ratio) */
#define ATSA_WCYCLES    4           /* number of f0 cycles in window */
#define ATSA_WTYPE      BLACKMAN_H  /* window type */
#define ATSA_WSIZE      1024        /* window size */
#define ATSA_HSIZE      0.25        /* hop size proportional to window size (ratio) */
/* lowest magnitude for peaks (amp)
 * NOTE(review): the unit is given as "amp" but the negative value reads
 * like decibels - confirm against the code that consumes it. */
#define ATSA_LMAG       -60.0
#define ATSA_TRKLEN     3           /* length of analysis tracks (frames) */
#define ATSA_MSEGLEN    3           /* minimum short partial length (frames) */
#define ATSA_MSEGSMR    60.0        /* minimum short partial SMR average (dB SPL) */
#define ATSA_MGAPLEN    3           /* minimum gap length (frames) */
#define ATSA_SMRTHRES   30.0        /* threshold for partial SMR average (dB SPL) */
#define ATSA_LPKCONT    0.0         /* last peak contribution for tracking (ratio) */
#define ATSA_SMRCONT    0.0         /* SMR contribution for tracking (ratio) */
#define ATSA_MFRAMES    4           /* minimum number of frames for analysis (frames) */

/* offset used when increasing the max supported number of partials in ATS_SOUND */
#define ATSA_INCROFFSET 10

/* default analysis file type:
 *   1 = only amp. and freq.
 *   2 = amp., freq. and phase
 *   3 = amp., freq. and noise
 *   4 = amp., freq., phase, and noise
 */
#define ATSA_TYPE       4
|
|
|
|
/* Predicates on an ATS file type value: types 2 and 4 carry phase data,
 * types 3 and 4 carry noise data. The argument is evaluated more than once,
 * so do not pass an expression with side effects. */
#define FILE_HAS_PHASE(typ) ((typ) == 2. || (typ) == 4.)
#define FILE_HAS_NOISE(typ) ((typ) == 3. || (typ) == 4.)
|
|
|
|
/* constants and macros */
#define PI    3.141592653589793
#define TWOPI 6.283185307179586
#define NIL   -1
|
|
/*
 * AMP_DB: linear amplitude -> decibels, i.e. 20*log10(amp). An amplitude of
 *         exactly 0.0 yields the sentinel -32767.0 rather than -infinity.
 *         (The previous expansion computed log10(amp*20), which is not a
 *         decibel value and is not the inverse of DB_AMP.)
 * DB_AMP: decibels -> linear amplitude, i.e. 10^(db/20); inverse of AMP_DB.
 *
 * Both expand to <math.h> calls (log10, pow), which the including file must
 * make available. Arguments are parenthesized so that compound expressions
 * expand correctly; AMP_DB evaluates its argument twice, so do not pass an
 * expression with side effects.
 */
#define AMP_DB(amp) ((amp) != 0.0 ? (double)(20.0 * log10(amp)) : (double)-32767.0)
#define DB_AMP(db)  ((double)pow(10.0, (db) / 20.0))
|
|
#define ATSA_MAX_DB_SPL      100.0
#define ATSA_NOISE_THRESHOLD -120
#define ATSA_CRITICAL_BANDS  25   /* ATSA_CRITICAL_BAND_EDGES holds one more entry than this */
#define ATSA_NOISE_VARIANCE  0.04
|
|
|
|
/* array of critical band frequency edges based on data from:
|
|
* Zwicker, Fastl (1990) "Psychoacoustics Facts and Models",
|
|
* Berlin ; New York : Springer-Verlag
|
|
*/
|
|
extern double ATSA_CRITICAL_BAND_EDGES[ATSA_CRITICAL_BANDS+1];
|
|
|
|
/* data structures */
|
|
|
|
/* ANARGS
 * ======
 * analysis parameters
 *
 * The per-field notes in the first group reuse the wording of the ATSA_*
 * macros of similar name (NOTE(review): the field/macro pairing is assumed
 * from the names - confirm against the code that fills this structure).
 */
typedef struct {
  double start;           /* start time */
  double duration;        /* duration */
  double lowest_freq;     /* lowest frequency (hertz) */
  double highest_freq;    /* highest frequency (hertz) */
  double freq_dev;        /* frequency deviation (ratio) */
  int    win_cycles;      /* number of f0 cycles in window */
  int    win_type;        /* window type */
  int    win_size;        /* window size */
  double hop_size;        /* hop size proportional to window size (ratio) */
  double lowest_mag;      /* lowest magnitude for peaks */
  int    track_len;       /* length of analysis tracks (frames) */
  int    min_seg_len;     /* minimum short partial length (frames) */
  int    min_gap_len;     /* minimum gap length (frames) */
  double last_peak_cont;  /* last peak contribution for tracking (ratio) */
  double SMR_cont;        /* SMR contribution for tracking (ratio) */
  double SMR_thres;       /* threshold for partial SMR average (dB SPL) */
  double min_seg_SMR;     /* minimum short partial SMR average (dB SPL) */

  /* parameters computed from command line */
  int    first_smp;
  int    cycle_smp;
  int    hop_smp;
  int    total_samps;
  int    srate;
  int    fft_size;
  double fft_mag;
  int    lowest_bin;
  int    highest_bin;
  int    frames;
  int    type;
} ANARGS;
|
|
|
|
/* ATS_FFT
 * =======
 * fft data
 */
typedef struct {
  int     size;  /* NOTE(review): presumably the transform size - confirm */
  int     rate;  /* NOTE(review): presumably the sampling rate - confirm */
  double *fdr;   /* NOTE(review): presumably the real part data - confirm */
  double *fdi;   /* NOTE(review): presumably the imaginary part data - confirm */
} ATS_FFT;
|
|
|
|
/* ATS_PEAK
 * ========
 * spectral peak data
 */
typedef struct {
  double amp;    /* amplitude */
  double frq;    /* frequency */
  double pha;    /* phase */
  double smr;    /* signal to mask ratio */
  int    track;  /* NOTE(review): presumably the index of the track the peak belongs to - confirm */
} ATS_PEAK;
|
|
|
|
/* ATS_FRAME
|
|
* =========
|
|
* analysis frame data
|
|
*/
|
|
typedef struct {
|
|
ATS_PEAK *peaks;
|
|
int n_peaks;
|
|
double time;
|
|
} ATS_FRAME;
|
|
|
|
/* ATS_HEADER
 * ==========
 * ats file header data (every field is stored as a double)
 */
#define ATS_MAGIC_NUMBER ((double)123.0)

typedef struct {
  double mag;  /* Magic Number for ID of file, must be 123.00 */
  double sr;   /* sampling rate */
  double fs;   /* Frame size (samples) */
  double ws;   /* Window size (samples) */
  double par;  /* number of partials per frame */
  double fra;  /* number of frames present */
  double ma;   /* max. amplitude */
  double mf;   /* max. frequency */
  double dur;  /* duration (secs) */
  /* type (1, 2, 3 or 4):
   *   1 = only amp. and freq.
   *   2 = amp., freq. and phase
   *   3 = amp., freq. and noise
   *   4 = amp., freq., phase, and noise
   */
  double typ;
} ATS_HEADER;
|
|
|
|
/* ATS_SOUND
|
|
* =========
|
|
* ATS analysis data
|
|
*/
|
|
typedef struct {
|
|
|
|
/* global sound info */
|
|
int srate; /* Sampling rate (Hz) */
|
|
int frame_size; /* Number of samples per frame */
|
|
int window_size; /* Number of samples per window */
|
|
int partials; /* Number of detected partials (<= max_partials) */
|
|
int max_partials; /* Max supported number of partials */
|
|
int frames; /* Number of frames */
|
|
double dur; /* Sound duration (s) */
|
|
|
|
/* info deduced from analysis */
|
|
int optimized; /* Number of partials removed by optimization*/
|
|
double ampmax; /* Maximum partial amplitude */
|
|
double frqmax; /* Maximum partial frequency (Hz) */
|
|
ATS_PEAK *av; /* Peak averages; partials are sorted by increasing frequency */
|
|
|
|
/* sinusoidal data (all of these ** are accessed as [partial][frame]) */
|
|
double **time; /* Start time for each frame, only partial 0 used ; don't read/write others */
|
|
double **frq; /* Frequency (Hz) */
|
|
double **amp; /* Amplitude */
|
|
double **pha; /* Phase (radian ?) */
|
|
double **smr; /* Signal to Mask Ratio */
|
|
|
|
/* noise data */
|
|
double **res; /* Noise energy for each [partial][frame] */
|
|
double **band_energy; /* Noise energy for each [critical band][frame] */
|
|
|
|
} ATS_SOUND;
|
|
|
|
|
|
/* SPARAMS
 * =======
 * Parameters for resynthesis, from ATS synthesis data
 * (was originally in atsh)
 */
typedef struct {
  double amp;          /* deterministic amplitude scalar */
  double ramp;         /* residual amplitude scalar */
  double frec;         /* global frequency scalar */
  double max_stretch;  /* max time scalar */
  double beg;          /* begin synthesis time */
  double end;          /* end synthesis time */
  double sr;           /* sampling rate */
  short  allorsel;     /* when TRUE, use only selected partials; when FALSE, use all */
  short  upha;         /* when TRUE, the phase information (if any) is used; when FALSE, it is not */
} SPARAMS;
|
|
|
|
/* CURVE
 * =====
 * A curve described by control points (was originally in atsh)
 */
typedef struct {
  int num_ctlpoints;      /* number of control points */
  double (*ctlpoint)[2];  /* array of control points: [i][0] = x, [i][1] = y */
} CURVE;
|
|
|
|
|
|
/* Interface:
|
|
* ==========
|
|
* grouped by file in alphabetical order
|
|
*/
|
|
|
|
/* critical-bands.c */
|
|
|
|
/* evaluate_smr
|
|
* ============
|
|
* evaluates the masking curves of an analysis frame
|
|
* peaks: pointer to an array of peaks
|
|
* peaks_size: number of peaks
|
|
*/
|
|
void evaluate_smr (ATS_PEAK *peaks, int peaks_size);
|
|
|
|
/* curve.c */
|
|
void curve_init(CURVE *curve);
|
|
void curve_free(CURVE *curve);
|
|
int get_nbp(CURVE *curve);
|
|
double get_x_value(CURVE *curve, int i_pt);
|
|
double get_y_value(CURVE *curve, int i_pt);
|
|
double get_minx_value(CURVE *curve);
|
|
double get_maxx_value(CURVE *curve);
|
|
double get_miny_value(CURVE *curve);
|
|
double get_maxy_value(CURVE *curve);
|
|
|
|
/* other-utils.c */
|
|
|
|
/* window_norm
|
|
* ===========
|
|
* computes the norm of a window
|
|
* returns the norm value
|
|
* win: pointer to a window
|
|
* size: window size
|
|
*/
|
|
double window_norm(double *win, int size);
|
|
|
|
/* make_window
 * ===========
 * makes an analysis window, returns a pointer to it.
 * win_type: window type, available types are:
 * BLACKMAN, BLACKMAN_H, HANNING and VONHANN
 * win_size: window size
 */
|
|
double *make_window(int win_type, int win_size);
|
|
|
|
/* push_peak
 * =========
 * pushes a peak into an array of peaks
 * re-allocating memory and updating its size
 * returns a pointer to the array of peaks.
 * new_peak: pointer to new peak to push into the array
 * peaks: array of peaks
 * peaks_size: pointer to the current size of the array.
 */
|
|
ATS_PEAK *push_peak(ATS_PEAK *new_peak, ATS_PEAK *peaks, int *peaks_size);
|
|
|
|
/* peak_frq_inc
|
|
* ============
|
|
* function used by qsort to sort an array of peaks
|
|
* in increasing frequency order.
|
|
*/
|
|
int peak_frq_inc(void const *a, void const *b);
|
|
|
|
/* peak_smr_dec
|
|
* ============
|
|
* function used by qsort to sort an array of peaks
|
|
* in decreasing SMR order.
|
|
*/
|
|
int peak_smr_dec(void const *a, void const *b);
|
|
|
|
|
|
/* fft
 * ===
 * standard fft based on simplfft by Joerg Arndt.
 * rl: pointer to real part data
 * im: pointer to imaginary part data
 * n: size of data
 * is: 1=forward transform -1=backward transform
 */
|
|
void fft(double *rl, double *im, int n, int is);
|
|
|
|
/* peak-detection.c */
|
|
|
|
/* peak_detection
|
|
* ==============
|
|
* detects peaks in a ATS_FFT block
|
|
* returns an array of detected peaks.
|
|
* ats_fft: pointer to ATS_FFT structure
|
|
* lowest_bin: lowest fft bin to start detection
|
|
* highest_bin: highest fft bin to end detection
|
|
* lowest_mag: lowest magnitude to detect peaks
|
|
* norm: analysis window norm
|
|
* peaks_size: pointer to size of the returned peaks array
|
|
*/
|
|
ATS_PEAK *peak_detection(ATS_FFT *ats_fft, int lowest_bin, int highest_bin, double lowest_mag, double norm, int *peaks_size);
|
|
|
|
/* peak-tracking.c */
|
|
|
|
/* peak_tracking
 * =============
 * connects peaks from one analysis frame to tracks
 * returns a pointer to the analysis frame.
 * tracks: pointer to the tracks
 * tracks_size: number of tracks
 * peaks: peaks to connect
 * peaks_size: pointer to the number of peaks
 * frq_dev: frequency deviation from tracks
 * SMR_cont: contribution of SMR to tracking
 * n_partials: pointer to the number of partials before tracking
 */
|
|
ATS_FRAME *peak_tracking(ATS_PEAK *tracks, int tracks_size, ATS_PEAK *peaks, int *peaks_size, double frq_dev, double SMR_cont, int *n_partials);
|
|
|
|
/* update_tracks
 * =============
 * updates analysis tracks
 * returns a pointer to the tracks.
 * tracks: pointer to the tracks
 * tracks_size: pointer to the number of tracks
 * track_len: length of tracks
 * frame_n: analysis frame number
 * ana_frames: pointer to previous analysis frames
 * last_peak_cont: contribution of last peak to the track
 */
|
|
ATS_PEAK *update_tracks (ATS_PEAK *tracks, int *tracks_size, int track_len, int frame_n, ATS_FRAME *ana_frames, double last_peak_cont);
|
|
|
|
/* save-load-sound.c */
|
|
|
|
/* ats_save
|
|
* ========
|
|
* saves an optimized ATS_SOUND to disk.
|
|
* sound: pointer to ATS_SOUND structure
|
|
* outfile: pointer to output ats file
|
|
* SMR_thres: partials with average SMR under this are not written
|
|
* type: file type
|
|
* NOTE: sound MUST be optimized using optimize_sound before calling this function
|
|
* Returns the total file size (nb of written bytes), or -1 in case of any error
|
|
*/
|
|
int ats_save(ATS_SOUND *sound, const char *outfile, double SMR_thres, int type);
|
|
|
|
/* ats_load
 * ========
 * loads an ATS_SOUND from disk (regardless of little/big endian byte order).
 * sound: pointer to ATS_SOUND structure
 * infile: pointer to input ats file
 * sound_type: pointer to an int
 *             (NOTE(review): presumably receives the type, 1 to 4, of the loaded file - confirm)
 * Returns the total file size in bytes = number of bytes read (or <0 value upon error)
 */
|
|
int ats_load(ATS_SOUND *sound, const char *infile, int* sound_type);
|
|
|
|
/* synthesis.c (was originally in atsh, synth-funcs.c) */
|
|
|
|
/* do_synthesis
 * ============
 * re-synthesis of an ATS_SOUND
 * ats_sound: pointer to ATS sound structure
 * sparams: pointer to synthesis parameters
 * timenv: time factor envelope to apply
 *         (f(t):[0,1]->[0,T], always increasing, applied to [sparams->beg, sparams->end])
 * selected: array of "selected" flags for partials (size = nb of partials)
 * out_samps: pointer to output array of monophonic samples
 * n_out_samps: pointer to nb output samples
 * NOTE: do_atsh_synthesis is declared with the same parameter list.
 */
|
|
void do_atsh_synthesis(ATS_SOUND *ats_sound, SPARAMS* sparams, CURVE* timenv, int *selected,
|
|
double** out_samps, int* n_out_samps);
|
|
void do_synthesis(ATS_SOUND *ats_sound, SPARAMS* sparams, CURVE* timenv, int *selected,
|
|
double** out_samps, int* n_out_samps);
|
|
|
|
/* tracker.c */
|
|
|
|
/* tracker
|
|
* =======
|
|
* partial tracking function
|
|
* returns an ATS_SOUND with data issued from analysis
|
|
* anargs: pointer to analysis parameters
|
|
* in_samps: monophonic input samples
|
|
* s_rate: sampling rate for the samples
|
|
* n_frames: nb input samples
|
|
* res: pointer to output residual data (channel 0 = residual, channel 1 = partials synthesis)
|
|
* res_len: pointer to output number of samples in residual data
|
|
*/
|
|
ATS_SOUND *tracker (ANARGS *anargs, double* in_samps, int s_rate, int n_frames,
|
|
double ***res, int* res_len);
|
|
|
|
/* utilities.c */
|
|
|
|
/* ppp2
|
|
* ====
|
|
* returns the closest power of two
|
|
* greater than num
|
|
*/
|
|
unsigned int ppp2(unsigned int num);
|
|
|
|
/* various conversion functions
 * to deal with dB and dB SPL
 * they take and return doubles
 */
|
|
double amp2db(double amp);
|
|
double db2amp(double db);
|
|
double amp2db_spl(double amp);
|
|
double db2amp_spl(double db_spl);
|
|
|
|
/* init_sound
|
|
* ==========
|
|
* initializes a new sound allocating memory (WARNING: sound pointer must be allocated first)
|
|
*/
|
|
void init_sound(ATS_SOUND *sound, int sampling_rate, int frame_size, int window_size, int frames,
|
|
double duration, int partials, int with_noise);
|
|
|
|
/* copy_sound
|
|
* ==========
|
|
* make a full copy of an existing sound and return a pointer to it
|
|
*/
|
|
ATS_SOUND *copy_sound(ATS_SOUND *sound);
|
|
|
|
/* optimize_sound
|
|
* ==============
|
|
* optimizes an ATS_SOUND in memory before saving
|
|
* anargs: pointer to analysis parameters
|
|
* sound: pointer to ATS_SOUND structure
|
|
*/
|
|
void optimize_sound(ANARGS *anargs, ATS_SOUND *sound);
|
|
|
|
/* set_av
|
|
* ======
|
|
* sets the av structure slot of an ATS_SOUND,
|
|
* it computes the average ampl., freq. and SMR for each partial
|
|
* sound: pointer to ATS_SOUND structure
|
|
*/
|
|
void set_av(ATS_SOUND *sound);
|
|
|
|
/* increase_max_partials
|
|
* =====================
|
|
* increases by a given offset the max supported number of partials
|
|
* by reallocating and copying associated partials data arrays
|
|
* (WARNING: sound must have been initialized first)
|
|
*/
|
|
void increase_max_partials(ATS_SOUND *sound, unsigned int offset);
|
|
|
|
/* free_sound
|
|
* ==========
|
|
* frees sound's memory (WARNING: sound pointer is not deallocated)
|
|
*/
|
|
void free_sound(ATS_SOUND *sound);
|
|
|
|
/* add_partial
|
|
* ============
|
|
* creates a new partial that is the copy of the one with given source index if valid ;
|
|
* invalid source indexes (<0 or >=nb partials) give a new partial with null values
|
|
* (reallocates partials data arrays as necessary, by an offset of ATSA_INCROFFSET,
|
|
* recomputes the partial average data and maintains the partial sorting by increasing freq.)
|
|
*/
|
|
void add_partial(ATS_SOUND *sound, int src_index);
|
|
|
|
/* remove_partials
|
|
* ===============
|
|
* removes the partials of given indexes
|
|
* (recompute the partial average data and maintain the partial sorting by increasing freq).
|
|
*/
|
|
void remove_partials(ATS_SOUND *sound, int* rem_indexes, int nb_rem_indexes);
|
|
|
|
/* residual.c */
|
|
|
|
/* compute_residual
 * ================
 * Computes the difference between the synthesis and the original sound.
 * The win_samps array contains the sample numbers in the input file corresponding to each frame.
 * fil: pointer to analyzed data
 * fil_len: length of data in samples
 * sound: pointer to ATS_SOUND
 * win_samps: pointer to array of analysis windows center times
 * file_sampling_rate: sampling rate of analysis file
 * res: pointer to output residual data (channel 0 = residual, channel 1 = partials synthesis)
 * res_len: pointer to output number of samples in residual data
 */
|
|
void compute_residual(double *fil, int fil_len, ATS_SOUND *sound, int *win_samps, int file_sampling_rate, double*** res, int* res_len);
|
|
|
|
/* residual-analysis.c */
|
|
|
|
/* residual_analysis
|
|
* =================
|
|
* performs the critical-band analysis of the residual file
|
|
* res: residual samples (channel 0 = residual, channel 1 = partials synthesis)
|
|
* res_len: number of residual samples per channel
|
|
* s_rate: sampling rate for the samples
|
|
* sound: sound to store the residual data
|
|
*/
|
|
void residual_analysis(double** res, int res_len, int s_rate, ATS_SOUND *sound);
|
|
|
|
/* band_energy_to_res
 * ==================
 * transfers residual energy from bands to partials
 * sound: sound structure containing data
 * frame: frame number
 */
|
|
void band_energy_to_res(ATS_SOUND *sound, int frame);
|
|
|
|
/* res_to_band_energy
 * ==================
 * transfers residual energy from partials to bands
 * sound: sound structure containing data
 * frame: frame number
 */
|
|
void res_to_band_energy(ATS_SOUND *sound, int frame);
|
|
|
|
#ifdef __cplusplus
|
|
} /* extern "C" */
|
|
#endif /* __cplusplus */
|
|
|
|
#endif /* ATSA_H */
|