/* atsa.h
 * atsa: ATS analysis implementation (with re-synthesis tools)
 * Oscar Pablo Di Liscia / Pete Moss / Juan Pampin
 *
 * Public constants, data structures and function prototypes of the
 * ATS analysis / re-synthesis library.
 */

#ifndef ATSA_H
#define ATSA_H

/* AMP_DB and DB_AMP expand to log10() / pow() calls: pull in their
 * declarations so that this header is self-contained. */
#include <math.h>

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/* window types */
#define BLACKMAN   0
#define BLACKMAN_H 1
#define HANNING    2
#define VONHANN    3

/* analysis parameters (default values) */

/* start time */
#define ATSA_START 0.0
/* duration */
#define ATSA_DUR 0.0
/* lowest frequency (hertz) */
#define ATSA_LFREQ 20.0
/* highest frequency (hertz) */
#define ATSA_HFREQ 20000.0
/* frequency deviation (ratio) */
#define ATSA_FREQDEV 0.1
/* number of f0 cycles in window */
#define ATSA_WCYCLES 4
/* window type */
#define ATSA_WTYPE BLACKMAN_H
/* window size */
#define ATSA_WSIZE 1024
/* hop size proportional to window size (ratio) */
#define ATSA_HSIZE 0.25
/* lowest magnitude for peaks (amp) */
#define ATSA_LMAG (-60.0)
/* length of analysis tracks (frames) */
#define ATSA_TRKLEN 3
/* minimum short partial length (frames) */
#define ATSA_MSEGLEN 3
/* minimum short partial SMR average (dB SPL) */
#define ATSA_MSEGSMR 60.0
/* minimum gap length (frames) */
#define ATSA_MGAPLEN 3
/* threshold for partial SMR average (dB SPL) */
#define ATSA_SMRTHRES 30.0
/* last peak contribution for tracking (ratio) */
#define ATSA_LPKCONT 0.0
/* SMR contribution for tracking (ratio) */
#define ATSA_SMRCONT 0.0
/* minimum number of frames for analysis (frames) */
#define ATSA_MFRAMES 4
/* offset used when increasing the max supported number of partials
 * in ATS_SOUND */
#define ATSA_INCROFFSET 10

/* default analysis file type
 * 1 = only amp. and freq.
 * 2 = amp., freq. and phase
 * 3 = amp., freq. and noise
 * 4 = amp., freq., phase, and noise
 */
#define ATSA_TYPE 4

/* macros to check sound contents from file type
 * (the comparison is done against double literals, matching the
 * double 'typ' field of ATS_HEADER) */
#define FILE_HAS_PHASE(typ) ((typ) == 2. || (typ) == 4.)
#define FILE_HAS_NOISE(typ) ((typ) == 3. || (typ) == 4.)

/* constants and macros */
#define PI    3.141592653589793
#define TWOPI 6.283185307179586
#define NIL   (-1)

/* AMP_DB
 * ======
 * converts a linear amplitude to decibels: 20 * log10(amp).
 * A zero amplitude yields the sentinel value -32767.0 instead of -inf.
 * NOTE: this used to expand to log10(amp * 20.0), which is not a dB
 * conversion; it now follows the standard definition.
 * WARNING: 'amp' is evaluated twice, do not pass expressions with
 * side effects.
 */
#define AMP_DB(amp) \
  ((amp) != 0.0 ? 20.0 * log10((double)(amp)) : -32767.0)

/* DB_AMP
 * ======
 * converts decibels to a linear amplitude: 10 ^ (db / 20).
 * The argument is parenthesized so that compound expressions such as
 * DB_AMP(a + b) are converted as a whole.
 */
#define DB_AMP(db) (pow(10.0, (double)(db) / 20.0))

#define ATSA_MAX_DB_SPL      100.0
#define ATSA_NOISE_THRESHOLD (-120)
#define ATSA_CRITICAL_BANDS  25
#define ATSA_NOISE_VARIANCE  0.04

/* array of critical band frequency edges based on data from:
 * Zwicker, Fastl (1990) "Psychoacoustics Facts and Models",
 * Berlin ; New York : Springer-Verlag
 * (declared here, defined in one of the library source files)
 */
extern double ATSA_CRITICAL_BAND_EDGES[ATSA_CRITICAL_BANDS+1];

/* data structures */

/* ANARGS
 * ======
 * analysis parameters
 */
typedef struct {
  double start;
  double duration;
  double lowest_freq;
  double highest_freq;
  double freq_dev;
  int win_cycles;
  int win_type;
  int win_size;
  double hop_size;
  double lowest_mag;
  int track_len;
  int min_seg_len;
  int min_gap_len;
  double last_peak_cont;
  double SMR_cont;
  double SMR_thres;
  double min_seg_SMR;
  /* parameters computed from command line */
  int first_smp;
  int cycle_smp;
  int hop_smp;
  int total_samps;
  int srate;
  int fft_size;
  double fft_mag;
  int lowest_bin;
  int highest_bin;
  int frames;
  int type;
} ANARGS;

/* ATS_FFT
 * =======
 * fft data
 */
typedef struct {
  int size;
  int rate;
  double *fdr;
  double *fdi;
} ATS_FFT;

/* ATS_PEAK
 * ========
 * spectral peak data
 */
typedef struct {
  double amp;
  double frq;
  double pha;
  double smr;
  int track;
} ATS_PEAK;

/* ATS_FRAME
 * =========
 * analysis frame data
 */
typedef struct {
  ATS_PEAK *peaks;
  int n_peaks;
  double time;
} ATS_FRAME;

/* ATS_HEADER
 * ==========
 * ats file header data
 */
#define ATS_MAGIC_NUMBER ((double)123.0)

typedef struct {
  /* Magic Number for ID of file, must be 123.00 */
  double mag;
  /* sampling rate */
  double sr;
  /* Frame size (samples) */
  double fs;
  /* Window size (samples) */
  double ws;
  /* number of partials per frame */
  double par;
  /* number of frames present */
  double fra;
  /* max. amplitude */
  double ma;
  /* max. frequency */
  double mf;
  /* duration (secs) */
  double dur;
  /* type (1, 2, 3 or 4)
   * 1 = only amp. and freq.
   * 2 = amp., freq. and phase
   * 3 = amp., freq. and noise
   * 4 = amp., freq., phase, and noise
   */
  double typ;
} ATS_HEADER;

/* ATS_SOUND
 * =========
 * ATS analysis data
 */
typedef struct {
  /* global sound info */
  int srate;          /* Sampling rate (Hz) */
  int frame_size;     /* Number of samples per frame */
  int window_size;    /* Number of samples per window */
  int partials;       /* Number of detected partials (<= max_partials) */
  int max_partials;   /* Max supported number of partials */
  int frames;         /* Number of frames */
  double dur;         /* Sound duration (s) */
  /* info deduced from analysis */
  int optimized;      /* Number of partials removed by optimization */
  double ampmax;      /* Maximum partial amplitude */
  double frqmax;      /* Maximum partial frequency (Hz) */
  ATS_PEAK *av;       /* Peak averages; partials are sorted by increasing frequency */
  /* sinusoidal data
   * (all of these ** arrays are accessed as [partial][frame]) */
  double **time;      /* Start time for each frame, only partial 0 used ; don't read/write others */
  double **frq;       /* Frequency (Hz) */
  double **amp;       /* Amplitude */
  double **pha;       /* Phase (radian ?) */
  double **smr;       /* Signal to Mask Ratio */
  /* noise data */
  double **res;         /* Noise energy for each [partial][frame] */
  double **band_energy; /* Noise energy for each [critical band][frame] */
} ATS_SOUND;

/* SPARAMS
 * =======
 * From ATS synthesis data (was originally in atsh):
 * parameters for resynthesis
 */
typedef struct {
  double amp;         /* Deterministic amplitude scalar */
  double ramp;        /* Residual amplitude scalar */
  double frec;        /* Global frequency scalar */
  double max_stretch; /* Max time scalar */
  double beg;         /* Begin synthesis time */
  double end;         /* End synthesis time */
  double sr;          /* Sampling Rate */
  short allorsel;     /* when TRUE, use only selected partials ; when FALSE use all */
  short upha;         /* when TRUE, the phase information (if any) is used ; when FALSE is not */
} SPARAMS;

/* CURVE
 * =====
 * A curve (was originally in atsh)
 */
typedef struct {
  /* control points: */
  int num_ctlpoints;      /* number of control points */
  double (*ctlpoint)[2];  /* array of control points (0=x, 1=y) */
} CURVE;

/* Interface:
 * ==========
 * grouped by file in alphabetical order
 */

/* critical-bands.c */

/* evaluate_smr
 * ============
 * evaluates the masking curves of an analysis frame
 * peaks: pointer to an array of peaks
 * peaks_size: number of peaks
 */
void evaluate_smr (ATS_PEAK *peaks, int peaks_size);

/* curve.c */

/* Accessors for a CURVE; names suggest init/free, number of break
 * points, coordinates of point i_pt and min/max of each coordinate
 * (NOTE(review): not documented in the original header - confirm
 * against curve.c). */
void curve_init(CURVE *curve);
void curve_free(CURVE *curve);
int get_nbp(CURVE *curve);
double get_x_value(CURVE *curve, int i_pt);
double get_y_value(CURVE *curve, int i_pt);
double get_minx_value(CURVE *curve);
double get_maxx_value(CURVE *curve);
double get_miny_value(CURVE *curve);
double get_maxy_value(CURVE *curve);

/* other-utils.c */

/* window_norm
 * ===========
 * computes the norm of a window
 * returns the norm value
 * win: pointer to a window
 * size: window size
 */
double window_norm(double *win, int size);

/* make_window
 * ===========
 * makes an analysis window, returns a pointer to it.
 * win_type: window type, available types are:
 * BLACKMAN, BLACKMAN_H, HANNING and VONHANN
 * (see the window type constants at the top of this header)
 * win_size: window size
 */
double *make_window(int win_type, int win_size);

/* push_peak
 * =========
 * pushes a peak into an array of peaks
 * re-allocating memory and updating its size
 * returns a pointer to the array of peaks.
 * new_peak: pointer to new peak to push into the array
 * peaks: array (list) of peaks
 * peaks_size: pointer to the current size of the array.
 */
ATS_PEAK *push_peak(ATS_PEAK *new_peak, ATS_PEAK *peaks, int *peaks_size);

/* peak_frq_inc
 * ============
 * function used by qsort to sort an array of peaks
 * in increasing frequency order.
 */
int peak_frq_inc(void const *a, void const *b);

/* peak_smr_dec
 * ============
 * function used by qsort to sort an array of peaks
 * in decreasing SMR order.
 */
int peak_smr_dec(void const *a, void const *b);

/* fft
 * ===
 * standard fft based on simplfft by Joerg Arndt.
 * rl: pointer to real part data
 * im: pointer to imaginary part data
 * n: size of data
 * is: 1=forward transform -1=backward transform
 */
void fft(double *rl, double *im, int n, int is);

/* peak-detection.c */

/* peak_detection
 * ==============
 * detects peaks in a ATS_FFT block
 * returns an array of detected peaks.
 * ats_fft: pointer to ATS_FFT structure
 * lowest_bin: lowest fft bin to start detection
 * highest_bin: highest fft bin to end detection
 * lowest_mag: lowest magnitude to detect peaks
 * norm: analysis window norm
 * peaks_size: pointer to size of the returned peaks array
 */
ATS_PEAK *peak_detection(ATS_FFT *ats_fft, int lowest_bin, int highest_bin, double lowest_mag, double norm, int *peaks_size);

/* peak-tracking.c */

/* peak_tracking
 * =============
 * connects peaks from one analysis frame to tracks
 * returns a pointer to the analysis frame.
 * tracks: pointer to the tracks
 * tracks_size: number of tracks
 * peaks: peaks to connect
 * peaks_size: pointer to the number of peaks
 * frq_dev: frequency deviation from tracks
 * SMR_cont: contribution of SMR to tracking
 * n_partials: pointer to the number of partials before tracking
 */
ATS_FRAME *peak_tracking(ATS_PEAK *tracks, int tracks_size, ATS_PEAK *peaks, int *peaks_size, double frq_dev, double SMR_cont, int *n_partials);

/* update_tracks
 * =============
 * updates analysis tracks
 * returns a pointer to the tracks.
 * tracks: pointer to the tracks
 * tracks_size: pointer to the number of tracks
 * track_len: length of tracks
 * frame_n: analysis frame number
 * ana_frames: pointer to previous analysis frames
 * last_peak_cont: contribution of last peak to the track
 */
ATS_PEAK *update_tracks (ATS_PEAK *tracks, int *tracks_size, int track_len, int frame_n, ATS_FRAME *ana_frames, double last_peak_cont);

/* save-load-sound.c */

/* ats_save
 * ========
 * saves an optimized ATS_SOUND to disk.
 * sound: pointer to ATS_SOUND structure
 * outfile: path of the output ats file
 * SMR_thres: partials with average SMR under this are not written
 * type: file type
 * NOTE: sound MUST be optimized using optimize_sound before calling
 * this function
 * Returns the total file size (nb of written bytes), or -1 in case
 * of any error
 */
int ats_save(ATS_SOUND *sound, const char *outfile, double SMR_thres, int type);

/* ats_load
 * ========
 * loads an ATS_SOUND from disk (whatever little/big endian byte order).
 * sound: pointer to ATS_SOUND structure
 * infile: path of the input ats file
 * sound_type: output parameter (NOTE(review): presumably receives the
 * file type, 1 to 4 - confirm against save-load-sound.c)
 * Returns the total file size in bytes = number of bytes read
 * (or <0 value upon error)
 */
int ats_load(ATS_SOUND *sound, const char *infile, int* sound_type);

/* synthesis.c (was originally in atsh, synth-funcs.c) */

/* do_synthesis
 * ============
 * re-synthesis of an ATS_SOUND
 * (do_atsh_synthesis is declared with the same parameter list)
 * ats_sound: pointer to ATS sound structure
 * sparams: pointer to synthesis parameters
 * timenv: time factor envelope to apply
 * (f(t):[0,1]->[0,T], always increasing, applied to
 * [sparams->beg, sparams->end])
 * (NOTE(review): the original comment gave the interval as
 * [sparams->beg, sparams->beg]; presumably a typo - confirm)
 * selected: array of "selected" flags for partials
 * (size = nb of partials)
 * out_samps: pointer to output array of monophonic samples
 * n_out_samps: pointer to nb output samples
 */
void do_atsh_synthesis(ATS_SOUND *ats_sound, SPARAMS* sparams, CURVE* timenv, int *selected, double** out_samps, int* n_out_samps);
void do_synthesis(ATS_SOUND *ats_sound, SPARAMS* sparams, CURVE* timenv, int *selected, double** out_samps, int* n_out_samps);

/* tracker.c */

/* tracker
 * =======
 * partial tracking function
 * returns an ATS_SOUND with data issued from analysis
 * anargs: pointer to analysis parameters
 * in_samps: monophonic input samples
 * s_rate: sampling rate for the samples
 * n_frames: nb input samples
 * res: pointer to output residual data
 * (channel 0 = residual, channel 1 = partials synthesis)
 * res_len: pointer to output number of samples in residual data
 */
ATS_SOUND *tracker (ANARGS *anargs, double* in_samps, int s_rate, int n_frames, double ***res, int* res_len);

/* utilities.c */

/* ppp2
 * ====
 * returns the closest power of two
 * greater than num
 */
unsigned int ppp2(unsigned int num);

/* various conversion functions
 * to deal with dB and dB SPL
 * they take and return doubles
 */
double amp2db(double amp);
double db2amp(double db);
double amp2db_spl(double amp);
double db2amp_spl(double db_spl);

/* init_sound
 * ==========
 * initializes a new sound allocating memory
 * (WARNING: sound pointer must be allocated first)
 */
void init_sound(ATS_SOUND *sound, int sampling_rate, int frame_size, int window_size, int frames, double duration, int partials, int with_noise);

/* copy_sound
 * ==========
 * make a full copy of an existing sound and return a pointer to it
 */
ATS_SOUND *copy_sound(ATS_SOUND *sound);

/* optimize_sound
 * ==============
 * optimizes an ATS_SOUND in memory before saving
 * anargs: pointer to analysis parameters
 * sound: pointer to ATS_SOUND structure
 */
void optimize_sound(ANARGS *anargs, ATS_SOUND *sound);

/* set_av
 * ======
 * sets the av structure slot of an ATS_SOUND,
 * it computes the average ampl., freq. and SMR for each partial
 * sound: pointer to ATS_SOUND structure
 */
void set_av(ATS_SOUND *sound);

/* increase_max_partials
 * =====================
 * increases by a given offset the max supported number of partials
 * by reallocating and copying associated partials data arrays
 * (WARNING: sound must have been initialized first)
 */
void increase_max_partials(ATS_SOUND *sound, unsigned int offset);

/* free_sound
 * ==========
 * frees sound's memory (WARNING: sound pointer is not deallocated)
 */
void free_sound(ATS_SOUND *sound);

/* add_partial
 * ===========
 * creates a new partial that is the copy of the one with given source
 * index if valid ; invalid source indexes (<0 or >=nb partials) give a
 * new partial with null values
 * (reallocates partials data arrays as necessary, by an offset of
 * ATSA_INCROFFSET, recomputes the partial average data and maintains
 * the partial sorting by increasing freq.)
 */
void add_partial(ATS_SOUND *sound, int src_index);

/* remove_partials
 * ===============
 * removes the partials of given indexes
 * (recomputes the partial average data and maintains the partial
 * sorting by increasing freq).
 * rem_indexes: array of indexes of the partials to remove
 * nb_rem_indexes: number of entries in rem_indexes
 */
void remove_partials(ATS_SOUND *sound, int* rem_indexes, int nb_rem_indexes);

/* residual.c */

/* compute_residual
 * ================
 * Computes the difference between the synthesis and the original sound.
 * fil: pointer to analyzed data
 * fil_len: length of data in samples
 * sound: pointer to ATS_SOUND
 * win_samps: pointer to array of analysis windows center times
 * (the array contains the sample numbers in the input file
 * corresponding to each frame)
 * file_sampling_rate: sampling rate of analysis file
 * res: pointer to output residual data
 * (channel 0 = residual, channel 1 = partials synthesis)
 * res_len: pointer to output number of samples in residual data
 */
void compute_residual(double *fil, int fil_len, ATS_SOUND *sound, int *win_samps, int file_sampling_rate, double*** res, int* res_len);

/* residual-analysis.c */

/* residual_analysis
 * =================
 * performs the critical-band analysis of the residual file
 * res: residual samples
 * (channel 0 = residual, channel 1 = partials synthesis)
 * res_len: number of residual samples per channel
 * s_rate: sampling rate for the samples
 * sound: sound to store the residual data
 */
void residual_analysis(double** res, int res_len, int s_rate, ATS_SOUND *sound);

/* band_energy_to_res
 * ==================
 * transfers residual energy from bands to partials
 * sound: sound structure containing data
 * frame: frame number
 */
void band_energy_to_res(ATS_SOUND *sound, int frame);

/* res_to_band_energy
 * ==================
 * transfers residual energy from partials to bands
 * sound: sound structure containing data
 * frame: frame number
 */
void res_to_band_energy(ATS_SOUND *sound, int frame);

#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

#endif /* ATSA_H */