/* soundeditor/atsa/atsa.h */

/* atsa.h
* atsa: ATS analysis implementation (with re-synthesis tools)
* Oscar Pablo Di Liscia / Pete Moss / Juan Pampin
*/
#ifndef ATSA_H
#define ATSA_H
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* window types
 * values for a win_type argument / field; ATSA_WTYPE picks the default
 */
#define BLACKMAN    0
#define BLACKMAN_H  1
#define HANNING     2
#define VONHANN     3
/* analysis parameters: default values */
#define ATSA_START      0.0         /* start time */
#define ATSA_DUR        0.0         /* duration */
#define ATSA_LFREQ      20.0        /* lowest frequency (hertz) */
#define ATSA_HFREQ      20000.0     /* highest frequency (hertz) */
#define ATSA_FREQDEV    0.1         /* frequency deviation (ratio) */
#define ATSA_WCYCLES    4           /* number of f0 cycles in window */
#define ATSA_WTYPE      BLACKMAN_H  /* window type */
#define ATSA_WSIZE      1024        /* window size */
#define ATSA_HSIZE      0.25        /* hop size proportional to window size (ratio) */
/* lowest magnitude for peaks (amp)
 * NOTE(review): a negative value looks like a dB figure rather than a
 * linear amplitude - confirm the unit where this default is consumed
 */
#define ATSA_LMAG       (-60.0)
#define ATSA_TRKLEN     3           /* length of analysis tracks (frames) */
#define ATSA_MSEGLEN    3           /* minimum short partial length (frames) */
#define ATSA_MSEGSMR    60.0        /* minimum short partial SMR average (dB SPL) */
#define ATSA_MGAPLEN    3           /* minimum gap length (frames) */
#define ATSA_SMRTHRES   30.0        /* threshold for partial SMR average (dB SPL) */
#define ATSA_LPKCONT    0.0         /* last peak contribution for tracking (ratio) */
#define ATSA_SMRCONT    0.0         /* SMR contribution for tracking (ratio) */
#define ATSA_MFRAMES    4           /* minimum number of frames for analysis (frames) */
/* offset used when increasing the max supported number of partials in ATS_SOUND */
#define ATSA_INCROFFSET 10
/* default analysis file type, one of:
 *   1 = only amp. and freq.
 *   2 = amp., freq. and phase
 *   3 = amp., freq. and noise
 *   4 = amp., freq., phase, and noise
 */
#define ATSA_TYPE       4
/* macros to check sound contents from file type
 * typ: ATS file type (1 to 4, see ATSA_TYPE); it is expanded twice,
 *      so do not pass an expression with side effects
 * each macro yields non-zero when the file type carries that kind of data
 */
#define FILE_HAS_PHASE(typ) (2. == (typ) || 4. == (typ)) /* types 2 and 4 */
#define FILE_HAS_NOISE(typ) (3. == (typ) || 4. == (typ)) /* types 3 and 4 */
/* constants and macros */
#define PI 3.141592653589793
#define TWOPI 6.283185307179586
/* negative literals are parenthesized so they stay a single operand
 * wherever the macro is expanded
 */
#define NIL (-1)
/* AMP_DB / DB_AMP
 * ===============
 * conversions between linear amplitude and decibels:
 *   AMP_DB(amp) = 20 * log10(amp), or -32767.0 when amp is exactly 0
 *   DB_AMP(db)  = 10 ^ (db / 20)
 * so DB_AMP(AMP_DB(x)) gives back x for x > 0.
 * Both yield a double. Arguments are fully parenthesized, so expressions
 * such as DB_AMP(a + b) are converted as a whole. AMP_DB expands its
 * argument twice: do not pass an expression with side effects.
 * The including file must provide <math.h> (log10, pow).
 */
#define AMP_DB(amp) ((amp) != 0.0 ? 20.0 * (double)log10((double)(amp)) : (double)-32767.0)
#define DB_AMP(db) ((double)pow(10.0, (double)(db) / 20.0))
#define ATSA_MAX_DB_SPL 100.0
#define ATSA_NOISE_THRESHOLD (-120)
#define ATSA_CRITICAL_BANDS 25
#define ATSA_NOISE_VARIANCE 0.04
/* array of critical band frequency edges based on data from:
* Zwicker, Fastl (1990) "Psychoacoustics Facts and Models",
* Berlin ; New York : Springer-Verlag
* Holds ATSA_CRITICAL_BANDS+1 values, i.e. one more edge than there are
* bands. This is only a declaration: the array itself has to be defined
* in exactly one source file.
*/
extern double ATSA_CRITICAL_BAND_EDGES[ATSA_CRITICAL_BANDS+1];
/* data structures */
/* ANARGS
 * ======
 * analysis parameters
 * NOTE(review): the per-field descriptions of the first group are taken
 * from the ATSA_* default macros with matching names - confirm the pairing
 */
typedef struct {
    double start;           /* start time */
    double duration;        /* duration */
    double lowest_freq;     /* lowest frequency (hertz) */
    double highest_freq;    /* highest frequency (hertz) */
    double freq_dev;        /* frequency deviation (ratio) */
    int    win_cycles;      /* number of f0 cycles in window */
    int    win_type;        /* window type */
    int    win_size;        /* window size */
    double hop_size;        /* hop size proportional to window size (ratio) */
    double lowest_mag;      /* lowest magnitude for peaks */
    int    track_len;       /* length of analysis tracks (frames) */
    int    min_seg_len;     /* minimum short partial length (frames) */
    int    min_gap_len;     /* minimum gap length (frames) */
    double last_peak_cont;  /* last peak contribution for tracking (ratio) */
    double SMR_cont;        /* SMR contribution for tracking (ratio) */
    double SMR_thres;       /* threshold for partial SMR average (dB SPL) */
    double min_seg_SMR;     /* minimum short partial SMR average (dB SPL) */

    /* parameters computed from command line */
    int    first_smp;
    int    cycle_smp;
    int    hop_smp;
    int    total_samps;
    int    srate;
    int    fft_size;
    double fft_mag;
    int    lowest_bin;
    int    highest_bin;
    int    frames;
    int    type;
} ANARGS;
/* ATS_FFT
 * =======
 * fft data
 * NOTE(review): fdr / fdi look like the real and imaginary parts of the
 * data, each presumably `size` values long - confirm against the fft users
 */
typedef struct {
    int     size;
    int     rate;
    double *fdr;
    double *fdi;
} ATS_FFT;
/* ATS_PEAK
 * ========
 * spectral peak data
 */
typedef struct {
    double amp;    /* amplitude */
    double frq;    /* frequency */
    double pha;    /* phase */
    double smr;    /* signal to mask ratio */
    int    track;  /* NOTE(review): presumably the track this peak belongs to - confirm */
} ATS_PEAK;
/* ATS_FRAME
 * =========
 * analysis frame data
 */
typedef struct {
    ATS_PEAK *peaks;    /* peaks of this frame */
    int       n_peaks;  /* NOTE(review): presumably the number of entries in peaks - confirm */
    double    time;     /* NOTE(review): presumably the frame time; unit not stated here */
} ATS_FRAME;
/* ATS_HEADER
 * ==========
 * ats file header data; every field is stored as a double
 */
#define ATS_MAGIC_NUMBER ((double)123.0)
typedef struct {
    double mag;  /* Magic Number for ID of file, must be 123.00 */
    double sr;   /* sampling rate */
    double fs;   /* Frame size (samples) */
    double ws;   /* Window size (samples) */
    double par;  /* number of partials per frame */
    double fra;  /* number of frames present */
    double ma;   /* max. amplitude */
    double mf;   /* max. frequency */
    double dur;  /* duration (secs) */
    /* type (1, 2, 3 or 4):
     *   1 = only amp. and freq.
     *   2 = amp., freq. and phase
     *   3 = amp., freq. and noise
     *   4 = amp., freq., phase, and noise
     */
    double typ;
} ATS_HEADER;
/* ATS_SOUND
 * =========
 * ATS analysis data
 */
typedef struct {
    /* global sound info */
    int        srate;         /* Sampling rate (Hz) */
    int        frame_size;    /* Number of samples per frame */
    int        window_size;   /* Number of samples per window */
    int        partials;      /* Number of detected partials (<= max_partials) */
    int        max_partials;  /* Max supported number of partials */
    int        frames;        /* Number of frames */
    double     dur;           /* Sound duration (s) */

    /* info deduced from analysis */
    int        optimized;     /* Number of partials removed by optimization */
    double     ampmax;        /* Maximum partial amplitude */
    double     frqmax;        /* Maximum partial frequency (Hz) */
    ATS_PEAK  *av;            /* Peak averages; partials are sorted by increasing frequency */

    /* sinusoidal data (all of these ** are accessed as [partial][frame]) */
    double   **time;          /* Start time for each frame, only partial 0 used ; don't read/write others */
    double   **frq;           /* Frequency (Hz) */
    double   **amp;           /* Amplitude */
    double   **pha;           /* Phase (radian ?) */
    double   **smr;           /* Signal to Mask Ratio */

    /* noise data */
    double   **res;           /* Noise energy for each [partial][frame] */
    double   **band_energy;   /* Noise energy for each [critical band][frame] */
} ATS_SOUND;
/* SPARAMS
 * =======
 * parameters for resynthesis
 * From ATS synthesis data (was originally in atsh)
 */
typedef struct {
    double amp;          /* Deterministic amplitude scalar */
    double ramp;         /* Residual amplitude scalar */
    double frec;         /* Global frequency scalar */
    double max_stretch;  /* Max time scalar */
    double beg;          /* Begin synthesis time */
    double end;          /* End synthesis time */
    double sr;           /* Sampling Rate */
    short  allorsel;     /* when TRUE, use only selected partials ; when FALSE use all */
    short  upha;         /* when TRUE, the phase information (if any) is used ; when FALSE is not */
} SPARAMS;
/* CURVE
 * =====
 * A curve described by control points (was originally in atsh)
 */
typedef struct {
    int      num_ctlpoints;   /* number of control points */
    double (*ctlpoint)[2];    /* array of control points: [i][0] = x, [i][1] = y */
} CURVE;
/* Interface:
* ==========
* prototypes are grouped by implementation file; each group starts with a
* comment naming that file (the order is not strictly alphabetical)
*/
/* critical-bands.c */
/* evaluate_smr
* ============
* evaluates the masking curves of an analysis frame
* peaks: pointer to an array of peaks
* peaks_size: number of peaks
* returns nothing.
* NOTE(review): the result is presumably written to the smr field of each
* peak - confirm in critical-bands.c
*/
void evaluate_smr (ATS_PEAK *peaks, int peaks_size);
/* curve.c */
/* CURVE helpers
* =============
* every function takes a pointer to the CURVE it works on.
* curve_init, curve_free: return nothing.
* NOTE(review): presumably they set up / release curve->ctlpoint - confirm
* in curve.c; ownership rules are not visible from this header.
*/
void curve_init(CURVE *curve);
void curve_free(CURVE *curve);
/* NOTE(review): presumably returns the number of control points
* (num_ctlpoints) - confirm
*/
int get_nbp(CURVE *curve);
/* i_pt: index of a control point.
* NOTE(review): presumably these return ctlpoint[i_pt][0] (x) and
* ctlpoint[i_pt][1] (y), following the (0=x, 1=y) layout of CURVE -
* confirm, including whether i_pt is range-checked
*/
double get_x_value(CURVE *curve, int i_pt);
double get_y_value(CURVE *curve, int i_pt);
/* NOTE(review): presumably the smallest / largest x and y over all control
* points - confirm, including the result for a curve without points
*/
double get_minx_value(CURVE *curve);
double get_maxx_value(CURVE *curve);
double get_miny_value(CURVE *curve);
double get_maxy_value(CURVE *curve);
/* other-utils.c */
/* window_norm
* ===========
* computes the norm of a window
* returns the norm value
* win: pointer to a window
* size: window size
* NOTE(review): size is presumably the number of values readable through
* win; the exact norm formula is not visible here - see other-utils.c
*/
double window_norm(double *win, int size);
/* make_window
* ===========
* makes an analysis window, returns a pointer to it.
* win_type: window type, available types are:
* BLACKMAN, BLACKMAN_H, HANNING and VONHANN
* win_size: window size
* NOTE(review): the returned buffer is presumably allocated here and owned
* by the caller; behaviour for an unknown win_type is not stated - confirm
* both in other-utils.c
*/
double *make_window(int win_type, int win_size);
/* push_peak
* =========
* pushes a peak into an array of peaks
* re-allocating memory and updating its size
* returns a pointer to the array of peaks.
* new_peak: pointer to new peak to push into the array
* peaks: array of peaks to push into
* peaks_size: pointer to the current size of the array.
* Because memory is re-allocated, continue with the returned pointer rather
* than with the peaks value that was passed in.
*/
ATS_PEAK *push_peak(ATS_PEAK *new_peak, ATS_PEAK *peaks, int *peaks_size);
/* peak_frq_inc
* ============
* function used by qsort to sort an array of peaks
* in increasing frequency order.
* a, b: the two array elements to compare, passed by qsort as untyped
* pointers
*/
int peak_frq_inc(void const *a, void const *b);
/* peak_smr_dec
* ============
* function used by qsort to sort an array of peaks
* in decreasing SMR order.
* a, b: the two array elements to compare, passed by qsort as untyped
* pointers
*/
int peak_smr_dec(void const *a, void const *b);
/* fft
* ===
* standard fft based on simplfft by Joerg Arndt.
* rl: pointer to real part data
* im: pointer to imaginary part data
* n: size of data
* is: 1=forward transform -1=backward transform
* returns nothing.
* NOTE(review): the result is presumably written back into rl and im, and
* n presumably has to be a power of two - confirm in other-utils.c
*/
void fft(double *rl, double *im, int n, int is);
/* peak-detection.c */
/* peak_detection
* ==============
* detects peaks in a ATS_FFT block
* returns an array of detected peaks.
* ats_fft: pointer to ATS_FFT structure
* lowest_bin: lowest fft bin to start detection
* highest_bin: highest fft bin to end detection
* lowest_mag: lowest magnitude to detect peaks
* norm: analysis window norm
* peaks_size: pointer to size of the returned peaks array
* NOTE(review): ownership of the returned array and the return value when
* no peak is found are not stated here - confirm in peak-detection.c
*/
ATS_PEAK *peak_detection(ATS_FFT *ats_fft, int lowest_bin, int highest_bin, double lowest_mag, double norm, int *peaks_size);
/* peak-tracking.c */
/* peak_tracking
* =============
* connects peaks from one analysis frame to tracks
* returns a pointer to the analysis frame.
* tracks: pointer to the tracks
* tracks_size: number of tracks
* peaks: peaks to connect
* peaks_size: pointer to the number of peaks
* frq_dev: frequency deviation from tracks
* SMR_cont: contribution of SMR to tracking
* n_partials: pointer to the number of partials before tracking
* NOTE(review): peaks_size and n_partials are pointers, so they may be
* updated by the call - confirm in peak-tracking.c
*/
ATS_FRAME *peak_tracking(ATS_PEAK *tracks, int tracks_size, ATS_PEAK *peaks, int *peaks_size, double frq_dev, double SMR_cont, int *n_partials);
/* update_tracks
* =============
* updates analysis tracks
* returns a pointer to the tracks.
* tracks: pointer to the tracks
* tracks_size: pointer to the number of tracks
* track_len: length of tracks
* frame_n: analysis frame number
* ana_frames: pointer to previous analysis frames
* last_peak_cont: contribution of last peak to the track
* NOTE(review): tracks_size is a pointer, so the count may be updated by
* the call, and the returned pointer may differ from tracks - confirm in
* peak-tracking.c
*/
ATS_PEAK *update_tracks (ATS_PEAK *tracks, int *tracks_size, int track_len, int frame_n, ATS_FRAME *ana_frames, double last_peak_cont);
/* save-load-sound.c */
/* ats_save
* ========
* saves an optimized ATS_SOUND to disk.
* sound: pointer to ATS_SOUND structure
* outfile: output ats file, given as a C string (presumably its path)
* SMR_thres: partials with average SMR under this are not written
* type: file type (1 to 4, see ATSA_TYPE)
* NOTE: sound MUST be optimized using optimize_sound before calling this function
* Returns the total file size (nb of written bytes), or -1 in case of any error
*/
int ats_save(ATS_SOUND *sound, const char *outfile, double SMR_thres, int type);
/* ats_load
* ========
* loads an ATS_SOUND from disk (whatever little/big endian byte order).
* sound: pointer to ATS_SOUND structure
* infile: input ats file, given as a C string (presumably its path)
* sound_type: pointer to an int.
* NOTE(review): presumably receives the file type (1 to 4) read from the
* file - confirm in save-load-sound.c
* Returns the total file size in bytes = number of bytes read (or <0 value upon error)
*/
int ats_load(ATS_SOUND *sound, const char *infile, int* sound_type);
/* synthesis.c (was originally in atsh, synth-funcs.c) */
/* do_atsh_synthesis, do_synthesis
* ===============================
* re-synthesis of an ATS_SOUND; both functions share the same parameter list
* and return nothing
* ats_sound: pointer to ATS sound structure
* sparams: pointer to synthesis parameters
* timenv : time factor envelop to apply
* (f(t):[0,1]->[0,T], always increasing)
* NOTE(review): the interval it applies to was documented as
* [sparams->beg, sparams->beg]; [sparams->beg, sparams->end] is presumably
* meant - confirm in synthesis.c
* selected : array of "selected" flags for partials (size = nb of partials)
* out_samps: pointer to output array of monophonic samples
* n_out_samps: pointer to nb output samples
* NOTE(review): how the two functions differ, and who frees *out_samps,
* is not visible from this header - confirm in synthesis.c
*/
void do_atsh_synthesis(ATS_SOUND *ats_sound, SPARAMS* sparams, CURVE* timenv, int *selected,
double** out_samps, int* n_out_samps);
void do_synthesis(ATS_SOUND *ats_sound, SPARAMS* sparams, CURVE* timenv, int *selected,
double** out_samps, int* n_out_samps);
/* tracker.c */
/* tracker
* =======
* partial tracking function
* returns an ATS_SOUND with data issued from analysis
* anargs: pointer to analysis parameters
* in_samps: monophonic input samples
* s_rate: sampling rate for the samples
* n_frames: number of input samples (a sample count, despite the name)
* res: pointer to output residual data (channel 0 = residual, channel 1 = partials synthesis)
* res_len: pointer to output number of samples in residual data
* NOTE(review): who releases the returned ATS_SOUND and *res, and what is
* returned on failure, is not stated here - confirm in tracker.c
*/
ATS_SOUND *tracker (ANARGS *anargs, double* in_samps, int s_rate, int n_frames,
double ***res, int* res_len);
/* utilities.c */
/* ppp2
* ====
* returns the closest power of two
* greater than num
* NOTE(review): whether a num that already is a power of two is returned
* unchanged is not visible here - confirm in utilities.c
*/
unsigned int ppp2(unsigned int num);
/* various conversion functions
* to deal with dB and dB SPL
* they take and return doubles
* the names pair up as amp2db / db2amp and amp2db_spl / db2amp_spl
* NOTE(review): the dB SPL reference level is not visible here (see
* ATSA_MAX_DB_SPL) - confirm in utilities.c
*/
double amp2db(double amp);
double db2amp(double db);
double amp2db_spl(double amp);
double db2amp_spl(double db_spl);
/* init_sound
* ==========
* initializes a new sound allocating memory (WARNING: sound pointer must be allocated first)
* sound: pointer to the ATS_SOUND structure to initialize
* sampling_rate, frame_size, window_size, frames, duration, partials:
* NOTE(review): presumably stored in the ATS_SOUND fields with the
* corresponding names and used to size its arrays - confirm in utilities.c
* with_noise: NOTE(review): presumably non-zero to also set up the noise
* data (res, band_energy) - confirm
*/
void init_sound(ATS_SOUND *sound, int sampling_rate, int frame_size, int window_size, int frames,
double duration, int partials, int with_noise);
/* copy_sound
* ==========
* make a full copy of an existing sound and return a pointer to it
* sound: pointer to the ATS_SOUND structure to copy
* NOTE(review): the copy is presumably released with free_sound followed by
* freeing the returned pointer - confirm in utilities.c
*/
ATS_SOUND *copy_sound(ATS_SOUND *sound);
/* optimize_sound
* ==============
* optimizes an ATS_SOUND in memory before saving
* (ats_save requires this to have been done)
* anargs: pointer to analysis parameters
* sound: pointer to ATS_SOUND structure
*/
void optimize_sound(ANARGS *anargs, ATS_SOUND *sound);
/* set_av
* ======
* sets the av structure slot of an ATS_SOUND,
* it computes the average ampl., freq. and SMR for each partial
* sound: pointer to ATS_SOUND structure
* returns nothing; the averages end up in sound->av
*/
void set_av(ATS_SOUND *sound);
/* increase_max_partials
* =====================
* increases by a given offset the max supported number of partials
* by reallocating and copying associated partials data arrays
* (WARNING: sound must have been initialized first)
* sound: pointer to ATS_SOUND structure
* offset: number of partials to add to the supported maximum
* Because the arrays are reallocated, pointers obtained from sound before
* the call should be treated as stale.
*/
void increase_max_partials(ATS_SOUND *sound, unsigned int offset);
/* free_sound
* ==========
* frees sound's memory (WARNING: sound pointer is not deallocated)
* sound: pointer to ATS_SOUND structure; the structure itself remains the
* caller's to release
*/
void free_sound(ATS_SOUND *sound);
/* add_partial
* ===========
* creates a new partial that is the copy of the one with given source index if valid ;
* invalid source indexes (<0 or >=nb partials) give a new partial with null values
* (reallocates partials data arrays as necessary, by an offset of ATSA_INCROFFSET,
* recomputes the partial average data and maintains the partial sorting by increasing freq.)
* sound: pointer to ATS_SOUND structure
* src_index: index of the partial to copy
*/
void add_partial(ATS_SOUND *sound, int src_index);
/* remove_partials
* ===============
* removes the partials of given indexes
* (recompute the partial average data and maintain the partial sorting by increasing freq).
* sound: pointer to ATS_SOUND structure
* rem_indexes: array of indexes of the partials to remove
* nb_rem_indexes: number of entries in rem_indexes
* NOTE(review): whether rem_indexes must be sorted or free of duplicates is
* not stated here - confirm in utilities.c
*/
void remove_partials(ATS_SOUND *sound, int* rem_indexes, int nb_rem_indexes);
/* residual.c */
/* compute_residual
* ================
* Computes the difference between the synthesis and the original sound.
* the <win-samps> array contains the sample numbers in the input file corresponding to each frame
* fil: pointer to analyzed data
* fil_len: length of data in samples
* sound: pointer to ATS_SOUND
* win_samps: pointer to array of analysis windows center times
* file_sampling_rate: sampling rate of analysis file
* res: pointer to output residual data (channel 0 = residual, channel 1 = partials synthesis)
* res_len: pointer to output number of samples in residual data
* The result is handed back through res / res_len only; the function takes
* no output file path.
* NOTE(review): who releases *res is not stated here - confirm in residual.c
*/
void compute_residual(double *fil, int fil_len, ATS_SOUND *sound, int *win_samps, int file_sampling_rate, double*** res, int* res_len);
/* residual-analysis.c */
/* residual_analysis
* =================
* performs the critical-band analysis of the residual file
* res: residual samples (channel 0 = residual, channel 1 = partials synthesis)
* res_len: number of residual samples per channel
* s_rate: sampling rate for the samples
* sound: sound to store the residual data
* returns nothing; the analysis result is stored in sound
*/
void residual_analysis(double** res, int res_len, int s_rate, ATS_SOUND *sound);
/* band_energy_to_res
* ==================
* transfers residual energy from bands to partials
* (ATS_SOUND keeps them in band_energy and res respectively)
* sound: sound structure containing data
* frame: frame number
*/
void band_energy_to_res(ATS_SOUND *sound, int frame);
/* res_to_band_energy
* ==================
* transfers residual energy from partials to bands
* (ATS_SOUND keeps them in res and band_energy respectively)
* sound: sound structure containing data
* frame: frame number
*/
void res_to_band_energy(ATS_SOUND *sound, int frame);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* ATSA_H */