soundeditor/atsa/atsa.h

/* atsa.h
 * atsa: ATS analysis implementation (with re-synthesis tools)
 * Oscar Pablo Di Liscia / Pete Moss / Juan Pampin
 */

#ifndef ATSA_H
#define ATSA_H

#ifdef __cplusplus
extern "C" {
#endif  /* __cplusplus */

/* window types
 * Integer identifiers for the analysis window shape. ATSA_WTYPE (below)
 * selects one of them as the default, and make_window() takes one of them
 * as its win_type argument.
 * NOTE(review): "Hanning" and "von Hann" conventionally name the same
 * window -- confirm in make_window() which shape value 2 actually produces
 * (possibly a Hamming window).
 */
#define  BLACKMAN   0
#define  BLACKMAN_H 1
#define  HANNING    2
#define  VONHANN    3

/* analysis parameters
 * Default values for the analysis settings.
 * NOTE(review): each macro presumably seeds the like-named ANARGS field
 * (e.g. ATSA_LFREQ -> lowest_freq) -- confirm in the code that fills ANARGS.
 */

/* start time */
#define  ATSA_START 0.0

/* duration */
#define  ATSA_DUR 0.0

/* lowest frequency (hertz)  */
#define  ATSA_LFREQ 20.0

/* highest frequency (hertz) */
#define  ATSA_HFREQ 20000.0

/* frequency deviation (ratio) */
#define  ATSA_FREQDEV 0.1

/* number of f0 cycles in window */
#define  ATSA_WCYCLES 4

/* window type (one of the window type macros defined above) */
#define  ATSA_WTYPE BLACKMAN_H

/* window size */
#define  ATSA_WSIZE 1024

/* hop size proportional to window size (ratio) */
#define  ATSA_HSIZE 0.25

/* lowest magnitude for peaks (amp)
 * NOTE(review): -60.0 looks like a dB figure rather than a linear
 * amplitude -- confirm the unit where this value is consumed.
 */
#define  ATSA_LMAG  -60.0

/* length of analysis tracks (frames) */
#define  ATSA_TRKLEN 3

/* minimum short partial length (frames) */
#define  ATSA_MSEGLEN 3

/* minimum short partial SMR average (dB SPL) */
#define  ATSA_MSEGSMR 60.0

/* minimum gap length (frames) */
#define  ATSA_MGAPLEN 3

/* threshold for partial SMR average (dB SPL) */
#define  ATSA_SMRTHRES 30.0

/* last peak contribution for tracking (ratio) */
#define  ATSA_LPKCONT 0.0

/* SMR contribution for tracking (ratio) */
#define  ATSA_SMRCONT 0.0

/* minimum number of frames for analysis (frames) */
#define ATSA_MFRAMES 4

/* offset (number of partials) used when increasing the max supported
 * number of partials in ATS_SOUND
 */
#define ATSA_INCROFFSET 10

/* default analysis file type
 * The type code tells which data an ATS file carries:
 * 1 = only amp. and freq.
 * 2 = amp., freq. and phase
 * 3 = amp., freq. and noise
 * 4 = amp., freq., phase, and noise
 */
#define ATSA_TYPE 4

/* Predicates on an ATS file type code (1..4, see ATSA_TYPE):
 * types 2 and 4 carry phase data, types 3 and 4 carry noise data.
 * Each yields 1 or 0. The argument may be evaluated twice, so it must be
 * free of side effects.
 */
#define FILE_HAS_PHASE(typ) (((typ) == 2.0) || ((typ) == 4.0))
#define FILE_HAS_NOISE(typ) (((typ) == 3.0) || ((typ) == 4.0))

/* constants and macros */
#define  PI         3.141592653589793
#define  TWOPI      6.283185307179586

/* sentinel value; parenthesized so it stays a single operand inside
 * larger expressions
 */
#define  NIL        (-1)

/* AMP_DB
 * ======
 * converts a linear amplitude to decibels: 20*log10(amp).
 * It is the exact inverse of DB_AMP. A zero amplitude has no logarithm
 * and yields the sentinel -32767.0 instead.
 * amp: linear amplitude; evaluated twice, so it must be free of side effects
 * Requires <math.h> in the including translation unit.
 */
#define  AMP_DB(amp) ((amp) != 0.0 ? 20.0 * log10((double)(amp)) : -32767.0)

/* DB_AMP
 * ======
 * converts decibels to a linear amplitude: 10^(db/20).
 * db: level in decibels; the argument is parenthesized, so compound
 * expressions such as DB_AMP(a + b) are converted as a whole
 * Requires <math.h> in the including translation unit.
 */
#define  DB_AMP(db) (pow(10.0, (double)(db) / 20.0))

#define  ATSA_MAX_DB_SPL 100.0
#define  ATSA_NOISE_THRESHOLD (-120)

/* number of critical bands; ATSA_CRITICAL_BAND_EDGES is declared with
 * ATSA_CRITICAL_BANDS+1 entries (one more edge than bands)
 */
#define  ATSA_CRITICAL_BANDS 25
#define  ATSA_NOISE_VARIANCE 0.04

/* array of critical band frequency edges based on data from:
 * Zwicker, Fastl (1990) "Psychoacoustics Facts and Models",
 * Berlin ; New York : Springer-Verlag
 * Only declared here (extern); the definition lives in another
 * translation unit. It has ATSA_CRITICAL_BANDS+1 entries, i.e. one more
 * edge than there are bands.
 */
extern double ATSA_CRITICAL_BAND_EDGES[ATSA_CRITICAL_BANDS+1];

/* data structures */

/* ANARGS
 * ======
 * analysis parameters
 * NOTE(review): the "cf." notes pair each field with the ATSA_* default
 * macro of matching name; that pairing and the units are inferred from the
 * macro comments -- confirm against the code that fills this structure.
 */
typedef struct {
  double start;          /* cf. ATSA_START: start time */
  double duration;       /* cf. ATSA_DUR: duration */
  double lowest_freq;    /* cf. ATSA_LFREQ: lowest frequency (hertz) */
  double highest_freq;   /* cf. ATSA_HFREQ: highest frequency (hertz) */
  double freq_dev;       /* cf. ATSA_FREQDEV: frequency deviation (ratio) */
  int win_cycles;        /* cf. ATSA_WCYCLES: number of f0 cycles in window */
  int win_type;          /* cf. ATSA_WTYPE: one of the window type macros */
  int win_size;          /* cf. ATSA_WSIZE: window size */
  double hop_size;       /* cf. ATSA_HSIZE: hop size as a ratio of the window size */
  double lowest_mag;     /* cf. ATSA_LMAG: lowest magnitude for peaks */
  int track_len;         /* cf. ATSA_TRKLEN: length of analysis tracks (frames) */
  int min_seg_len;       /* cf. ATSA_MSEGLEN: minimum short partial length (frames) */
  int min_gap_len;       /* cf. ATSA_MGAPLEN: minimum gap length (frames) */
  double last_peak_cont; /* cf. ATSA_LPKCONT: last peak contribution for tracking (ratio) */
  double SMR_cont;       /* cf. ATSA_SMRCONT: SMR contribution for tracking (ratio) */
  double SMR_thres;      /* cf. ATSA_SMRTHRES: threshold for partial SMR average */
  double min_seg_SMR;    /* cf. ATSA_MSEGSMR: minimum short partial SMR average */
  /* parameters computed from command line
   * (derived values; how each one is computed is not visible in this header)
   */
  int first_smp;
  int cycle_smp;
  int hop_smp;
  int total_samps;
  int srate;
  int fft_size;
  double fft_mag;
  int lowest_bin;
  int highest_bin;
  int frames;
  int type;              /* presumably the file type code, cf. ATSA_TYPE -- confirm */
} ANARGS;

/* ATS_FFT
 * =======
 * fft data; peak_detection() takes a pointer to one of these blocks.
 * NOTE(review): field meanings below are inferred from the names --
 * confirm against the code that fills the structure.
 */
typedef struct {
  int size;     /* presumably the fft size (number of points) */
  int rate;     /* presumably the sampling rate of the analysed signal */
  double *fdr;  /* presumably the real part of the fft data */
  double *fdi;  /* presumably the imaginary part of the fft data */
} ATS_FFT;

/* ATS_PEAK
 * ========
 * spectral peak data
 * Also used as the element type of ATS_SOUND.av (per-partial averages).
 * NOTE(review): units are presumably the same as the like-named ATS_SOUND
 * arrays (frq in Hz, smr = signal to mask ratio) -- confirm.
 */
typedef struct {
  double amp;  /* amplitude */
  double frq;  /* frequency */
  double pha;  /* phase */
  double smr;  /* signal to mask ratio */
  int track;   /* presumably the index of the track this peak belongs to -- confirm */
} ATS_PEAK;

/* ATS_FRAME
 * =========
 * analysis frame data: the peaks found in one analysis frame
 */
typedef struct {
  ATS_PEAK *peaks;  /* array of peaks of this frame */
  int n_peaks;      /* number of entries in peaks */
  double time;      /* time of the frame (unit not stated here; presumably seconds) */
} ATS_FRAME;

/* ATS_HEADER
 * ==========
 * ats file header data
 * Every field is declared as a double, including the counts and the
 * type code.
 */

/* value expected in ATS_HEADER.mag to identify an ats file */
#define ATS_MAGIC_NUMBER ((double)123.0)

typedef struct {

  /* Magic Number for ID of file, must be 123.00 (see ATS_MAGIC_NUMBER) */
  double mag;

  /* sampling rate */
  double sr;

  /* Frame size (samples) */
  double fs;

  /* Window size (samples) */
  double ws;

  /* number of partials per frame */
  double par;

  /* number of frames present */
  double fra;

  /* max. amplitude */
  double ma;

  /* max. frequency */
  double mf;

  /* duration (secs) */
  double dur;

  /* type (1, 2, 3 or 4)
   * 1 = only amp. and freq.
   * 2 = amp., freq. and phase
   * 3 = amp., freq. and noise
   * 4 = amp., freq., phase, and noise
   * FILE_HAS_PHASE / FILE_HAS_NOISE test such a type code.
   */
  double typ;

} ATS_HEADER;

/* ATS_SOUND
 * =========
 * ATS analysis data: global information about the analysed sound plus the
 * per-partial / per-frame sinusoidal and noise data.
 * The structure itself is caller-allocated: init_sound() fills it and
 * free_sound() releases its contents without deallocating the structure.
 */
typedef struct {

  /* global sound info */
  int srate;        /* Sampling rate (Hz) */
  int frame_size;   /* Number of samples per frame */
  int window_size;  /* Number of samples per window */
  int partials;     /* Number of detected partials (<= max_partials) */
  int max_partials; /* Max supported number of partials (see increase_max_partials) */
  int frames;       /* Number of frames */
  double dur;       /* Sound duration (s) */

  /* info deduced from analysis */
  int optimized; /* Number of partials removed by optimization */
  double ampmax; /* Maximum partial amplitude */
  double frqmax; /* Maximum partial frequency (Hz) */
  ATS_PEAK *av;  /* Peak averages (see set_av); partials are sorted by increasing frequency */

  /* sinusoidal data (all of these ** are accessed as [partial][frame]) */
  double **time; /* Start time for each frame; only partial 0 is used, do not read/write the others */
  double **frq;  /* Frequency (Hz) */
  double **amp;  /* Amplitude */
  double **pha;  /* Phase (radian ?) */
  double **smr;  /* Signal to Mask Ratio */

  /* noise data */
  double **res; /* Noise energy for each [partial][frame] */
  double **band_energy; /* Noise energy for each [critical band][frame] */

} ATS_SOUND;


/* SPARAMS
 * =======
 * ATS synthesis parameters (was originally in atsh);
 * passed to do_synthesis() / do_atsh_synthesis()
 */
typedef struct { /* parameters for resynthesis */

  double amp;         /* Deterministic amplitude scalar */
  double ramp;        /* Residual amplitude scalar */
  double frec;        /* Global frequency scalar */
  double max_stretch; /* Max time scalar */
  double beg;         /* Begin synthesis time */
  double end;         /* End synthesis time */
  double sr;          /* Sampling Rate */
  short allorsel;     /* when TRUE, use only selected partials ; when FALSE use all */
  short upha;         /* when TRUE, the phase information (if any) is used ; when FALSE it is not */
} SPARAMS;

/* CURVE
 * =====
 * A curve described by a list of control points (was originally in atsh)
 */
typedef struct
{
  /* control points: */
  int num_ctlpoints;         /* number of control points */
  double (*ctlpoint)[2];     /* array of control points: ctlpoint[i][0] = x, ctlpoint[i][1] = y */
} CURVE;


/* Interface:
 * ==========
 * grouped by file in alphabetical order
 */

/* critical-bands.c */

/* evaluate_smr
 * ============
 * evaluates the masking curves of an analysis frame
 * (NOTE(review): presumably stores the outcome in each peak's smr field --
 * confirm in critical-bands.c)
 * peaks: pointer to an array of peaks
 * peaks_size: number of peaks in the array
 */
void evaluate_smr (ATS_PEAK *peaks, int peaks_size);

/* curve.c */

/* CURVE helpers
 * =============
 * NOTE(review): these functions carry no documentation in this header; the
 * descriptions below are inferred from their names and from the CURVE
 * layout (ctlpoint[i][0] = x, ctlpoint[i][1] = y) -- confirm in curve.c.
 * curve_init / curve_free: presumably set up / release the control points
 *   of a caller-provided CURVE
 * get_nbp: presumably the number of control points
 * get_x_value / get_y_value: presumably the x / y of control point i_pt
 * get_minx_value .. get_maxy_value: presumably the extreme x / y values
 *   over all control points
 */
void curve_init(CURVE *curve);
void curve_free(CURVE *curve);
int   get_nbp(CURVE *curve);
double get_x_value(CURVE *curve, int i_pt);
double get_y_value(CURVE *curve, int i_pt);
double get_minx_value(CURVE *curve);
double get_maxx_value(CURVE *curve);
double get_miny_value(CURVE *curve);
double get_maxy_value(CURVE *curve);

/* other-utils.c */

/* window_norm
 * ===========
 * computes the norm of a window
 * returns the norm value (peak_detection() takes such a value as its
 * norm argument)
 * win: pointer to a window
 * size: window size (number of values in win)
 */
double window_norm(double *win, int size);

/* make_window
 * ===========
 * makes an analysis window, returns a pointer to it.
 * (NOTE(review): the returned buffer is presumably allocated by the
 * function and owned by the caller -- confirm who frees it)
 * win_type: window type, one of the window type macros defined at the top
 * of this header: BLACKMAN, BLACKMAN_H, HANNING or VONHANN
 * win_size: window size
 */
double *make_window(int win_type, int win_size);

/* push_peak
 * =========
 * pushes a peak into an array of peaks
 * re-allocating memory and updating its size
 * returns a pointer to the array of peaks; since the array is re-allocated,
 * callers should continue with the returned pointer rather than the one
 * they passed in.
 * new_peak: pointer to new peak to push into the array
 * peaks: array of peaks to push into
 * peaks_size: pointer to the current size of the array (updated by the call)
 */
ATS_PEAK *push_peak(ATS_PEAK *new_peak, ATS_PEAK *peaks, int *peaks_size);

/* peak_frq_inc
 * ============
 * comparison function used by qsort to sort an array of peaks
 * in increasing frequency order.
 * a, b: pointers to the two array elements to compare
 */
int peak_frq_inc(void const *a, void const *b);

/* peak_smr_dec
 * ============
 * comparison function used by qsort to sort an array of peaks
 * in decreasing SMR order.
 * a, b: pointers to the two array elements to compare
 */
int peak_smr_dec(void const *a, void const *b);


/* fft
 * ===
 * standard fft based on simplfft by Joerg Arndt.
 * rl: pointer to real part data
 * im: pointer to imaginary part data
 * n: size of data
 * is: 1=forward transform -1=backward transform
 * (NOTE(review): rl and im are non-const, so the transform is presumably
 * done in place -- confirm)
 */
void fft(double *rl, double *im, int n, int is);

/* peak-detection.c */

/* peak_detection
 * ==============
 * detects peaks in an ATS_FFT block
 * returns an array of detected peaks.
 * ats_fft: pointer to ATS_FFT structure
 * lowest_bin: lowest fft bin to start detection
 * highest_bin: highest fft bin to end detection
 * lowest_mag: lowest magnitude to detect peaks
 * norm: analysis window norm (see window_norm)
 * peaks_size: pointer to size of the returned peaks array (output)
 */
ATS_PEAK *peak_detection(ATS_FFT *ats_fft, int lowest_bin, int highest_bin, double lowest_mag, double norm, int *peaks_size);

/* peak-tracking.c */

/* peak_tracking
 * =============
 * connects peaks from one analysis frame to tracks
 * returns a pointer to the analysis frame.
 * tracks: pointer to the tracks (an array of ATS_PEAK)
 * tracks_size: number of tracks
 * peaks: peaks to connect
 * peaks_size: pointer to the number of peaks
 * frq_dev: frequency deviation from tracks
 * SMR_cont: contribution of SMR to tracking
 * n_partials: pointer to the number of partials before tracking
 * (NOTE(review): peaks_size and n_partials are passed by pointer, so the
 * call may update them -- confirm in peak-tracking.c)
 */
ATS_FRAME *peak_tracking(ATS_PEAK *tracks, int tracks_size, ATS_PEAK *peaks, int *peaks_size, double frq_dev, double SMR_cont, int *n_partials);

/* update_tracks
 * =============
 * updates analysis tracks
 * returns a pointer to the tracks.
 * tracks: pointer to the tracks
 * tracks_size: pointer to the number of tracks
 * track_len: length of tracks
 * frame_n: analysis frame number
 * ana_frames: pointer to previous analysis frames
 * last_peak_cont: contribution of last peak to the track
 * (NOTE(review): tracks_size is passed by pointer, so the call may change
 * the number of tracks -- confirm in peak-tracking.c)
 */
ATS_PEAK *update_tracks (ATS_PEAK *tracks, int *tracks_size, int track_len, int frame_n, ATS_FRAME *ana_frames, double last_peak_cont);

/* save-load-sound.c */

/* ats_save
 * ========
 * saves an optimized ATS_SOUND to disk.
 * sound: pointer to ATS_SOUND structure
 * outfile: name of the output ats file (C string)
 * SMR_thres: partials with average SMR under this are not written
 * type: file type (1 to 4, see ATSA_TYPE)
 * NOTE: sound MUST be optimized using optimize_sound before calling this function
 * Returns the total file size (nb of written bytes), or -1 in case of any error
 */
int ats_save(ATS_SOUND *sound, const char *outfile, double SMR_thres, int type);

/* ats_load
 * ========
 * loads an ATS_SOUND from disk (whatever little/big endian byte order).
 * sound: pointer to ATS_SOUND structure
 * infile: name of the input ats file (C string)
 * sound_type: pointer to an int
 * (NOTE(review): presumably receives the type code, 1 to 4, of the loaded
 * file -- confirm in save-load-sound.c)
 * Returns the total file size in bytes = number of bytes read (or <0 value upon error)
 */
int ats_load(ATS_SOUND *sound, const char *infile, int* sound_type);

/* synthesis.c (was originally in atsh, synth-funcs.c) */

/* do_synthesis / do_atsh_synthesis
 * ================================
 * re-synthesis of an ATS_SOUND
 * Both functions take the same arguments.
 * (NOTE(review): how the two differ is not visible from this header --
 * confirm in synthesis.c)
 * ats_sound: pointer to ATS sound structure
 * sparams: pointer to synthesis parameters
 * timenv : time factor envelope to apply
 *          (f(t):[0,1]->[0,T], always increasing, applied to [sparams->beg, sparams->end])
 * selected : array of "selected" flags for partials (size = nb of partials)
 * out_samps: pointer to output array of monophonic samples
 * n_out_samps: pointer to nb output samples
 */
void do_atsh_synthesis(ATS_SOUND *ats_sound, SPARAMS* sparams, CURVE* timenv, int *selected,
                       double** out_samps, int* n_out_samps);
void do_synthesis(ATS_SOUND *ats_sound, SPARAMS* sparams, CURVE* timenv, int *selected,
                  double** out_samps, int* n_out_samps);

/* tracker.c */

/* tracker
 * =======
 * partial tracking function
 * returns an ATS_SOUND with data issued from analysis
 * anargs: pointer to analysis parameters
 * in_samps: monophonic input samples
 * s_rate: sampling rate for the samples
 * n_frames: nb input samples (despite the name, a sample count)
 * res: pointer to output residual data (channel 0 = residual, channel 1 = partials synthesis)
 * res_len: pointer to output number of samples in residual data
 */
ATS_SOUND *tracker (ANARGS *anargs, double* in_samps, int s_rate, int n_frames,
		    double ***res, int* res_len);

/* utilities.c */

/* ppp2
 * ====
 * returns the closest power of two
 * greater than num
 * num: value to round up
 */
unsigned int ppp2(unsigned int num);

/* various conversion functions
 * to deal with dB and dB SPL
 * they take and return doubles
 * amp2db / db2amp: amplitude to dB and dB to amplitude
 * amp2db_spl / db2amp_spl: amplitude to dB SPL and dB SPL to amplitude
 */
double amp2db(double amp);
double db2amp(double db);
double amp2db_spl(double amp);
double db2amp_spl(double db_spl);

/* init_sound
 * ==========
 * initializes a new sound allocating memory (WARNING: sound pointer must be allocated first)
 * sound: pointer to the caller-allocated ATS_SOUND structure to initialize
 * sampling_rate, frame_size, window_size, frames, duration, partials:
 *   (NOTE(review): presumably stored in the like-named ATS_SOUND global
 *   info fields and used to size its data arrays -- confirm in utilities.c)
 * with_noise: (NOTE(review): presumably non-zero to also allocate the
 *   noise data -- confirm)
 */
void init_sound(ATS_SOUND *sound, int sampling_rate, int frame_size, int window_size, int frames,
		double duration, int partials, int with_noise);

/* copy_sound
 * ==========
 * makes a full copy of an existing sound and returns a pointer to it
 * sound: pointer to the ATS_SOUND to copy
 */
ATS_SOUND *copy_sound(ATS_SOUND *sound);

/* optimize_sound
 * ==============
 * optimizes an ATS_SOUND in memory before saving
 * (ats_save requires the sound to have been optimized with this function)
 * anargs: pointer to analysis parameters
 * sound: pointer to ATS_SOUND structure
 */
void optimize_sound(ANARGS *anargs, ATS_SOUND *sound);

/* set_av
 * ======
 * sets the av structure slot of an ATS_SOUND:
 * it computes the average ampl., freq. and SMR for each partial
 * sound: pointer to ATS_SOUND structure
 */
void set_av(ATS_SOUND *sound);

/* increase_max_partials
 * =====================
 * increases by a given offset the max supported number of partials
 * by reallocating and copying associated partials data arrays
 * (WARNING: sound must have been initialized first)
 * sound: pointer to ATS_SOUND structure
 * offset: number of partials to add to the max supported number
 */
void increase_max_partials(ATS_SOUND *sound, unsigned int offset);

/* free_sound
 * ==========
 * frees sound's memory (WARNING: sound pointer is not deallocated)
 * sound: pointer to the ATS_SOUND whose memory is released; the structure
 * itself remains the caller's to deallocate
 */
void free_sound(ATS_SOUND *sound);

/* add_partial
 * ===========
 * creates a new partial that is the copy of the one with given source index if valid ;
 * invalid source indexes (<0 or >=nb partials) give a new partial with null values
 * (reallocates partials data arrays as necessary, by an offset of ATSA_INCROFFSET,
 *  recomputes the partial average data and maintains the partial sorting by increasing freq.)
 * sound: pointer to ATS_SOUND structure
 * src_index: index of the partial to copy
 */
void add_partial(ATS_SOUND *sound, int src_index);

/* remove_partials
 * ===============
 * removes the partials of given indexes
 * (recomputes the partial average data and maintains the partial sorting by increasing freq).
 * sound: pointer to ATS_SOUND structure
 * rem_indexes: array of indexes of the partials to remove
 * nb_rem_indexes: number of entries in rem_indexes
 */
void remove_partials(ATS_SOUND *sound, int* rem_indexes, int nb_rem_indexes);

/* residual.c */

/* compute_residual
 * ================
 * Computes the difference between the synthesis and the original sound.
 * the win_samps array contains the sample numbers in the input file corresponding to each frame
 * fil: pointer to analyzed data
 * fil_len: length of data in samples
 * sound: pointer to ATS_SOUND
 * win_samps: pointer to array of analysis windows center times
 * file_sampling_rate: sampling rate of analysis file
 * res: pointer to output residual data (channel 0 = residual, channel 1 = partials synthesis)
 * res_len: pointer to output number of samples in residual data
 */
void compute_residual(double *fil, int fil_len, ATS_SOUND *sound, int *win_samps, int file_sampling_rate, double*** res, int* res_len);

/* residual-analysis.c */

/* residual_analysis
 * =================
 * performs the critical-band analysis of the residual data
 * res: residual samples (channel 0 = residual, channel 1 = partials synthesis)
 * res_len: number of residual samples per channel
 * s_rate: sampling rate for the samples
 * sound: sound to store the residual data
 */
void residual_analysis(double** res, int res_len, int s_rate, ATS_SOUND *sound);

/* band_energy_to_res
 * ==================
 * transfers residual energy from bands to partials
 * (in ATS_SOUND terms: from band_energy, indexed [critical band][frame],
 * to res, indexed [partial][frame])
 * sound: sound structure containing data
 * frame: frame number
 */
void band_energy_to_res(ATS_SOUND *sound, int frame);

/* res_to_band_energy
 * ==================
 * transfers residual energy from partials to bands
 * (in ATS_SOUND terms: from res, indexed [partial][frame],
 * to band_energy, indexed [critical band][frame])
 * sound: sound structure containing data
 * frame: frame number
 */
void res_to_band_energy(ATS_SOUND *sound, int frame);

#ifdef __cplusplus
}               /* extern "C" */
#endif  /* __cplusplus */

#endif  /* ATSA_H */