230 lines
6.9 KiB
C
230 lines
6.9 KiB
C
/* residual-analysis.c
 * atsa: ATS analysis implementation
 * Oscar Pablo Di Liscia / Pete Moss / Juan Pampin
 */
|
|
|
|
#include "atsa.h"
|
|
|
|
#include <math.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
/* smallest FFT size accepted for the residual analysis */
#define ATSA_RES_MIN_FFT_SIZE 4096
/* multiplier applied to the window size to get the starting FFT size */
#define ATSA_RES_PAD_FACTOR 2
/* squared magnitude of the complex value (re, im), scaled by norm.
 * Every argument and the whole expansion are parenthesized so compound
 * expressions (e.g. a+b) expand correctly.
 * NOTE: re and im are each evaluated twice; do not pass expressions
 * with side effects. */
#define MAG_SQUARED(re, im, norm) ((norm) * ((re) * (re) + (im) * (im)))
|
|
|
|
/* private function prototypes */
|
|
int residual_get_N(int M, int min_fft_size, int factor);
|
|
void residual_get_bands(double fft_mag, double *true_bands, int *limits, int bands);
|
|
double residual_compute_time_domain_energy(ATS_FFT *fft_struct);
|
|
double residual_get_band_energy(int lo, int hi, ATS_FFT *fft_struct, double norm);
|
|
void residual_compute_band_energy(ATS_FFT *fft_struct, int *band_limits, int bands, double *band_energy, double norm);
|
|
|
|
/* residual_get_N
 * ==============
 * computes the FFT size used for the residual analysis
 * M: analysis window size
 * min_fft_size: smallest FFT size accepted
 * factor: multiplier applied to M to get the starting size
 * returns factor * M when that is already >= min_fft_size; otherwise the
 * size is replaced by ppp2(size + 1) until it reaches min_fft_size.
 * NOTE(review): ppp2 is defined elsewhere; presumably it rounds up to a
 * power of two - confirm.
 */
int residual_get_N(int M, int min_fft_size, int factor)
{
  int fft_size;

  for (fft_size = factor * M; fft_size < min_fft_size; ) {
    fft_size = ppp2(fft_size + 1);
  }
  return fft_size;
}
|
|
|
|
void residual_get_bands(double fft_mag, double *true_bands, int *limits, int bands)
|
|
{
|
|
int k;
|
|
for(k = 0 ; k < bands ; k++){
|
|
limits[k] = (int)floor(true_bands[k] / fft_mag);
|
|
}
|
|
}
|
|
|
|
|
|
/* residual_compute_time_domain_energy
 * ===================================
 * sums the squared values of the real buffer (fdr) of fft_struct over
 * all fft_struct->size entries and returns that sum.
 *
 * Background (Parseval's theorem):
 *   N-1                    N-1
 *   sum(|x(n)|^2) = 1/N *  sum(|X(k)|^2)
 *   n=0                    k=0
 * Frequency-domain energy elsewhere in this file is only accumulated
 * from 0 Hz up to Nyquist (0 -> N/2). Note that this function returns the
 * plain sum: no 1/2 scaling is applied here.
 */
double residual_compute_time_domain_energy(ATS_FFT *fft_struct)
{
  double energy = 0.0;
  int i = 0;

  while (i < fft_struct->size) {
    const double x = fft_struct->fdr[i];
    energy += fabs(x * x);
    i++;
  }
  return energy;
}
|
|
|
|
/* residual_get_band_energy
 * ========================
 * computes 1/N * sum(norm * (re^2 + im^2)) over a range of FFT bins
 * lo: first bin of the range (inclusive); negative values are clamped to 0
 * hi: end of the range (exclusive); clamped to floor(N / 2)
 * fft_struct: spectrum to read (fdr = real part, fdi = imaginary part)
 * norm: scale factor applied to every squared magnitude
 * N is fft_struct->size.
 */
double residual_get_band_energy(int lo, int hi, ATS_FFT *fft_struct, double norm)
{
  const double half_size = floor(fft_struct->size * 0.5);
  double acc = 0.0;
  int bin;

  lo = (lo < 0) ? 0 : lo;
  if (hi > half_size) {
    hi = (int)half_size;
  }

  bin = lo;
  while (bin < hi) {
    acc += MAG_SQUARED(fft_struct->fdr[bin], fft_struct->fdi[bin], norm);
    bin++;
  }
  return acc / fft_struct->size;
}
|
|
|
|
/* residual_compute_band_energy
 * ============================
 * evaluates the energy of every band delimited by two adjacent limits
 * fft_struct: spectrum to read
 * band_limits: <bands> bin limits; band b spans band_limits[b] (inclusive)
 *              to band_limits[b+1] (exclusive)
 * bands: number of limits, i.e. one more than the number of bands written
 * band_energy: output, receives bands-1 values
 * norm: scale factor forwarded to residual_get_band_energy
 */
void residual_compute_band_energy(ATS_FFT *fft_struct, int *band_limits, int bands, double *band_energy, double norm)
{
  const int band_count = bands - 1;
  int band;

  for (band = 0; band < band_count; ++band) {
    const int first_bin = band_limits[band];
    const int end_bin = band_limits[band + 1];

    band_energy[band] = residual_get_band_energy(first_bin, end_bin, fft_struct, norm);
  }
}
|
|
|
|
/* residual_analysis
|
|
* =================
|
|
* performs the critical-band analysis of the residual file
|
|
* res: residual samples (channel 0 = residual, channel 1 = partials synthesis)
|
|
* res_len: number of residual samples per channel
|
|
* s_rate: sampling rate for the samples
|
|
* sound: sound to store the residual data
|
|
*/
|
|
void residual_analysis(double** res, int res_len, int s_rate, ATS_SOUND *sound)
|
|
{
|
|
int file_sampling_rate, hop, M, N, frames, *band_limits;
|
|
int M_2, st_pt, filptr, i, frame_n, k;
|
|
double norm=1.0, threshold, fft_mag, **band_arr, *band_energy;
|
|
//int smp=0;
|
|
//double time_domain_energy=0.0, freq_domain_energy=0.0, sum=0.0;
|
|
//double e_ratio=1.0;
|
|
double *edges = ATSA_CRITICAL_BAND_EDGES;
|
|
ATS_FFT fft_struct;
|
|
file_sampling_rate = s_rate;
|
|
hop = sound->frame_size;
|
|
M = sound->window_size;
|
|
N = residual_get_N(M, ATSA_RES_MIN_FFT_SIZE, ATSA_RES_PAD_FACTOR);
|
|
fft_struct.size = N;
|
|
fft_struct.rate = file_sampling_rate;
|
|
fft_struct.fdr = (double *)malloc(N * sizeof(double));
|
|
fft_struct.fdi = (double *)malloc(N * sizeof(double));
|
|
threshold = AMP_DB(ATSA_NOISE_THRESHOLD);
|
|
frames = sound->frames;
|
|
fft_mag = (double)file_sampling_rate / (double)N;
|
|
band_limits = (int *)malloc(sizeof(int)*(ATSA_CRITICAL_BANDS+1));
|
|
residual_get_bands(fft_mag, edges, band_limits, ATSA_CRITICAL_BANDS+1);
|
|
band_arr = (double **)malloc(ATSA_CRITICAL_BANDS*sizeof(double *));
|
|
for(i =0 ; i<ATSA_CRITICAL_BANDS ; i++){
|
|
band_arr[i] = (double *)malloc(frames*sizeof(double));
|
|
}
|
|
band_energy = (double *)malloc(ATSA_CRITICAL_BANDS*sizeof(double));
|
|
M_2 = (int)floor( ((double)M - 1) * 0.5 );
|
|
st_pt = N - M_2;
|
|
filptr = M_2 * -1;
|
|
|
|
fprintf(stderr, "Analyzing residual ...\n");
|
|
|
|
for(frame_n = 0 ; frame_n < frames ; frame_n++){
|
|
for(i = 0 ; i < N ; i++){
|
|
fft_struct.fdr[i] = 0.0;
|
|
fft_struct.fdi[i] = 0.0;
|
|
}
|
|
for(k = 0 ; k < M ; k++){
|
|
if (filptr >= 0 && filptr < res_len)
|
|
fft_struct.fdr[(k+st_pt)%N] = res[0][filptr];
|
|
filptr++;
|
|
}
|
|
//smp = filptr - M_2 - 1;
|
|
//time_domain_energy = residual_compute_time_domain_energy(&fft_struct);
|
|
fft(fft_struct.fdr, fft_struct.fdi, fft_struct.size, 1);
|
|
residual_compute_band_energy(&fft_struct, band_limits, ATSA_CRITICAL_BANDS+1, band_energy, norm);
|
|
//sum = 0.0;
|
|
//for(k = 0; k < ATSA_CRITICAL_BANDS; k++){
|
|
// sum += band_energy[k];
|
|
//}
|
|
//freq_domain_energy = 2.0 * sum;
|
|
//e_ratio = (freq_domain_energy > 0.0) ? (time_domain_energy / freq_domain_energy) : 1.0;
|
|
//fprintf(stderr, "[FDE: %f TDE: %f e_ratio: %f]\n", freq_domain_energy, time_domain_energy, e_ratio);
|
|
for(k = 0; k < ATSA_CRITICAL_BANDS; k++){
|
|
if( band_energy[k] < threshold) {
|
|
band_arr[k][frame_n] = 0.0;
|
|
} else {
|
|
band_arr[k][frame_n] = band_energy[k];
|
|
}
|
|
}
|
|
filptr = filptr - M + hop;
|
|
}
|
|
// save data in sound
|
|
sound->band_energy = band_arr;
|
|
free(fft_struct.fdr);
|
|
free(fft_struct.fdi);
|
|
free(band_energy);
|
|
free(band_limits);
|
|
}
|
|
|
|
/* band_energy_to_res
|
|
* ==================
|
|
* transfers residual engergy from bands to partials
|
|
* sound: sound structure containing data
|
|
* frame: frame number
|
|
*/
|
|
void band_energy_to_res(ATS_SOUND *sound, int frame)
|
|
{
|
|
int j, k, par, first_par, last_par=-1;
|
|
double sum;
|
|
double *edges = ATSA_CRITICAL_BAND_EDGES;
|
|
par = 0;
|
|
/* find partials by band */
|
|
for(j=0 ; j<ATSA_CRITICAL_BANDS ; j++){
|
|
first_par = par;
|
|
sum = 0.0;
|
|
while( par < sound->partials &&
|
|
(sound->band_energy[j][frame] > 0.0) &&
|
|
(sound->frq[par][frame] >= edges[j]) &&
|
|
(sound->frq[par][frame] < edges[j+1]))
|
|
{
|
|
sum += sound->amp[par][frame];
|
|
last_par = par;
|
|
par++;
|
|
}
|
|
if( sum > 0.0 ){
|
|
/* transfer band energy to partials */
|
|
for(k=first_par ; k<last_par+1; k++){
|
|
if(k >= sound->partials) {
|
|
break;
|
|
}
|
|
sound->res[k][frame] = sound->amp[k][frame] * sound->band_energy[j][frame] / sum;
|
|
// sound->band_energy[j][frame] = 0.0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/* res_to_band_energy
|
|
* ==================
|
|
* transfers residual engergy from partials to bands
|
|
* sound: sound structure containing data
|
|
* frame: frame number
|
|
*/
|
|
void res_to_band_energy(ATS_SOUND *sound, int frame)
|
|
{
|
|
int j, par;
|
|
double sum;
|
|
double *edges = ATSA_CRITICAL_BAND_EDGES;
|
|
par = 0;
|
|
for(j=0 ; j<ATSA_CRITICAL_BANDS ; j++){
|
|
sum = 0.0;
|
|
while(sound->frq[par][frame] >= edges[j] &&
|
|
sound->frq[par][frame] < edges[j+1])
|
|
{
|
|
sum += sound->res[par][frame];
|
|
par++;
|
|
}
|
|
sound->band_energy[j][frame] = sum;
|
|
}
|
|
}
|