// ATSSound class implementation // // C++ interface to ATS_SOUND C structure and functions from atsa // // QATSH Copyright 2009 Jean-Philippe MEURET #include #include #include #include #include #include "mathexpr.h" #include "ATSSound.h" // Constructors / Destructor =============================================== ATSSound::ATSSound(ATS_SOUND* pSoundData) : _pSoundData(pSoundData), _eSoundType((ESoundTypeId)ATSA_TYPE), _nFileSize(-1), _bNeedFree(false) { if (_pSoundData) { // We'll need to free _pSoundData internals in destructor. _bNeedFree = true; // Backward compatibility : ATSH didn't save it. updateResidualMaxEnergy(); } } ATSSound::~ATSSound() { if ( _pSoundData) { if (_bNeedFree) // free_sound(_pSoundData); free(_pSoundData); } } // Static accessors ======================================================== bool ATSSound::soundHasPhase(ATSSound::ESoundTypeId eType) { return (eType == eSoundTypePartialsAmplFreqPhase || eType == eSoundTypePartialsAmplFreqPhaseNoise); } bool ATSSound::soundHasNoise(ATSSound::ESoundTypeId eType) { return (eType == eSoundTypePartialsAmplFreqNoise || eType == eSoundTypePartialsAmplFreqPhaseNoise); } // Properties get/setters ================================================== double ATSSound::samplingRate() const { return _pSoundData->srate; } //void ATSSound::setSamplingRate(double dSamplingRate) //{ //} int ATSSound::frameSize() const { return _pSoundData->frame_size; } //void ATSSound::setFrameSize(int nbSamples) //{ //} int ATSSound::windowSize() const { return _pSoundData->window_size; } //void ATSSound::setWindowSize(int nbSamples) //{ //} int ATSSound::nbFrames() const { return _pSoundData->frames; } //void ATSSound::setNbFrames(int nbFrames) //{ //} //void ATSSound::setMaxFrequency(double dFreq) //{ //} double ATSSound::duration() const { return _pSoundData->dur; } //void ATSSound::setDuration(double dDuration) //{ //} int ATSSound::fileSize() const { return _nFileSize; } ATSSound::ESoundTypeId ATSSound::type() const { return _eSoundType; } bool ATSSound::hasPhase() const { return soundHasPhase(_eSoundType); } bool ATSSound::hasNoise() const { return soundHasNoise(_eSoundType); } int ATSSound::nbPartialProperties() const { return 3 + (hasPhase() ? 1 : 0) + (hasNoise() ? 1 : 0); } //void ATSSound::setType(ESoundTypeId eType) //{ //} bool ATSSound::isOptimized() const { return _pSoundData->optimized != NIL; } // Partials properties =================================================== int ATSSound::nbPartials() const { return _pSoundData->partials; } double ATSSound::partialsMaxAmplitude() const { return _pSoundData->ampmax; } double ATSSound::partialsMaxFrequency() const { return _pSoundData->frqmax; } double ATSSound::partialTime(int nFrameInd) const { return _pSoundData->time[0][nFrameInd]; } double ATSSound::partialFrequency(int nPartInd, int nFrameInd) const { return _pSoundData->frq[_pSoundData->av[nPartInd].track][nFrameInd]; } double ATSSound::partialAmplitude(int nPartInd, int nFrameInd) const { return _pSoundData->amp[_pSoundData->av[nPartInd].track][nFrameInd]; } double ATSSound::partialPhase(int nPartInd, int nFrameInd) const { return _pSoundData->pha[_pSoundData->av[nPartInd].track][nFrameInd]; } double ATSSound::partialSMR(int nPartInd, int nFrameInd) const { return _pSoundData->smr[_pSoundData->av[nPartInd].track][nFrameInd]; } // Residual properties =================================================== int ATSSound::nbResidualBands() const { return ATSA_CRITICAL_BANDS; } double ATSSound::residualBandMinFrequency(int nBandInd) const { return ATSA_CRITICAL_BAND_EDGES[nBandInd]; } double ATSSound::residualBandMaxFrequency(int nBandInd) const { return ATSA_CRITICAL_BAND_EDGES[nBandInd+1]; } double ATSSound::residualBandsMaxFrequency() const { return ATSA_CRITICAL_BAND_EDGES[ATSA_CRITICAL_BANDS]; } double ATSSound::residualBandsMaxEnergy() const { return _dResBandFrameMaxEnergy; } double ATSSound::residualBandEnergy(int nBandInd, int nFrameInd) const { return _pSoundData->band_energy[nBandInd][nFrameInd]; } double ATSSound::residualBandTime(int nFrameInd) const { return _pSoundData->time[0][nFrameInd]; } // Add/Remove partials =================================================== void ATSSound::addPartial(int nSrcPartIndex) { std::cout << "ATSSound::addPartial(" << nSrcPartIndex << ")" << std::endl; add_partial(_pSoundData, nSrcPartIndex); } void ATSSound::removePartials(const int aPartIndexes[], int nParts) { std::cout << "ATSSound::removePartials("; for (int nInd = 0; nInd < nParts; nInd++) std::cout << aPartIndexes[nInd] << ','; std::cout << ')' << std::endl; remove_partials(_pSoundData, const_cast(aPartIndexes), nParts); } // ESoundFunction class ==================================================== // MathExpr EFunction-derived class for access to sound data from partial and frame index. class ESoundFunction : public EFunction { public: ESoundFunction(ATS_SOUND* pSoundData, TypesAndConstants::EPartialMagnitude eMagnitude) : EFunction(2), _eMagnitude(eMagnitude), _pSoundData(pSoundData) { } // Function evaluation function. double Val() { // Force partial index inside the right interval. int nPartInd = (int)pvars[0]; if (nPartInd < 0) nPartInd = 0; else if (nPartInd >= _pSoundData->partials) nPartInd = _pSoundData->partials - 1; // Force frame index inside the right interval. int nFrameInd = (int)pvars[1]; if (nFrameInd < 0) nFrameInd = 0; else if (nFrameInd >= _pSoundData->frames) nFrameInd = _pSoundData->frames - 1; // Get the target magnitude value. switch (_eMagnitude) { case TypesAndConstants::eAmplitude: return _pSoundData->amp[_pSoundData->av[nPartInd].track][nFrameInd]; case TypesAndConstants::eFrequency: return _pSoundData->frq[_pSoundData->av[nPartInd].track][nFrameInd]; case TypesAndConstants::ePhase: return _pSoundData->pha[_pSoundData->av[nPartInd].track][nFrameInd]; default: // N/A. return 0; } } private: TypesAndConstants::EPartialMagnitude _eMagnitude; ATS_SOUND* _pSoundData; }; // Modify partials ======================================================== std::string ATSSound::modifyPartials(const int aPartIndexes[], int nParts, const std::string& strFormula, TypesAndConstants::EPartialMagnitude eTargetMagnitude) { static const int nMaxVars = 20; RVar* aVars[nMaxVars]; int nVars = 0; // Warning: MUST never be larger that nMaxVars. // Define the Partial number constant "variable". double dPartNum = (int)nbPartials(); RVar varPartials("P", &dPartNum); aVars[nVars++] = &varPartials; // Define the Frame number constant "variable". double dFrameNum = (int)nbFrames(); RVar varFrames("T", &dFrameNum); aVars[nVars++] = &varFrames; // Define the Partial index variable. double dPartInd; RVar varPartial("p", &dPartInd); aVars[nVars++] = &varPartial; // Define the Frame index variable. double dFrameInd; RVar varFrame("t", &dFrameInd); aVars[nVars++] = &varFrame; // Define the amplitude, frequency and phase variables // (current partial and frame, then min, max and mean for the partial among all its frames). double dAmpl; RVar varAmpl("a", &dAmpl); aVars[nVars++] = &varAmpl; double dMinAmpl; RVar varMinAmpl("amin", &dMinAmpl); aVars[nVars++] = &varMinAmpl; double dMaxAmpl; RVar varMaxAmpl("amax", &dMaxAmpl); aVars[nVars++] = &varMaxAmpl; double dMeanAmpl; RVar varMeanAmpl("amean", &dMeanAmpl); aVars[nVars++] = &varMeanAmpl; double dFreq; RVar varFreq("f", &dFreq); aVars[nVars++] = &varFreq; double dMinFreq; RVar varMinFreq("fmin", &dMinFreq); aVars[nVars++] = &varMinFreq; double dMaxFreq; RVar varMaxFreq("fmax", &dMaxFreq); aVars[nVars++] = &varMaxFreq; double dMeanFreq; RVar varMeanFreq("fmean", &dMeanFreq); aVars[nVars++] = &varMeanFreq; double dPhase; RVar varPhase("h", &dPhase); aVars[nVars++] = &varPhase; double dMinPhase; RVar varMinPhase("hmin", &dMinPhase); aVars[nVars++] = &varMinPhase; double dMaxPhase; RVar varMaxPhase("hmax", &dMaxPhase); aVars[nVars++] = &varMaxPhase; double dMeanPhase; RVar varMeanPhase("hmean", &dMeanPhase); aVars[nVars++] = &varMeanPhase; // Target magnitude (value(frame), min/frames, max/frames, mean/frames). double** pTgtMagnitude = 0; double* pTgtValue = 0; double* pTgtMinValue = 0; double* pTgtMaxValue = 0; double* pTgtMeanValue = 0; switch (eTargetMagnitude) { case TypesAndConstants::eAmplitude: pTgtMagnitude = _pSoundData->amp; pTgtValue = &dAmpl; pTgtMinValue = &dMinAmpl; pTgtMaxValue = &dMaxAmpl; pTgtMeanValue = &dMeanAmpl; break; case TypesAndConstants::eFrequency: pTgtMagnitude = _pSoundData->frq; pTgtValue = &dFreq; pTgtMinValue = &dMinFreq; pTgtMaxValue = &dMaxFreq; pTgtMeanValue = &dMeanFreq; break; case TypesAndConstants::ePhase: pTgtMagnitude = _pSoundData->pha; pTgtValue = &dPhase; pTgtMinValue = &dMinPhase; pTgtMaxValue = &dMaxPhase; pTgtMeanValue = &dMeanPhase; break; default: // N/A. break; } // Define the target magnitude variable (2nd possible name). RVar varValue("v", pTgtValue); aVars[nVars++] = &varValue; // Define the target magnitude min, max, mean, ... "constant" variables (2nd possible name). RVar varMinValue("vmin", pTgtMinValue); aVars[nVars++] = &varMinValue; RVar varMaxValue("vmax", pTgtMaxValue); aVars[nVars++] = &varMaxValue; RVar varMeanValue("vmean", pTgtMeanValue); aVars[nVars++] = &varMeanValue; clock_t start_time = clock(); // Make a copy of the ATS_SOUND to process : write operations will process _pSoundData, // while read operations will process this virgin copy (necessary for some formulas). ATS_SOUND* pSourceSoundData = copy_sound(_pSoundData); // Define the amplitude, frequency and phase functions : p,t => ampl/freq/pha[p][t] static const int nMaxFuncs = 5; RFunction* aFuncs[nMaxFuncs]; int nFuncs = 0; // Warning: MUST never be larger that nMaxFuncs. ESoundFunction eFuncAmpl(pSourceSoundData, TypesAndConstants::eAmplitude); RFunction funcAmpl(eFuncAmpl); funcAmpl.SetName("A"); aFuncs[nFuncs++] = &funcAmpl; ESoundFunction eFuncFreq(pSourceSoundData, TypesAndConstants::eFrequency); RFunction funcFreq(eFuncFreq); funcFreq.SetName("F"); aFuncs[nFuncs++] = &funcFreq; ESoundFunction eFuncPhase(pSourceSoundData, TypesAndConstants::ePhase); RFunction funcPhase(eFuncPhase); funcPhase.SetName("H"); aFuncs[nFuncs++] = &funcPhase; ESoundFunction* pTgtValueEFunc = 0; switch (eTargetMagnitude) { case TypesAndConstants::eAmplitude: pTgtValueEFunc = &eFuncAmpl; break; case TypesAndConstants::eFrequency: pTgtValueEFunc = &eFuncFreq; break; case TypesAndConstants::ePhase: pTgtValueEFunc = &eFuncPhase; break; default: // N/A. break; } RFunction funcValue(*pTgtValueEFunc); funcValue.SetName("V"); aFuncs[nFuncs++] = &funcValue; // Check nFuncs and nVars (in case we didn't crash before ;-) if (nVars > nMaxVars) { std::cout << "ATSSound::modifyPartials(" << nParts << ") : Too many variables ; memory overwritten !" << std::endl; return "Error: Variables memory overwritten"; } if (nFuncs > nMaxFuncs) { std::cout << "ATSSound::modifyPartials(" << nParts << ") : Too many functions ; memory overwritten !" << std::endl; return "Error: Functions memory overwritten"; } // Define and check the formula. ROperation opFormula(strFormula.c_str(), nVars, aVars, nFuncs, aFuncs); char* pszParsedFormula = opFormula.Expr(); const std::string strParsedFormula(pszParsedFormula); delete [] pszParsedFormula; if (strParsedFormula.find("Error") != std::string::npos) { std::cout << "ATSSound::modifyPartials(" << nParts << ") : Skipping on erroneous formula " << strParsedFormula << std::endl; return strParsedFormula; } // Apply the formula. for (int nPartIndInd = 0; nPartIndInd < nParts; nPartIndInd++) { // Set the value of the Partial index variable. const int nPartInd = pSourceSoundData->av[aPartIndexes[nPartIndInd]].track; dPartInd = (double)nPartInd; // Compute amin, amax, amean (vmin set at the same time). dMinAmpl = std::numeric_limits::max(); dMaxAmpl = -dMinAmpl; dMeanAmpl = 0.0; for (int nFrameInd = 0; nFrameInd < nbFrames(); nFrameInd++) { dAmpl = pSourceSoundData->amp[nPartInd][nFrameInd]; dMeanAmpl += dAmpl; if (dMaxAmpl < dAmpl) dMaxAmpl = dAmpl; if (dMinAmpl > dAmpl) dMinAmpl = dAmpl; } dMeanAmpl /= nbFrames(); // Compute fmin, fmax, fmean (vmin set at the same time). dMinFreq = std::numeric_limits::max(); dMaxFreq = -dMinFreq; dMeanFreq = 0.0; for (int nFrameInd = 0; nFrameInd < nbFrames(); nFrameInd++) { dFreq = pSourceSoundData->frq[nPartInd][nFrameInd]; dMeanFreq += dFreq; if (dMaxFreq < dFreq) dMaxFreq = dFreq; if (dMinFreq > dFreq) dMinFreq = dFreq; } dMeanFreq /= nbFrames(); // Compute hmin, hmax, hmean (vmin set at the same time). dMinPhase = std::numeric_limits::max(); dMaxPhase = -dMinPhase; dMeanPhase = 0.0; for (int nFrameInd = 0; nFrameInd < nbFrames(); nFrameInd++) { dPhase = pSourceSoundData->pha[nPartInd][nFrameInd]; dMeanPhase += dPhase; if (dMaxPhase < dPhase) dMaxPhase = dPhase; if (dMinPhase > dPhase) dMinPhase = dPhase; } dMeanPhase /= nbFrames(); // Apply the formula to each frame. for (int nFrameInd = 0; nFrameInd < nbFrames(); nFrameInd++) { // Set the value of the Frame index variable. dFrameInd = (double)nFrameInd; // Set the value of the a, f and h variables (v also set at the same time). dAmpl = pSourceSoundData->amp[nPartInd][nFrameInd]; dFreq = pSourceSoundData->frq[nPartInd][nFrameInd]; dPhase = pSourceSoundData->pha[nPartInd][nFrameInd]; // Evaluate the formula and change the frame magnitude. pTgtMagnitude[nPartInd][nFrameInd] = opFormula.Val(); } } // Free sound copy. free_sound(pSourceSoundData); std::cout << "ATSSound::modifyPartials(" << nParts << ") : " << clock() - start_time << std::endl; // << (double)(clock() - start_time) / (double)CLOCKS_PER_SEC << " s" << std::endl; return strParsedFormula; } // .ats file input ======================================================== bool ATSSound::load(const char* pszATSFileName) { // Re-initialize sound data structure if it has already been used. if (_pSoundData) { if (_bNeedFree) { free_sound(_pSoundData); _bNeedFree = false; } } else _pSoundData = (ATS_SOUND*)malloc(sizeof(ATS_SOUND)); // Loads the .ats file. _nFileSize = ats_load(_pSoundData, pszATSFileName, (int*)&_eSoundType); // Compute the maximum of energy in residual band frames if successfully loaded. if (_nFileSize >= 0) { // We'll need to free _pSoundData internals in destructor. _bNeedFree = true; // Backward compatibility : ATSH didn't save it. updateResidualMaxEnergy(); } return _nFileSize >= 0; } // .ats file output ======================================================= bool ATSSound::store(const char* pszATSFileName) const { // TODO : Customizable partial amplitude threshold (0 for the moment). return ats_save(_pSoundData, pszATSFileName, 0.0, (int)_eSoundType) > 0; } // Compute the maximum of energy in residual band frames ================== void ATSSound::updateResidualMaxEnergy() { _dResBandFrameMaxEnergy = 0.0; for (int nBandInd = 0; nBandInd < nbResidualBands() ; nBandInd++) for (int nFrameInd = 0; nFrameInd < nbFrames() ; nFrameInd++) if (_pSoundData->band_energy[nBandInd][nFrameInd] > _dResBandFrameMaxEnergy) _dResBandFrameMaxEnergy = _pSoundData->band_energy[nBandInd][nFrameInd]; } // Direct access to the internal ATS_SOUND structure ====================== ATS_SOUND* ATSSound::data() { return _pSoundData; } // Tools ================================================================== void ATSSound::dump(const char* pszHeader, std::ostream& oStream) const { oStream << pszHeader << "ATSSound :" << std::endl; oStream << " File type : " << type() << std::endl; oStream << " File size (bytes) : " << fileSize() << std::endl; oStream << " Duration (s) : " << duration() << std::endl; oStream << " Sampling rate (Hz) : " << samplingRate() << std::endl; oStream << " Nb partials : " << nbPartials() << std::endl; oStream << " Max. part. freq. (Hz) : " << partialsMaxFrequency() << std::endl; oStream << " Max. part. ampl. (?) : " << partialsMaxAmplitude() << std::endl; oStream << " With noise : " << (hasNoise() ? "Yes" : "No") << std::endl; oStream << " With phase : " << (hasPhase() ? "Yes" : "No") << std::endl; oStream << " Nb frames : " << nbFrames() << std::endl; oStream << " Frame size (samples) : " << frameSize() << std::endl; oStream << " Window size (samples) : " << windowSize() << std::endl; oStream << " Optimized : " << (isOptimized() ? "Yes" : "No") << std::endl; }