blob: d76c36c9414eb0aedab66cdb1b5be55a4e92b483 [file] [log] [blame]
/*
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "audio_prep/mfcc.h"

#include <cstdint>
#include <vector>

#include "pw_unit_test/framework.h"
// Extractor configuration for the golden-value test. With these numbers
// win_len_sec * audio_samp_rate is 10 and hop_len_sec * audio_samp_rate is 5,
// so the 15 entries of golden_input span num_frames (3) hops.
// Kept non-const because its address is handed to set_mfcc_params().
static MfccParams golden_params = {
    .num_frames = 3,
    .num_mel_bins = 10,
    .audio_samp_rate = 16000,
    .low_edge_hz = 125,
    .upper_edge_hz = 7600,
    .win_len_sec = 0.000625,
    .hop_len_sec = 0.0003125,
    .log_floor = 0.01,
    .log_scaler = 20};
// Fixed 15-sample input fed to extract_mfcc() by the golden-value test.
// Laid out five samples per row.
static int16_t golden_input[] = {
    7954,   10085, 8733,   10844,  29949,
    -549,   20833, 30345,  18086,  11375,
    -27309, 12323, -22891, -23360, 11958};
// Expected extractor output for golden_input under golden_params:
// 30 values, written here as one row of 10 values per frame (3 frames).
static uint8_t golden_output[] = {
    0, 0, 191, 187, 174, 186, 179, 175, 179, 173,
    0, 0, 209, 205, 181, 193, 182, 199, 209, 216,
    0, 0, 198, 194, 192, 204, 196, 201, 212, 223};
// Runs the extractor on golden_input with golden_params and compares every
// produced value against golden_output.
TEST(MfccTest, AgreesWithGoldenValues) {
  // Maximum allowed absolute difference per output value: 1 when
  // MFCC_WITH_RVV is defined, otherwise the match must be exact.
#ifdef MFCC_WITH_RVV
  constexpr int kTolerance = 1;
#else
  constexpr int kTolerance = 0;
#endif
  constexpr int kGoldenLen =
      static_cast<int>(sizeof(golden_output) / sizeof(golden_output[0]));

  // set parameters
  set_mfcc_params(&golden_params);

  const int out_len = golden_params.num_frames * golden_params.num_mel_bins;
  // The comparison loop indexes golden_output up to out_len, so the table
  // must have exactly that many entries.
  ASSERT_EQ(out_len, kGoldenLen);

  // The vector owns the output buffer, so it is released even when a fatal
  // assertion below returns from the test body early.
  std::vector<uint8_t> out(out_len);

  // extract MFCC
  extract_mfcc(golden_input, out.data(),
               sizeof(golden_input) / sizeof(golden_input[0]));

  for (int i = 0; i < out_len; i++) {
    // uint8_t operands promote to int, so the subtraction cannot wrap.
    const int delta = out[i] - golden_output[i];
    const int abs_delta = (delta < 0) ? -delta : delta;
    ASSERT_LE(abs_delta, kTolerance);
  }
}
// Feeds a constant (DC) signal through the extractor and expects every output
// value after the first frame to be zero.
TEST(MfccTest, DcInputSaneResult) {
  // Non-zero fill for the output buffer: the expected result is 0, so a value
  // the extractor never wrote must not be able to satisfy the check.
  constexpr uint8_t kUnwritten = 0xFF;

  MfccParams params = {.num_frames = 10,
                       .num_mel_bins = 64,
                       .audio_samp_rate = 16000,
                       .low_edge_hz = 125,
                       .upper_edge_hz = 7500,
                       .win_len_sec = 0.032,
                       .hop_len_sec = 0.016,
                       .log_floor = 0.01,
                       .log_scaler = 20};
  // set parameters
  set_mfcc_params(&params);

  const int hop_len =
      static_cast<int>(params.audio_samp_rate * params.hop_len_sec);
  const int in_len = hop_len * params.num_frames;
  const int out_len = params.num_frames * params.num_mel_bins;

  // DC input: every sample is -1. This is the value the previous byte-wise
  // fill with 0xFF produced for each int16_t sample.
  // Both buffers are owned by vectors so they are released even when a fatal
  // assertion below returns from the test body early.
  std::vector<int16_t> in(in_len, int16_t{-1});
  std::vector<uint8_t> out(out_len, kUnwritten);

  // extract MFCC
  extract_mfcc(in.data(), out.data(), in_len);

  // ignore the 1st frame due to pre zero-padding
  // expect zero outputs
  for (int i = params.num_mel_bins; i < out_len; i++) {
    ASSERT_EQ(out[i], 0);
  }
}
// Feeds a signal that alternates sign on every sample (the Nyquist frequency)
// through the extractor and expects every output value after the first frame
// to be zero.
TEST(MfccTest, NyquistFreqInputSaneResult) {
  // Non-zero fill for the output buffer: the expected result is 0, so a value
  // the extractor never wrote must not be able to satisfy the check.
  constexpr uint8_t kUnwritten = 0xFF;

  MfccParams params = {.num_frames = 15,
                       .num_mel_bins = 32,
                       .audio_samp_rate = 16000,
                       .low_edge_hz = 125,
                       .upper_edge_hz = 7500,
                       .win_len_sec = 0.016,
                       .hop_len_sec = 0.008,
                       .log_floor = 0.01,
                       .log_scaler = 20};
  // set parameters
  set_mfcc_params(&params);

  const int hop_len =
      static_cast<int>(params.audio_samp_rate * params.hop_len_sec);
  const int in_len = hop_len * params.num_frames;
  const int out_len = params.num_frames * params.num_mel_bins;

  // High (Nyquist) frequency input: -1, +1, -1, +1, ...
  // Even indices keep the -1 that the previous byte-wise fill with 0xFF
  // produced; odd indices are its negation.
  // Both buffers are owned by vectors so they are released even when a fatal
  // assertion below returns from the test body early.
  std::vector<int16_t> in(in_len, int16_t{-1});
  for (int i = 1; i < in_len; i += 2) {
    in[i] = 1;
  }
  std::vector<uint8_t> out(out_len, kUnwritten);

  // extract MFCC
  extract_mfcc(in.data(), out.data(), in_len);

  // ignore the 1st frame due to pre zero-padding
  // expect zero outputs
  for (int i = params.num_mel_bins; i < out_len; i++) {
    ASSERT_EQ(out[i], 0);
  }
}