Lun Dong | a43730e | 2022-06-22 21:52:14 -0700 | [diff] [blame] | 1 | // Copyright 2022 Google LLC |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
#include "audio_prep/mfcc.h"

#include <cstddef>
#include <cstdint>
#include <vector>

#include "pw_unit_test/framework.h"
Lun Dong | a43730e | 2022-06-22 21:52:14 -0700 | [diff] [blame] | 18 | |
| 19 | static MfccParams golden_params = {.num_frames = 3, |
| 20 | .num_mel_bins = 10, |
| 21 | .audio_samp_rate = 16000, |
| 22 | .low_edge_hz = 125, |
| 23 | .upper_edge_hz = 7600, |
| 24 | .win_len_sec = 0.000625, |
| 25 | .hop_len_sec = 0.0003125, |
| 26 | .log_floor = 0.01, |
| 27 | .log_scaler = 20}; |
| 28 | static int16_t golden_input[] = {7954, 10085, 8733, 10844, 29949, |
| 29 | -549, 20833, 30345, 18086, 11375, |
| 30 | -27309, 12323, -22891, -23360, 11958}; |
| 31 | static uint8_t golden_output[] = {0, 0, 191, 187, 174, 186, 179, 175, 179, 173, |
| 32 | 0, 0, 209, 205, 181, 193, 182, 199, 209, 216, |
| 33 | 0, 0, 198, 194, 192, 204, 196, 201, 212, 223}; |
| 34 | |
Lun Dong | a43730e | 2022-06-22 21:52:14 -0700 | [diff] [blame] | 35 | TEST(MfccTest, AgreesWithGoldenValues) { |
| 36 | // set parameters |
| 37 | set_mfcc_params(&golden_params); |
| 38 | int out_len = golden_params.num_frames * golden_params.num_mel_bins; |
| 39 | uint8_t* out = reinterpret_cast<uint8_t*>(malloc(out_len * sizeof(uint8_t))); |
| 40 | // extract MFCC |
| 41 | extract_mfcc(golden_input, out, sizeof(golden_input) / sizeof(int16_t)); |
| 42 | |
Lun Dong | c4839b3 | 2022-07-28 17:02:09 -0700 | [diff] [blame] | 43 | int kTolerance = 0; |
| 44 | #ifdef MFCC_WITH_RVV |
| 45 | kTolerance = 1; |
| 46 | #endif |
Lun Dong | a43730e | 2022-06-22 21:52:14 -0700 | [diff] [blame] | 47 | for (int i = 0; i < out_len; i++) { |
Lun Dong | c4839b3 | 2022-07-28 17:02:09 -0700 | [diff] [blame] | 48 | int diff = out[i] - golden_output[i]; |
| 49 | if (diff < 0) diff = -diff; |
| 50 | ASSERT_LE(diff, kTolerance); |
Lun Dong | a43730e | 2022-06-22 21:52:14 -0700 | [diff] [blame] | 51 | } |
| 52 | free(out); |
| 53 | } |
| 54 | |
| 55 | TEST(MfccTest, DcInputSaneResult) { |
| 56 | MfccParams params = {.num_frames = 10, |
| 57 | .num_mel_bins = 64, |
| 58 | .audio_samp_rate = 16000, |
| 59 | .low_edge_hz = 125, |
| 60 | .upper_edge_hz = 7500, |
| 61 | .win_len_sec = 0.032, |
| 62 | .hop_len_sec = 0.016, |
| 63 | .log_floor = 0.01, |
| 64 | .log_scaler = 20}; |
| 65 | |
| 66 | // set parameters |
| 67 | set_mfcc_params(¶ms); |
| 68 | |
| 69 | int hop_len = static_cast<int>(params.audio_samp_rate * params.hop_len_sec); |
| 70 | int in_len = hop_len * params.num_frames; |
| 71 | int16_t* in = reinterpret_cast<int16_t*>( |
| 72 | malloc(params.num_frames * hop_len * sizeof(int16_t))); |
| 73 | int out_len = params.num_frames * params.num_mel_bins; |
| 74 | uint8_t* out = reinterpret_cast<uint8_t*>(malloc(out_len * sizeof(uint8_t))); |
| 75 | |
| 76 | // DC Input |
| 77 | memset(in, 255, in_len * sizeof(int16_t)); |
| 78 | // extract MFCC |
| 79 | extract_mfcc(in, out, in_len); |
| 80 | // ignore the 1st frame due to pre zero-padding |
| 81 | // expect zero outputs |
| 82 | for (int i = params.num_mel_bins; i < out_len; i++) { |
| 83 | ASSERT_EQ(out[i], 0); |
| 84 | } |
| 85 | |
| 86 | free(in); |
| 87 | free(out); |
| 88 | } |
| 89 | |
| 90 | TEST(MfccTest, NyquistFreqInputSaneResult) { |
| 91 | MfccParams params = {.num_frames = 15, |
| 92 | .num_mel_bins = 32, |
| 93 | .audio_samp_rate = 16000, |
| 94 | .low_edge_hz = 125, |
| 95 | .upper_edge_hz = 7500, |
| 96 | .win_len_sec = 0.016, |
| 97 | .hop_len_sec = 0.008, |
| 98 | .log_floor = 0.01, |
| 99 | .log_scaler = 20}; |
| 100 | |
| 101 | // set parameters |
| 102 | set_mfcc_params(¶ms); |
| 103 | |
| 104 | int hop_len = static_cast<int>(params.audio_samp_rate * params.hop_len_sec); |
| 105 | int in_len = hop_len * params.num_frames; |
| 106 | int16_t* in = reinterpret_cast<int16_t*>( |
| 107 | malloc(params.num_frames * hop_len * sizeof(int16_t))); |
| 108 | int out_len = params.num_frames * params.num_mel_bins; |
| 109 | uint8_t* out = reinterpret_cast<uint8_t*>(malloc(out_len * sizeof(uint8_t))); |
| 110 | |
| 111 | // High (Nyquest) frequency Input |
| 112 | memset(in, 255, in_len * sizeof(int16_t)); |
| 113 | for (int i = 1; i < in_len; i += 2) in[i] *= -1; |
| 114 | // extract MFCC |
| 115 | extract_mfcc(in, out, in_len); |
| 116 | // ignore the 1st frame due to pre zero-padding |
| 117 | // expect zero outputs |
| 118 | for (int i = params.num_mel_bins; i < out_len; i++) { |
| 119 | ASSERT_EQ(out[i], 0); |
| 120 | } |
| 121 | |
| 122 | free(in); |
| 123 | free(out); |
| 124 | } |