2018-01-12 23:45:42 +00:00
|
|
|
/*
|
|
|
|
* Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License").
|
|
|
|
* You may not use this file except in compliance with the License.
|
|
|
|
* A copy of the License is located at
|
|
|
|
*
|
|
|
|
* http://aws.amazon.com/apache2.0/
|
|
|
|
*
|
|
|
|
* or in the "license" file accompanying this file. This file is distributed
|
|
|
|
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
|
|
|
|
* express or implied. See the License for the specific language governing
|
|
|
|
* permissions and limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2017-05-18 05:02:48 +00:00
|
|
|
#include <chrono>
|
|
|
|
#include <fstream>
|
|
|
|
#include <iostream>
|
|
|
|
#include <memory>
|
|
|
|
#include <string>
|
|
|
|
#include <thread>
|
|
|
|
|
|
|
|
#include <gtest/gtest.h>
|
|
|
|
|
|
|
|
#include <AVSCommon/SDKInterfaces/KeyWordObserverInterface.h>
|
|
|
|
#include <AVSCommon/SDKInterfaces/KeyWordDetectorStateObserverInterface.h>
|
2017-06-23 23:26:34 +00:00
|
|
|
#include <AVSCommon/AVS/AudioInputStream.h>
|
2017-05-18 05:02:48 +00:00
|
|
|
#include <AVSCommon/Utils/SDS/SharedDataStream.h>
|
|
|
|
|
|
|
|
#include "KittAi/KittAiKeyWordDetector.h"
|
|
|
|
|
|
|
|
namespace alexaClientSDK {
|
|
|
|
namespace kwd {
|
2017-06-09 23:23:31 +00:00
|
|
|
namespace test {
|
2017-05-18 05:02:48 +00:00
|
|
|
|
|
|
|
using namespace avsCommon;
|
2017-06-23 23:26:34 +00:00
|
|
|
using namespace avsCommon::avs;
|
2017-05-18 05:02:48 +00:00
|
|
|
using namespace avsCommon::sdkInterfaces;
|
2017-06-23 23:26:34 +00:00
|
|
|
using namespace avsCommon::utils;
|
2017-05-18 05:02:48 +00:00
|
|
|
|
|
|
|
/// The path to the inputs folder that should be passed in via command line argument.
|
|
|
|
std::string inputsDirPath;
|
|
|
|
|
|
|
|
/// The name of the resource file required for Kitt.ai.
|
|
|
|
static const std::string RESOURCE_FILE = "/KittAiModels/common.res";
|
|
|
|
|
|
|
|
/// The name of the Alexa model file for Kitt.ai.
|
|
|
|
static const std::string MODEL_FILE = "/KittAiModels/alexa.umdl";
|
|
|
|
|
|
|
|
/// The keyword associated with alexa.umdl.
|
|
|
|
static const std::string MODEL_KEYWORD = "ALEXA";
|
|
|
|
|
|
|
|
/// The name of a test audio file.
|
|
|
|
static const std::string FOUR_ALEXAS_AUDIO_FILE = "/four_alexa.wav";
|
|
|
|
|
|
|
|
/// The name of a test audio file.
|
|
|
|
static const std::string ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE = "/alexa_stop_alexa_joke.wav";
|
|
|
|
|
|
|
|
/// The number of samples per millisecond, assuming a sample rate of 16 kHz.
|
|
|
|
static const int SAMPLES_PER_MS = 16;
|
|
|
|
|
|
|
|
/// The margin in milliseconds for testing indices of keyword detections.
|
|
|
|
static const std::chrono::milliseconds MARGIN = std::chrono::milliseconds(100);
|
|
|
|
|
|
|
|
/// The margin in samples for testing indices of keyword detections.
|
|
|
|
static const AudioInputStream::Index MARGIN_IN_SAMPLES = MARGIN.count() * SAMPLES_PER_MS;
|
|
|
|
|
|
|
|
/// The number of "Alexa" keywords in the four_alexa.wav file.
|
2017-10-02 22:59:05 +00:00
|
|
|
static const size_t NUM_ALEXAS_IN_FOUR_ALEXAS_AUDIO_FILE = 4;
|
2017-05-18 05:02:48 +00:00
|
|
|
|
|
|
|
/// The approximate end indices of the four "Alexa" hotwords in the four_alexa.wav file.
|
|
|
|
std::vector<AudioInputStream::Index> END_INDICES_OF_ALEXAS_IN_FOUR_ALEXAS_AUDIO_FILE = {21440, 52800, 72480, 91552};
|
|
|
|
|
|
|
|
/// The number of "Alexa" keywords in the alexa_stop_alexa_joke.wav file.
|
2017-10-02 22:59:05 +00:00
|
|
|
static const size_t NUM_ALEXAS_IN_ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE = 2;
|
2017-05-18 05:02:48 +00:00
|
|
|
|
|
|
|
/// The approximate end indices of the two "Alexa" hotwords in the alexa_stop_alexa_joke.wav file.
|
|
|
|
std::vector<AudioInputStream::Index> END_INDICES_OF_ALEXAS_IN_ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE = {20960, 51312};
|
|
|
|
|
|
|
|
/// The compatible encoding for Kitt.ai.
|
2017-10-02 22:59:05 +00:00
|
|
|
static const avsCommon::utils::AudioFormat::Encoding COMPATIBLE_ENCODING =
|
|
|
|
avsCommon::utils::AudioFormat::Encoding::LPCM;
|
2017-05-18 05:02:48 +00:00
|
|
|
|
|
|
|
/// The compatible endianness for Kitt.ai.
|
2017-10-02 22:59:05 +00:00
|
|
|
static const avsCommon::utils::AudioFormat::Endianness COMPATIBLE_ENDIANNESS =
|
|
|
|
avsCommon::utils::AudioFormat::Endianness::LITTLE;
|
2017-05-18 05:02:48 +00:00
|
|
|
|
|
|
|
/// The compatible sample rate for Kitt.ai.
|
|
|
|
static const unsigned int COMPATIBLE_SAMPLE_RATE = 16000;
|
|
|
|
|
|
|
|
/// The compatible bits per sample for Kitt.ai.
|
|
|
|
static const unsigned int COMPATIBLE_SAMPLE_SIZE_IN_BITS = 16;
|
|
|
|
|
|
|
|
/// The compatible number of channels for Kitt.ai
|
|
|
|
static const unsigned int COMPATIBLE_NUM_CHANNELS = 1;
|
|
|
|
|
|
|
|
/// Timeout for expected callbacks.
|
2017-06-09 23:23:31 +00:00
|
|
|
static const auto DEFAULT_TIMEOUT = std::chrono::milliseconds(4000);
|
2017-05-18 05:02:48 +00:00
|
|
|
|
|
|
|
/// The audio gain to apply to the detectors so that the expected detections occur.
|
|
|
|
static const double KITTAI_AUDIO_GAIN = 2.0;
|
|
|
|
|
|
|
|
/// Whether to tell Kitt.ai to apply front end processing. This is false since this only works on Raspberry Pi.
|
|
|
|
static const bool KITTAI_APPLY_FRONTEND_PROCESSING = false;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* The sensitivity to the keyword in the model. Set to 0.6 as this is what was described as optimal on the Kitt.ai
|
|
|
|
* Github page.
|
|
|
|
*/
|
|
|
|
static const double KITTAI_SENSITIVITY = 0.6;
|
|
|
|
|
|
|
|
/// A test observer that mocks out the KeyWordObserverInterface##onKeyWordDetected() call.
|
|
|
|
class testKeyWordObserver : public KeyWordObserverInterface {
|
|
|
|
public:
|
|
|
|
/// A struct used for bookkeeping of keyword detections.
|
|
|
|
struct detectionResult {
|
|
|
|
AudioInputStream::Index endIndex;
|
|
|
|
std::string keyword;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Implementation of the KeyWordObserverInterface##onKeyWordDetected() call.
|
|
|
|
void onKeyWordDetected(
|
2017-10-02 22:59:05 +00:00
|
|
|
std::shared_ptr<AudioInputStream> stream,
|
|
|
|
std::string keyword,
|
|
|
|
AudioInputStream::Index beginIndex,
|
Version 1.7.0 of the avs-device-sdk
Changes in this update:
**Enhancements**
* `AuthDelegate` and `AuthServer.py` have been replaced by `CBLAUthDelegate`, which provides a more straightforward path to authorization.
* Added a new configuration property called [`cblAuthDelegate`](https://github.com/alexa/avs-device-sdk/blob/master/Integration/AlexaClientSDKConfig.json#L2). This object specifies parameters for `CBLAuthDelegate`.
* Added a new configuration property called [`miscDatabase`](https://github.com/alexa/avs-device-sdk/blob/master/Integration/AlexaClientSDKConfig.json#L34), which is a generic key/value database to be used by various components.
* Added a new configuration property called [`dcfDelegate`](https://github.com/alexa/avs-device-sdk/blob/master/Integration/AlexaClientSDKConfig.json#L17) This object specifies parameters for `DCFDelegate`. Within this object, values were added for the 'endpoint' and `overridenDcfPublishMessageBody`. 'endpoint' is the endpoint to connect to in order to send device capabilities. `overridenDcfPublishMessageBody`is the message that will get sent out to the Capabilities API. Note: values within the `dcfDelegate` object will only work in `DEBUG` builds.
* Added a new configuration property called [`deviceInfo`](https://github.com/alexa/avs-device-sdk/blob/master/Integration/AlexaClientSDKConfig.json#L9) which specifies device-identifying information for use by the Device Capability Framework (DCF), and for authorization (CBLAuthDelegate).
* Updated the Directive Sequencer to support wildcard directive handlers. This allows a handler for a given AVS interface to register at the namespace level, rather than specifying the names of all directives within that namespace.
* Updated the Raspberry Pi installation script to include `alsasink` in the configuration file.
* Added `audioSink` as a configuration option. This allows users to override the audio sink element used in `Gstreamer`.
* Added an interface for monitoring internet connection status: `InternetConnectionMonitorInterface.h`.
* The Alexa Communications Library (ACL) is no longer required to wait until authorization has succeeded before attempting to connect to AVS. Instead, `HTTP2Transport` handles waiting for authorization to complete.
* Added the Device Capabilities Framework (DCF) delegate. Device capabilities can now be sent for each capability interface using DCF publish messages.
* The sample app has been updated to send DCF publish messages, which will automatically occur when the sample app starts. Note: a DCF publish message must be successfully sent in order for communication with AVS to occur.
* The SDK now supports HTTP PUT messages.
* Added support for opt-arg style arguments and multiple configuration files. Now, the sample app can be invoked by either of these commands: `SampleApp <configfile> <debuglevel>` OR `SampleApp -C file1 -C file2 ... -L loglevel`.
**Bug Fixes**
* Issues [447](https://github.com/alexa/avs-device-sdk/issues/447) and [553](https://github.com/alexa/avs-device-sdk/issues/553) Fixed the `AttachmentRenderSource`'s handling of `BLOCKING` `AttachmentReaders`.
* Updated the `Logger` implementation to be more resilient to `nullptr` string inputs.
* Fixed a `TimeUtils` utility-related compile issue.
* Fixed a bug in which alerts failed to activate if the system was restarted without network connection.
* Fixed Android 64-bit build failure issue.
**Known Issues**
* The `ACL` may encounter issues if audio attachments are received but not consumed.
* `SpeechSynthesizerState` currently uses `GAINING_FOCUS` and `LOSING_FOCUS` as a workaround for handling intermediate state. These states may be removed in a future release.
* Some ERROR messages may be printed during start-up event if initialization proceeds normally and successfully.
* If an unrecoverable authorization error or an unrecoverable DCF error is encountered, the sample app may crash on shutdown.
* If a non-CBL `clientId` is included in the `deviceInfo` section of `AlexaClientSDKConfig.json`, the error will be reported as an unrecoverable authorization error, rather than a more specific error.
2018-04-18 22:17:28 +00:00
|
|
|
AudioInputStream::Index endIndex,
|
|
|
|
std::shared_ptr<const std::vector<char>> KWDMetadata) {
|
2017-05-18 05:02:48 +00:00
|
|
|
std::lock_guard<std::mutex> lock(m_mutex);
|
|
|
|
m_detectionResults.push_back({endIndex, keyword});
|
|
|
|
m_detectionOccurred.notify_one();
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Waits for the KeyWordObserverInterface##onKeyWordDetected() call N times.
|
|
|
|
*
|
|
|
|
* @param numDetectionsExpected The number of detections expected.
|
|
|
|
* @param timeout The amount of time to wait for the calls.
|
|
|
|
* @return The detection results that actually occurred.
|
|
|
|
*/
|
|
|
|
std::vector<detectionResult> waitForNDetections(
|
2017-10-02 22:59:05 +00:00
|
|
|
unsigned int numDetectionsExpected,
|
|
|
|
std::chrono::milliseconds timeout) {
|
2017-05-18 05:02:48 +00:00
|
|
|
std::unique_lock<std::mutex> lock(m_mutex);
|
2017-10-02 22:59:05 +00:00
|
|
|
m_detectionOccurred.wait_for(lock, timeout, [this, numDetectionsExpected]() {
|
2017-05-18 05:02:48 +00:00
|
|
|
return m_detectionResults.size() == numDetectionsExpected;
|
|
|
|
});
|
|
|
|
return m_detectionResults;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
/// The detection results that have occurred.
|
|
|
|
std::vector<detectionResult> m_detectionResults;
|
|
|
|
|
|
|
|
/// A lock to guard against new detections.
|
|
|
|
std::mutex m_mutex;
|
|
|
|
|
|
|
|
/// A condition variable to wait for detection calls.
|
|
|
|
std::condition_variable m_detectionOccurred;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// A test observer that mocks out the KeyWordDetectorStateObserverInterface##onStateChanged() call.
|
|
|
|
class testStateObserver : public KeyWordDetectorStateObserverInterface {
|
|
|
|
public:
|
|
|
|
/**
|
|
|
|
* Constructor.
|
|
|
|
*/
|
2017-10-02 22:59:05 +00:00
|
|
|
testStateObserver() :
|
|
|
|
m_state(KeyWordDetectorStateObserverInterface::KeyWordDetectorState::STREAM_CLOSED),
|
|
|
|
m_stateChangeOccurred{false} {
|
2017-05-18 05:02:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Implementation of the KeyWordDetectorStateObserverInterface##onStateChanged() call.
|
|
|
|
void onStateChanged(KeyWordDetectorStateObserverInterface::KeyWordDetectorState keyWordDetectorState) {
|
|
|
|
std::unique_lock<std::mutex> lock(m_mutex);
|
|
|
|
m_state = keyWordDetectorState;
|
|
|
|
m_stateChangeOccurred = true;
|
|
|
|
m_stateChanged.notify_one();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Waits for the KeyWordDetectorStateObserverInterface##onStateChanged() call.
|
|
|
|
*
|
|
|
|
* @param timeout The amount of time to wait for the call.
|
|
|
|
* @param stateChanged An output parameter that notifies the caller whether a call occurred.
|
|
|
|
* @return Returns the state of the observer.
|
|
|
|
*/
|
|
|
|
KeyWordDetectorStateObserverInterface::KeyWordDetectorState waitForStateChange(
|
2017-10-02 22:59:05 +00:00
|
|
|
std::chrono::milliseconds timeout,
|
|
|
|
bool* stateChanged) {
|
2017-05-18 05:02:48 +00:00
|
|
|
std::unique_lock<std::mutex> lock(m_mutex);
|
2017-10-02 22:59:05 +00:00
|
|
|
bool success = m_stateChanged.wait_for(lock, timeout, [this]() { return m_stateChangeOccurred; });
|
2017-05-18 05:02:48 +00:00
|
|
|
|
|
|
|
if (!success) {
|
|
|
|
*stateChanged = false;
|
|
|
|
} else {
|
|
|
|
m_stateChangeOccurred = false;
|
|
|
|
*stateChanged = true;
|
|
|
|
}
|
|
|
|
return m_state;
|
|
|
|
}
|
2017-10-02 22:59:05 +00:00
|
|
|
|
2017-05-18 05:02:48 +00:00
|
|
|
private:
|
|
|
|
/// The state of the observer.
|
|
|
|
KeyWordDetectorStateObserverInterface::KeyWordDetectorState m_state;
|
|
|
|
|
|
|
|
/// A boolean flag so that we can re-use the observer even after a callback has occurred.
|
|
|
|
bool m_stateChangeOccurred;
|
|
|
|
|
|
|
|
/// A lock to guard against state changes.
|
|
|
|
std::mutex m_mutex;
|
|
|
|
|
|
|
|
/// A condition variable to wait for state changes.
|
|
|
|
std::condition_variable m_stateChanged;
|
|
|
|
};
|
|
|
|
|
|
|
|
class KittAiKeyWordTest : public ::testing::Test {
|
2017-10-02 22:59:05 +00:00
|
|
|
protected:
|
|
|
|
std::vector<int16_t> readAudioFromFile(const std::string& fileName, bool* errorOccurred) {
|
2017-05-18 05:02:48 +00:00
|
|
|
const int RIFF_HEADER_SIZE = 44;
|
|
|
|
|
|
|
|
std::ifstream inputFile(fileName.c_str(), std::ifstream::binary);
|
|
|
|
if (!inputFile.good()) {
|
|
|
|
std::cout << "Couldn't open audio file!" << std::endl;
|
|
|
|
if (errorOccurred) {
|
|
|
|
*errorOccurred = true;
|
|
|
|
}
|
|
|
|
return {};
|
|
|
|
}
|
|
|
|
inputFile.seekg(0, std::ios::end);
|
|
|
|
int fileLengthInBytes = inputFile.tellg();
|
|
|
|
if (fileLengthInBytes <= RIFF_HEADER_SIZE) {
|
|
|
|
std::cout << "File should be larger than 44 bytes, which is the size of the RIFF header" << std::endl;
|
|
|
|
if (errorOccurred) {
|
|
|
|
*errorOccurred = true;
|
|
|
|
}
|
|
|
|
return {};
|
|
|
|
}
|
|
|
|
|
|
|
|
inputFile.seekg(RIFF_HEADER_SIZE, std::ios::beg);
|
|
|
|
|
|
|
|
int numSamples = (fileLengthInBytes - RIFF_HEADER_SIZE) / 2;
|
|
|
|
|
|
|
|
std::vector<int16_t> retVal(numSamples, 0);
|
|
|
|
|
2017-10-02 22:59:05 +00:00
|
|
|
inputFile.read((char*)&retVal[0], numSamples * 2);
|
2017-05-18 05:02:48 +00:00
|
|
|
|
2017-10-02 22:59:05 +00:00
|
|
|
if (inputFile.gcount() != numSamples * 2) {
|
2017-05-18 05:02:48 +00:00
|
|
|
std::cout << "Error reading audio file" << std::endl;
|
|
|
|
if (errorOccurred) {
|
|
|
|
*errorOccurred = true;
|
|
|
|
}
|
|
|
|
return {};
|
|
|
|
}
|
|
|
|
|
|
|
|
inputFile.close();
|
|
|
|
if (errorOccurred) {
|
|
|
|
*errorOccurred = false;
|
|
|
|
}
|
|
|
|
return retVal;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool isResultPresent(
|
2017-10-02 22:59:05 +00:00
|
|
|
std::vector<testKeyWordObserver::detectionResult>& results,
|
|
|
|
AudioInputStream::Index expectedEndIndex,
|
|
|
|
const std::string& expectedKeyword) {
|
2017-05-18 05:02:48 +00:00
|
|
|
AudioInputStream::Index highBound = expectedEndIndex + MARGIN_IN_SAMPLES;
|
|
|
|
AudioInputStream::Index lowBound = expectedEndIndex - MARGIN_IN_SAMPLES;
|
|
|
|
for (auto result : results) {
|
|
|
|
if (result.endIndex <= highBound && result.endIndex >= lowBound && expectedKeyword == result.keyword) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::shared_ptr<testKeyWordObserver> keyWordObserver1;
|
|
|
|
|
|
|
|
std::shared_ptr<testKeyWordObserver> keyWordObserver2;
|
|
|
|
|
|
|
|
std::shared_ptr<testStateObserver> stateObserver;
|
|
|
|
|
|
|
|
AudioFormat compatibleAudioFormat;
|
|
|
|
|
|
|
|
KittAiKeyWordDetector::KittAiConfiguration config;
|
|
|
|
|
|
|
|
virtual void SetUp() {
|
|
|
|
keyWordObserver1 = std::make_shared<testKeyWordObserver>();
|
|
|
|
keyWordObserver2 = std::make_shared<testKeyWordObserver>();
|
|
|
|
stateObserver = std::make_shared<testStateObserver>();
|
|
|
|
|
|
|
|
compatibleAudioFormat.sampleRateHz = COMPATIBLE_SAMPLE_RATE;
|
|
|
|
compatibleAudioFormat.sampleSizeInBits = COMPATIBLE_SAMPLE_SIZE_IN_BITS;
|
|
|
|
compatibleAudioFormat.numChannels = COMPATIBLE_NUM_CHANNELS;
|
|
|
|
compatibleAudioFormat.endianness = COMPATIBLE_ENDIANNESS;
|
|
|
|
compatibleAudioFormat.encoding = COMPATIBLE_ENCODING;
|
|
|
|
|
2017-10-02 22:59:05 +00:00
|
|
|
std::ifstream filePresent((inputsDirPath + MODEL_FILE).c_str());
|
|
|
|
ASSERT_TRUE(filePresent.good()) << "Unable to find " + inputsDirPath + MODEL_FILE
|
|
|
|
<< ". Please place model file within this location.";
|
2017-05-18 05:02:48 +00:00
|
|
|
|
2017-10-02 22:59:05 +00:00
|
|
|
std::ifstream filePresent2((inputsDirPath + RESOURCE_FILE).c_str());
|
|
|
|
ASSERT_TRUE(filePresent2.good()) << "Unable to find " + inputsDirPath + RESOURCE_FILE
|
|
|
|
<< ". Please place model file within this location.";
|
2017-05-18 05:02:48 +00:00
|
|
|
|
2017-10-02 22:59:05 +00:00
|
|
|
config = {inputsDirPath + MODEL_FILE, MODEL_KEYWORD, KITTAI_SENSITIVITY};
|
2017-05-18 05:02:48 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Tests that we don't get back a valid detector if an invalid stream is passed in.
|
|
|
|
TEST_F(KittAiKeyWordTest, invalidStream) {
|
|
|
|
auto detector = KittAiKeyWordDetector::create(
|
2017-10-02 22:59:05 +00:00
|
|
|
nullptr,
|
|
|
|
compatibleAudioFormat,
|
|
|
|
{keyWordObserver1},
|
|
|
|
std::unordered_set<std::shared_ptr<KeyWordDetectorStateObserverInterface>>(),
|
|
|
|
inputsDirPath + RESOURCE_FILE,
|
|
|
|
{config},
|
|
|
|
KITTAI_AUDIO_GAIN,
|
|
|
|
KITTAI_APPLY_FRONTEND_PROCESSING);
|
2017-05-18 05:02:48 +00:00
|
|
|
ASSERT_FALSE(detector);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Tests that we don't get back a valid detector if an invalid endianness is passed in.
|
|
|
|
TEST_F(KittAiKeyWordTest, incompatibleEndianness) {
|
2017-06-23 23:26:34 +00:00
|
|
|
auto rawBuffer = std::make_shared<avsCommon::avs::AudioInputStream::Buffer>(500000);
|
|
|
|
auto uniqueSds = avsCommon::avs::AudioInputStream::create(rawBuffer, 2, 1);
|
2017-05-18 05:02:48 +00:00
|
|
|
std::shared_ptr<AudioInputStream> sds = std::move(uniqueSds);
|
|
|
|
|
|
|
|
compatibleAudioFormat.endianness = AudioFormat::Endianness::BIG;
|
|
|
|
|
|
|
|
auto detector = KittAiKeyWordDetector::create(
|
2017-10-02 22:59:05 +00:00
|
|
|
sds,
|
|
|
|
compatibleAudioFormat,
|
|
|
|
{keyWordObserver1},
|
|
|
|
std::unordered_set<std::shared_ptr<KeyWordDetectorStateObserverInterface>>(),
|
|
|
|
inputsDirPath + RESOURCE_FILE,
|
|
|
|
{config},
|
|
|
|
1.0,
|
2017-05-18 05:02:48 +00:00
|
|
|
false);
|
|
|
|
ASSERT_FALSE(detector);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Tests that we get back the expected number of keywords for the four_alexa.wav file for one keyword observer.
|
|
|
|
TEST_F(KittAiKeyWordTest, getExpectedNumberOfDetectionsInFourAlexasAudioFileForOneObserver) {
|
2017-06-23 23:26:34 +00:00
|
|
|
auto fourAlexasBuffer = std::make_shared<avsCommon::avs::AudioInputStream::Buffer>(500000);
|
|
|
|
auto fourAlexasSds = avsCommon::avs::AudioInputStream::create(fourAlexasBuffer, 2, 1);
|
2017-05-18 05:02:48 +00:00
|
|
|
std::shared_ptr<AudioInputStream> fourAlexasAudioBuffer = std::move(fourAlexasSds);
|
|
|
|
|
2017-10-02 22:59:05 +00:00
|
|
|
std::unique_ptr<AudioInputStream::Writer> fourAlexasAudioBufferWriter =
|
|
|
|
fourAlexasAudioBuffer->createWriter(avsCommon::avs::AudioInputStream::Writer::Policy::NONBLOCKABLE);
|
2017-05-18 05:02:48 +00:00
|
|
|
|
|
|
|
std::string audioFilePath = inputsDirPath + FOUR_ALEXAS_AUDIO_FILE;
|
|
|
|
bool error;
|
|
|
|
std::vector<int16_t> audioData = readAudioFromFile(audioFilePath, &error);
|
|
|
|
ASSERT_FALSE(error);
|
|
|
|
|
|
|
|
fourAlexasAudioBufferWriter->write(audioData.data(), audioData.size());
|
|
|
|
|
|
|
|
auto detector = KittAiKeyWordDetector::create(
|
2017-10-02 22:59:05 +00:00
|
|
|
fourAlexasAudioBuffer,
|
|
|
|
compatibleAudioFormat,
|
|
|
|
{keyWordObserver1},
|
|
|
|
std::unordered_set<std::shared_ptr<KeyWordDetectorStateObserverInterface>>(),
|
|
|
|
inputsDirPath + RESOURCE_FILE,
|
|
|
|
{config},
|
|
|
|
KITTAI_AUDIO_GAIN,
|
2017-05-18 05:02:48 +00:00
|
|
|
KITTAI_APPLY_FRONTEND_PROCESSING);
|
|
|
|
ASSERT_TRUE(detector);
|
2017-10-02 22:59:05 +00:00
|
|
|
auto detections =
|
|
|
|
keyWordObserver1->waitForNDetections(END_INDICES_OF_ALEXAS_IN_FOUR_ALEXAS_AUDIO_FILE.size(), DEFAULT_TIMEOUT);
|
2017-05-18 05:02:48 +00:00
|
|
|
ASSERT_EQ(detections.size(), NUM_ALEXAS_IN_FOUR_ALEXAS_AUDIO_FILE);
|
|
|
|
|
|
|
|
for (auto index : END_INDICES_OF_ALEXAS_IN_FOUR_ALEXAS_AUDIO_FILE) {
|
|
|
|
ASSERT_TRUE(isResultPresent(detections, index, MODEL_KEYWORD));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Tests that we get back the expected number of keywords for the four_alexa.wav file for two keyword observers.
|
|
|
|
TEST_F(KittAiKeyWordTest, getExpectedNumberOfDetectionsInFourAlexasAudioFileForTwoObservers) {
|
2017-06-23 23:26:34 +00:00
|
|
|
auto fourAlexasBuffer = std::make_shared<avsCommon::avs::AudioInputStream::Buffer>(500000);
|
|
|
|
auto fourAlexasSds = avsCommon::avs::AudioInputStream::create(fourAlexasBuffer, 2, 1);
|
2017-05-18 05:02:48 +00:00
|
|
|
std::shared_ptr<AudioInputStream> fourAlexasAudioBuffer = std::move(fourAlexasSds);
|
|
|
|
|
2017-10-02 22:59:05 +00:00
|
|
|
std::unique_ptr<AudioInputStream::Writer> fourAlexasAudioBufferWriter =
|
|
|
|
fourAlexasAudioBuffer->createWriter(avsCommon::avs::AudioInputStream::Writer::Policy::NONBLOCKABLE);
|
2017-05-18 05:02:48 +00:00
|
|
|
|
|
|
|
std::string audioFilePath = inputsDirPath + FOUR_ALEXAS_AUDIO_FILE;
|
|
|
|
bool error;
|
|
|
|
std::vector<int16_t> audioData = readAudioFromFile(audioFilePath, &error);
|
|
|
|
ASSERT_FALSE(error);
|
|
|
|
|
|
|
|
fourAlexasAudioBufferWriter->write(audioData.data(), audioData.size());
|
|
|
|
|
|
|
|
auto detector = KittAiKeyWordDetector::create(
|
2017-10-02 22:59:05 +00:00
|
|
|
fourAlexasAudioBuffer,
|
|
|
|
compatibleAudioFormat,
|
|
|
|
{keyWordObserver1, keyWordObserver2},
|
|
|
|
std::unordered_set<std::shared_ptr<KeyWordDetectorStateObserverInterface>>(),
|
|
|
|
inputsDirPath + RESOURCE_FILE,
|
|
|
|
{config},
|
|
|
|
KITTAI_AUDIO_GAIN,
|
2017-05-18 05:02:48 +00:00
|
|
|
KITTAI_APPLY_FRONTEND_PROCESSING);
|
|
|
|
ASSERT_TRUE(detector);
|
2017-10-02 22:59:05 +00:00
|
|
|
auto detections = keyWordObserver1->waitForNDetections(NUM_ALEXAS_IN_FOUR_ALEXAS_AUDIO_FILE, DEFAULT_TIMEOUT);
|
2017-05-18 05:02:48 +00:00
|
|
|
ASSERT_EQ(detections.size(), NUM_ALEXAS_IN_FOUR_ALEXAS_AUDIO_FILE);
|
|
|
|
|
|
|
|
for (auto index : END_INDICES_OF_ALEXAS_IN_FOUR_ALEXAS_AUDIO_FILE) {
|
|
|
|
ASSERT_TRUE(isResultPresent(detections, index, MODEL_KEYWORD));
|
|
|
|
}
|
|
|
|
|
2017-10-02 22:59:05 +00:00
|
|
|
detections = keyWordObserver2->waitForNDetections(NUM_ALEXAS_IN_FOUR_ALEXAS_AUDIO_FILE, DEFAULT_TIMEOUT);
|
2017-05-18 05:02:48 +00:00
|
|
|
ASSERT_EQ(detections.size(), NUM_ALEXAS_IN_FOUR_ALEXAS_AUDIO_FILE);
|
|
|
|
|
|
|
|
for (auto index : END_INDICES_OF_ALEXAS_IN_FOUR_ALEXAS_AUDIO_FILE) {
|
|
|
|
ASSERT_TRUE(isResultPresent(detections, index, MODEL_KEYWORD));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2017-10-02 22:59:05 +00:00
|
|
|
* Tests that we get back the expected number of keywords for the alexa_stop_alexa_joke.wav file for one keyword
|
2017-05-18 05:02:48 +00:00
|
|
|
* observer.
|
|
|
|
*/
|
|
|
|
TEST_F(KittAiKeyWordTest, getExpectedNumberOfDetectionsInAlexaStopAlexaJokeAudioFileForOneObserver) {
|
2017-06-23 23:26:34 +00:00
|
|
|
auto alexaStopAlexaJokeBuffer = std::make_shared<avsCommon::avs::AudioInputStream::Buffer>(500000);
|
|
|
|
auto alexaStopAlexaJokeSds = avsCommon::avs::AudioInputStream::create(alexaStopAlexaJokeBuffer, 2, 1);
|
2017-05-18 05:02:48 +00:00
|
|
|
std::shared_ptr<AudioInputStream> alexaStopAlexaJokeAudioBuffer = std::move(alexaStopAlexaJokeSds);
|
|
|
|
|
2017-10-02 22:59:05 +00:00
|
|
|
std::unique_ptr<AudioInputStream::Writer> alexaStopAlexaJokeAudioBufferWriter =
|
|
|
|
alexaStopAlexaJokeAudioBuffer->createWriter(avsCommon::avs::AudioInputStream::Writer::Policy::NONBLOCKABLE);
|
2017-05-18 05:02:48 +00:00
|
|
|
|
|
|
|
std::string audioFilePath = inputsDirPath + ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE;
|
|
|
|
bool error;
|
|
|
|
std::vector<int16_t> audioData = readAudioFromFile(audioFilePath, &error);
|
|
|
|
ASSERT_FALSE(error);
|
|
|
|
|
|
|
|
alexaStopAlexaJokeAudioBufferWriter->write(audioData.data(), audioData.size());
|
|
|
|
|
|
|
|
auto detector = KittAiKeyWordDetector::create(
|
2017-10-02 22:59:05 +00:00
|
|
|
alexaStopAlexaJokeAudioBuffer,
|
|
|
|
compatibleAudioFormat,
|
|
|
|
{keyWordObserver1},
|
|
|
|
std::unordered_set<std::shared_ptr<KeyWordDetectorStateObserverInterface>>(),
|
|
|
|
inputsDirPath + RESOURCE_FILE,
|
|
|
|
{config},
|
|
|
|
KITTAI_AUDIO_GAIN,
|
|
|
|
KITTAI_APPLY_FRONTEND_PROCESSING);
|
2017-05-18 05:02:48 +00:00
|
|
|
ASSERT_TRUE(detector);
|
2017-10-02 22:59:05 +00:00
|
|
|
auto detections =
|
|
|
|
keyWordObserver1->waitForNDetections(NUM_ALEXAS_IN_ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE, DEFAULT_TIMEOUT);
|
2017-05-18 05:02:48 +00:00
|
|
|
ASSERT_EQ(detections.size(), NUM_ALEXAS_IN_ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE);
|
|
|
|
|
|
|
|
for (auto index : END_INDICES_OF_ALEXAS_IN_ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE) {
|
|
|
|
ASSERT_TRUE(isResultPresent(detections, index, MODEL_KEYWORD));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2017-10-02 22:59:05 +00:00
|
|
|
* Tests that we get back the expected number of keywords for the alexa_stop_alexa_joke.wav file for two keyword
|
2017-05-18 05:02:48 +00:00
|
|
|
* observer.
|
|
|
|
*/
|
|
|
|
TEST_F(KittAiKeyWordTest, getExpectedNumberOfDetectionsInAlexaStopAlexaJokeAudioFileForTwoObservers) {
|
2017-06-23 23:26:34 +00:00
|
|
|
auto alexaStopAlexaJokeBuffer = std::make_shared<avsCommon::avs::AudioInputStream::Buffer>(500000);
|
|
|
|
auto alexaStopAlexaJokeSds = avsCommon::avs::AudioInputStream::create(alexaStopAlexaJokeBuffer, 2, 1);
|
2017-05-18 05:02:48 +00:00
|
|
|
std::shared_ptr<AudioInputStream> alexaStopAlexaJokeAudioBuffer = std::move(alexaStopAlexaJokeSds);
|
|
|
|
|
2017-10-02 22:59:05 +00:00
|
|
|
std::unique_ptr<AudioInputStream::Writer> alexaStopAlexaJokeAudioBufferWriter =
|
|
|
|
alexaStopAlexaJokeAudioBuffer->createWriter(avsCommon::avs::AudioInputStream::Writer::Policy::NONBLOCKABLE);
|
2017-05-18 05:02:48 +00:00
|
|
|
|
|
|
|
std::string audioFilePath = inputsDirPath + ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE;
|
|
|
|
bool error;
|
|
|
|
std::vector<int16_t> audioData = readAudioFromFile(audioFilePath, &error);
|
|
|
|
ASSERT_FALSE(error);
|
|
|
|
|
|
|
|
alexaStopAlexaJokeAudioBufferWriter->write(audioData.data(), audioData.size());
|
|
|
|
|
|
|
|
auto detector = KittAiKeyWordDetector::create(
|
2017-10-02 22:59:05 +00:00
|
|
|
alexaStopAlexaJokeAudioBuffer,
|
|
|
|
compatibleAudioFormat,
|
|
|
|
{keyWordObserver1, keyWordObserver2},
|
|
|
|
std::unordered_set<std::shared_ptr<KeyWordDetectorStateObserverInterface>>(),
|
|
|
|
inputsDirPath + RESOURCE_FILE,
|
|
|
|
{config},
|
|
|
|
KITTAI_AUDIO_GAIN,
|
|
|
|
KITTAI_APPLY_FRONTEND_PROCESSING);
|
2017-05-18 05:02:48 +00:00
|
|
|
ASSERT_TRUE(detector);
|
2017-10-02 22:59:05 +00:00
|
|
|
auto detections =
|
|
|
|
keyWordObserver1->waitForNDetections(NUM_ALEXAS_IN_ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE, DEFAULT_TIMEOUT);
|
2017-05-18 05:02:48 +00:00
|
|
|
ASSERT_EQ(detections.size(), NUM_ALEXAS_IN_ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE);
|
|
|
|
|
|
|
|
for (auto index : END_INDICES_OF_ALEXAS_IN_ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE) {
|
|
|
|
ASSERT_TRUE(isResultPresent(detections, index, MODEL_KEYWORD));
|
|
|
|
}
|
|
|
|
|
2017-10-02 22:59:05 +00:00
|
|
|
detections = keyWordObserver2->waitForNDetections(NUM_ALEXAS_IN_ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE, DEFAULT_TIMEOUT);
|
2017-05-18 05:02:48 +00:00
|
|
|
ASSERT_EQ(detections.size(), NUM_ALEXAS_IN_ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE);
|
|
|
|
|
|
|
|
for (auto index : END_INDICES_OF_ALEXAS_IN_ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE) {
|
|
|
|
ASSERT_TRUE(isResultPresent(detections, index, MODEL_KEYWORD));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Tests that the detector state changes to ACTIVE when the detector is initialized properly.
|
|
|
|
TEST_F(KittAiKeyWordTest, getActiveState) {
|
2017-06-23 23:26:34 +00:00
|
|
|
auto alexaStopAlexaJokeBuffer = std::make_shared<avsCommon::avs::AudioInputStream::Buffer>(500000);
|
|
|
|
auto alexaStopAlexaJokeSds = avsCommon::avs::AudioInputStream::create(alexaStopAlexaJokeBuffer, 2, 1);
|
2017-05-18 05:02:48 +00:00
|
|
|
std::shared_ptr<AudioInputStream> alexaStopAlexaJokeAudioBuffer = std::move(alexaStopAlexaJokeSds);
|
|
|
|
|
2017-10-02 22:59:05 +00:00
|
|
|
std::unique_ptr<AudioInputStream::Writer> alexaStopAlexaJokeAudioBufferWriter =
|
|
|
|
alexaStopAlexaJokeAudioBuffer->createWriter(avsCommon::avs::AudioInputStream::Writer::Policy::NONBLOCKABLE);
|
2017-05-18 05:02:48 +00:00
|
|
|
|
|
|
|
std::string audioFilePath = inputsDirPath + ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE;
|
|
|
|
bool error;
|
|
|
|
std::vector<int16_t> audioData = readAudioFromFile(audioFilePath, &error);
|
|
|
|
ASSERT_FALSE(error);
|
|
|
|
|
|
|
|
alexaStopAlexaJokeAudioBufferWriter->write(audioData.data(), audioData.size());
|
|
|
|
|
|
|
|
auto detector = KittAiKeyWordDetector::create(
|
2017-10-02 22:59:05 +00:00
|
|
|
alexaStopAlexaJokeAudioBuffer,
|
|
|
|
compatibleAudioFormat,
|
|
|
|
std::unordered_set<std::shared_ptr<KeyWordObserverInterface>>(),
|
|
|
|
{stateObserver},
|
|
|
|
inputsDirPath + RESOURCE_FILE,
|
|
|
|
{config},
|
|
|
|
KITTAI_AUDIO_GAIN,
|
|
|
|
false);
|
2017-05-18 05:02:48 +00:00
|
|
|
ASSERT_TRUE(detector);
|
|
|
|
bool stateChanged = false;
|
2017-10-02 22:59:05 +00:00
|
|
|
KeyWordDetectorStateObserverInterface::KeyWordDetectorState stateReceived =
|
|
|
|
stateObserver->waitForStateChange(DEFAULT_TIMEOUT, &stateChanged);
|
2017-05-18 05:02:48 +00:00
|
|
|
ASSERT_TRUE(stateChanged);
|
|
|
|
ASSERT_EQ(stateReceived, KeyWordDetectorStateObserverInterface::KeyWordDetectorState::ACTIVE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Tests that the stream is closed and that the detector state changes to STREAM_CLOSED when we close the only writer
|
|
|
|
* of the SDS passed in and all keyword detections have occurred.
|
|
|
|
*/
|
|
|
|
TEST_F(KittAiKeyWordTest, getStreamClosedState) {
|
2017-06-23 23:26:34 +00:00
|
|
|
auto alexaStopAlexaJokeBuffer = std::make_shared<avsCommon::avs::AudioInputStream::Buffer>(500000);
|
|
|
|
auto alexaStopAlexaJokeSds = avsCommon::avs::AudioInputStream::create(alexaStopAlexaJokeBuffer, 2, 1);
|
2017-05-18 05:02:48 +00:00
|
|
|
std::shared_ptr<AudioInputStream> alexaStopAlexaJokeAudioBuffer = std::move(alexaStopAlexaJokeSds);
|
|
|
|
|
2017-10-02 22:59:05 +00:00
|
|
|
std::unique_ptr<AudioInputStream::Writer> alexaStopAlexaJokeAudioBufferWriter =
|
|
|
|
alexaStopAlexaJokeAudioBuffer->createWriter(avsCommon::avs::AudioInputStream::Writer::Policy::NONBLOCKABLE);
|
2017-05-18 05:02:48 +00:00
|
|
|
|
|
|
|
std::string audioFilePath = inputsDirPath + ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE;
|
|
|
|
bool error;
|
|
|
|
std::vector<int16_t> audioData = readAudioFromFile(audioFilePath, &error);
|
|
|
|
ASSERT_FALSE(error);
|
|
|
|
|
|
|
|
alexaStopAlexaJokeAudioBufferWriter->write(audioData.data(), audioData.size());
|
|
|
|
|
|
|
|
auto detector = KittAiKeyWordDetector::create(
|
2017-10-02 22:59:05 +00:00
|
|
|
alexaStopAlexaJokeAudioBuffer,
|
|
|
|
compatibleAudioFormat,
|
|
|
|
{keyWordObserver1},
|
|
|
|
{stateObserver},
|
|
|
|
inputsDirPath + RESOURCE_FILE,
|
|
|
|
{config},
|
|
|
|
KITTAI_AUDIO_GAIN,
|
|
|
|
false);
|
2017-05-18 05:02:48 +00:00
|
|
|
ASSERT_TRUE(detector);
|
|
|
|
|
|
|
|
// so that when we close the writer, we know for sure that the reader will be closed
|
2017-10-02 22:59:05 +00:00
|
|
|
auto detections =
|
|
|
|
keyWordObserver1->waitForNDetections(NUM_ALEXAS_IN_ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE, DEFAULT_TIMEOUT);
|
2017-05-18 05:02:48 +00:00
|
|
|
ASSERT_EQ(detections.size(), NUM_ALEXAS_IN_ALEXA_STOP_ALEXA_JOKE_AUDIO_FILE);
|
|
|
|
|
|
|
|
bool stateChanged = false;
|
2017-10-02 22:59:05 +00:00
|
|
|
KeyWordDetectorStateObserverInterface::KeyWordDetectorState stateReceived =
|
|
|
|
stateObserver->waitForStateChange(DEFAULT_TIMEOUT, &stateChanged);
|
2017-05-18 05:02:48 +00:00
|
|
|
ASSERT_TRUE(stateChanged);
|
|
|
|
ASSERT_EQ(stateReceived, KeyWordDetectorStateObserverInterface::KeyWordDetectorState::ACTIVE);
|
|
|
|
|
|
|
|
alexaStopAlexaJokeAudioBufferWriter->close();
|
|
|
|
stateChanged = false;
|
2017-10-02 22:59:05 +00:00
|
|
|
stateReceived = stateObserver->waitForStateChange(DEFAULT_TIMEOUT, &stateChanged);
|
2017-05-18 05:02:48 +00:00
|
|
|
ASSERT_TRUE(stateChanged);
|
|
|
|
ASSERT_EQ(stateReceived, KeyWordDetectorStateObserverInterface::KeyWordDetectorState::STREAM_CLOSED);
|
|
|
|
}
|
|
|
|
|
2017-10-02 22:59:05 +00:00
|
|
|
} // namespace test
|
|
|
|
} // namespace kwd
|
|
|
|
} // namespace alexaClientSDK
|
2017-05-18 05:02:48 +00:00
|
|
|
|
2017-10-02 22:59:05 +00:00
|
|
|
int main(int argc, char** argv) {
|
2017-05-18 05:02:48 +00:00
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
if (argc < 2) {
|
2017-12-09 00:07:37 +00:00
|
|
|
std::cerr << "USAGE: " << std::string(argv[0]) << " <path_to_inputs_folder>" << std::endl;
|
2017-05-18 05:02:48 +00:00
|
|
|
return 1;
|
|
|
|
} else {
|
2017-06-09 23:23:31 +00:00
|
|
|
alexaClientSDK::kwd::test::inputsDirPath = std::string(argv[1]);
|
2017-05-18 05:02:48 +00:00
|
|
|
return RUN_ALL_TESTS();
|
|
|
|
}
|
2017-06-23 23:26:34 +00:00
|
|
|
}
|