/* * Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/apache2.0/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ /// @file AudioInputProcessorIntegrationTest.cpp #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "Integration/ACLTestContext.h" #include "Integration/ObservableMessageRequest.h" #include "Integration/AipStateObserver.h" #include "Integration/TestMessageSender.h" #include "Integration/TestDirectiveHandler.h" #include "Integration/TestExceptionEncounteredSender.h" #include "System/UserInactivityMonitor.h" // If the tests are created with both Kittai and Sensory, Kittai is chosen. 
#ifdef KWD_KITTAI
#include "KittAi/KittAiKeyWordDetector.h"
#elif KWD_SENSORY
#include "Sensory/SensoryKeywordDetector.h"
#endif

namespace alexaClientSDK {
namespace integration {
namespace test {

using namespace alexaClientSDK::acl;
using namespace alexaClientSDK::adsl;
using namespace alexaClientSDK::avsCommon;
using namespace alexaClientSDK::avsCommon::avs;
using namespace alexaClientSDK::avsCommon::utils;
using namespace alexaClientSDK::avsCommon::avs::attachment;
using namespace alexaClientSDK::avsCommon::sdkInterfaces;
using namespace capabilityAgents::aip;
using namespace capabilityAgents::system;
using namespace sdkInterfaces;
using namespace avsCommon::utils::sds;
using namespace avsCommon::utils::json;
using namespace afml;
using namespace contextManager;

// Test audio fixtures. Each path is appended to g_inputPath (supplied on the command line).

// This is a 16 bit 16 kHz little endian linear PCM audio file of "Tell me a Joke" to be recognized.
static const std::string JOKE_AUDIO_FILE = "/recognize_joke_test.wav";
// This is a 16 bit 16 kHz little endian linear PCM audio file of "Wikipedia" to be recognized.
static const std::string WIKI_AUDIO_FILE = "/recognize_wiki_test.wav";
// This is a 16 bit 16 kHz little endian linear PCM audio file of "Lions" to be recognized.
static const std::string LIONS_AUDIO_FILE = "/recognize_lions_test.wav";
// This is a 16 bit 16 kHz little endian linear PCM audio file of silence to be recognized.
static const std::string SILENCE_AUDIO_FILE = "/recognize_silence_test.wav";
// This is a 16 bit 16 kHz little endian linear PCM audio file of "Alexa, Tell me a Joke" to be recognized.
static const std::string ALEXA_JOKE_AUDIO_FILE = "/alexa_recognize_joke_test.wav";
// This is a 16 bit 16 kHz little endian linear PCM audio file of "Alexa, Wikipedia" to be recognized.
static const std::string ALEXA_WIKI_AUDIO_FILE = "/alexa_recognize_wiki_test.wav";
// This is a 16 bit 16 kHz little endian linear PCM audio file of "Alexa" then silence to be recognized.
static const std::string ALEXA_SILENCE_AUDIO_FILE = "/alexa_recognize_silence_test.wav";
// This is a 32KHz little endian OPUS audio file with Constant Bit rate of "What time is it?" to be recognized.
static const std::string TIME_AUDIO_FILE_OPUS = "/utterance_time_success.opus";
// This string to be used for VolumeState state reporting which uses the NAMESPACE_SPEAKER namespace.
// (Fixed: the original comment incorrectly described this as a Speak/SpeechSynthesizer name; see VOLUME_STATE_PAIR.)
static const std::string NAME_VOLUME_STATE = "VolumeState";
// This string to be used for Speak Directives which use the NAMESPACE_SPEECH_SYNTHESIZER namespace.
static const std::string NAME_SPEAK = "Speak";
// This string to be used for ExpectSpeech Directives which use the NAMESPACE_SPEECH_RECOGNIZER namespace.
static const std::string NAME_EXPECT_SPEECH = "ExpectSpeech";
// This string to be used for ExpectSpeechTimedOut Events which use the NAMESPACE_SPEECH_RECOGNIZER namespace.
static const std::string NAME_EXPECT_SPEECH_TIMED_OUT = "ExpectSpeechTimedOut";
// This string to be used for SetMute Directives which use the NAMESPACE_SPEAKER namespace.
static const std::string NAME_SET_MUTE = "SetMute";
// This string to be used for Play Directives which use the NAMESPACE_AUDIO_PLAYER namespace.
static const std::string NAME_PLAY = "Play";
// This string to be used for StopCapture Directives which use the NAMESPACE_SPEECH_RECOGNIZER namespace.
static const std::string NAME_STOP_CAPTURE = "StopCapture";
// This string to be used for Recognize Events which use the NAMESPACE_SPEECH_RECOGNIZER namespace.
static const std::string NAME_RECOGNIZE = "Recognize";
// This String to be used to register the SpeechRecognizer namespace to a DirectiveHandler.
static const std::string NAMESPACE_SPEECH_RECOGNIZER = "SpeechRecognizer";
// This String to be used to register the SpeechSynthesizer namespace to a DirectiveHandler.
static const std::string NAMESPACE_SPEECH_SYNTHESIZER = "SpeechSynthesizer";
// This String to be used to register the AudioPlayer namespace to a DirectiveHandler.
static const std::string NAMESPACE_AUDIO_PLAYER = "AudioPlayer";
// This String to be used to register the Speaker namespace to a DirectiveHandler.
static const std::string NAMESPACE_SPEAKER = "Speaker";
// This pair connects a Speak name and SpeechSynthesizer namespace for use in DirectiveHandler registration.
static const NamespaceAndName SPEAK_PAIR = {NAMESPACE_SPEECH_SYNTHESIZER, NAME_SPEAK};
// This pair connects a ExpectSpeech name and SpeechRecognizer namespace for use in DirectiveHandler registration.
static const NamespaceAndName EXPECT_SPEECH_PAIR = {NAMESPACE_SPEECH_RECOGNIZER, NAME_EXPECT_SPEECH};
// This pair connects a SetMute name and Speaker namespace for use in DirectiveHandler registration.
static const NamespaceAndName SET_MUTE_PAIR = {NAMESPACE_SPEAKER, NAME_SET_MUTE};
// This pair connects a VolumeState name and Speaker namespace for use in state provider registration.
// (Fixed: the original comment said "SetMute name", a copy-paste of the previous entry.)
static const NamespaceAndName VOLUME_STATE_PAIR = {NAMESPACE_SPEAKER, NAME_VOLUME_STATE};
// This pair connects a Play name and AudioPlayer namespace for use in DirectiveHandler registration.
static const NamespaceAndName PLAY_PAIR = {NAMESPACE_AUDIO_PLAYER, NAME_PLAY};
// This pair connects a StopCapture name and SpeechRecognizer namespace for use in DirectiveHandler registration.
static const NamespaceAndName STOP_CAPTURE_PAIR = {NAMESPACE_SPEECH_RECOGNIZER, NAME_STOP_CAPTURE};

/// Sample dialog activity id.
static const std::string DIALOG_ACTIVITY_ID = "Dialog";

/// Sample alerts activity id.
static const std::string ALARM_ACTIVITY_ID = "Alarms";

/// Sample content activity id.
static const std::string CONTENT_ACTIVITY_ID = "Content";

// This Integer to be used to specify a timeout in seconds for long operations.
static const std::chrono::seconds LONG_TIMEOUT_DURATION(10);
// This Integer to be used when it is expected the duration will timeout but some wait time is still desired.
static const std::chrono::seconds SHORT_TIMEOUT_DURATION(2); // This Integer to be used when no timeout is desired. static const std::chrono::seconds NO_TIMEOUT_DURATION(0); // The length of RIFF container format which is the header of a wav file. static const int RIFF_HEADER_SIZE = 44; /// The compatible sample rate for OPUS 32KHz. static const unsigned int COMPATIBLE_SAMPLE_RATE_OPUS_32 = 32000; #ifdef KWD_KITTAI /// The name of the resource file required for Kitt.ai. static const std::string RESOURCE_FILE = "/KittAiModels/common.res"; /// The name of the Alexa model file for Kitt.ai. static const std::string MODEL_FILE = "/KittAiModels/alexa.umdl"; /// The keyword associated with alexa.umdl. static const std::string MODEL_KEYWORD = "ALEXA"; #elif KWD_SENSORY /// The name of the resource file required for Sensory static const std::string RESOURCE_FILE = "/SensoryModels/spot-alexa-rpi-31000.snsr"; #endif /// JSON key to get the directive object of a message. static const std::string JSON_MESSAGE_DIRECTIVE_KEY = "directive"; /// JSON key to get the header object of a message. static const std::string JSON_MESSAGE_HEADER_KEY = "header"; /// JSON key to get the namespace value of a header. static const std::string JSON_MESSAGE_NAMESPACE_KEY = "namespace"; /// JSON key to get the name value of a header. static const std::string JSON_MESSAGE_NAME_KEY = "name"; /// JSON key to get the messageId value of a header. static const std::string JSON_MESSAGE_MESSAGE_ID_KEY = "messageId"; /// JSON key to get the dialogRequestId value of a header. static const std::string JSON_MESSAGE_DIALOG_REQUEST_ID_KEY = "dialogRequestId"; /// JSON key to get the payload object of a message. static const std::string JSON_MESSAGE_PAYLOAD_KEY = "payload"; /** * The sensitivity to the keyword in the model. Set to 0.6 as this is what was described as optimal on the Kitt.ai * Github page. 
*/ #ifdef KWD_KITTAI static const double KITTAI_SENSITIVITY = 0.6; #endif /// The compatible encoding for Kitt.ai. static const avsCommon::utils::AudioFormat::Encoding COMPATIBLE_ENCODING = avsCommon::utils::AudioFormat::Encoding::LPCM; /// The compatible endianness for Kitt.ai. static const avsCommon::utils::AudioFormat::Endianness COMPATIBLE_ENDIANNESS = avsCommon::utils::AudioFormat::Endianness::LITTLE; /// The compatible sample rate for Kitt.ai. static const unsigned int COMPATIBLE_SAMPLE_RATE = 16000; /// The compatible bits per sample for Kitt.ai. static const unsigned int COMPATIBLE_SAMPLE_SIZE_IN_BITS = 16; /// The compatible number of channels for Kitt.ai static const unsigned int COMPATIBLE_NUM_CHANNELS = 1; /// String to identify log entries originating from this file. static const std::string TAG("AlexaDirectiveSequencerLibraryTest"); /// Path to the AlexaClientSDKConfig.json file (from command line arguments). static std::string g_configPath; /// Path to resources (e.g. audio files) for tests (from command line arguments). static std::string g_inputPath; /** * Create a LogEntry using this file's TAG and the specified event string. * * @param The event string for this @c LogEntry. 
 */
#define LX(event) ::alexaClientSDK::avsCommon::utils::logger::LogEntry(TAG, event)

// NOTE(review): template arguments throughout this file appear to have been stripped by a text-extraction
// step (e.g. "std::shared_ptr aip" is missing its <...> argument). Confirm the original type arguments
// against the AVS Device SDK sources before compiling.

/// Simulates a tap-to-talk button: a press kicks off a TAP-initiated recognize() on the AIP.
class tapToTalkButton {
public:
    /// Starts a TAP-initiated recognize and blocks on the returned future until the AIP accepts or rejects it.
    bool startRecognizing(std::shared_ptr aip, std::shared_ptr audioProvider) {
        return aip->recognize(*audioProvider, Initiator::TAP).get();
    }
};

/// Simulates a hold-to-talk button: PRESS_AND_HOLD recognize on press, stopCapture() on release.
class holdToTalkButton {
public:
    /// Starts a PRESS_AND_HOLD-initiated recognize and blocks until the AIP accepts or rejects it.
    bool startRecognizing(std::shared_ptr aip, std::shared_ptr audioProvider) {
        return aip->recognize(*audioProvider, Initiator::PRESS_AND_HOLD).get();
    }

    /// Signals the AIP to stop capturing audio (button released); blocks until acknowledged.
    bool stopRecognizing(std::shared_ptr aip) {
        return aip->stopCapture().get();
    }
};

#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
/// Keyword-detector observer that forwards each detection to the AIP as a recognize() call.
class wakeWordTrigger : public KeyWordObserverInterface {
public:
    wakeWordTrigger(AudioFormat compatibleAudioFormat, std::shared_ptr aip) {
        m_compatibleAudioFormat = compatibleAudioFormat;
        m_aip = aip;
    }

    /// Called by the keyword detector on a detection; records it and starts a recognize on the AIP.
    void onKeyWordDetected(
        std::shared_ptr stream,
        std::string keyword,
        AudioInputStream::Index beginIndex,
        AudioInputStream::Index endIndex,
        std::shared_ptr> KWDMetadata = nullptr) {
        keyWordDetected = true;
        ASSERT_NE(nullptr, stream);
        bool alwaysReadable = true;
        bool canOverride = false;
        bool canBeOverridden = true;
        auto audioProvider = AudioProvider(
            stream, m_compatibleAudioFormat, ASRProfile::NEAR_FIELD, alwaysReadable, !canOverride, canBeOverridden);
        if (m_aip) {
            AudioInputStream::Index aipBegin = AudioInputProcessor::INVALID_INDEX;
            AudioInputStream::Index aipEnd = AudioInputProcessor::INVALID_INDEX;
            if (endIndex != KeyWordObserverInterface::UNSPECIFIED_INDEX) {
                if (beginIndex != KeyWordObserverInterface::UNSPECIFIED_INDEX) {
                    // If we know where the keyword starts and ends, pass both of those along to AIP.
                    aipBegin = beginIndex;
                    aipEnd = endIndex;
                } else {
                    // If we only know where the keyword ends, AIP should begin recording there.
                    aipBegin = endIndex;
                }
            }
            // Else we don't have any indices to pass along; AIP will begin recording ASAP.
#ifdef KWD_KITTAI
            m_aip->recognize(audioProvider, Initiator::TAP, aipBegin, aipEnd, keyword);
#elif KWD_SENSORY
            m_aip->recognize(audioProvider, Initiator::WAKEWORD, aipBegin, aipEnd, keyword);
#endif
        }
    }

    /// Set to true as soon as any keyword detection callback fires.
    bool keyWordDetected = false;
    /// Audio format used to build the AudioProvider for each detection.
    AudioFormat m_compatibleAudioFormat;
    /// The AudioInputProcessor to notify on detection.
    std::shared_ptr m_aip;
};
#endif

/// Test state provider that reports a canned Speaker volume state and records that it was asked for state.
class testStateProvider
        : public StateProviderInterface
        , public RequiresShutdown {
public:
    testStateProvider(std::shared_ptr contextManager) : RequiresShutdown("testStateProvider") {
        m_contextManager = contextManager;
    }

    ~testStateProvider() {
    }

    /// Answers a context request with a fixed {"volume":50,"muted":false} state and flags the request.
    void provideState(const NamespaceAndName& nsname, const unsigned int stateRequestToken) override {
        std::ostringstream context;
        context << R"({)" R"("volume":)" << 50 << R"(,)" << R"("muted":)" << false << R"(})";
        m_contextManager->setState(
            VOLUME_STATE_PAIR, context.str(), avsCommon::avs::StateRefreshPolicy::ALWAYS, stateRequestToken);
        m_stateRequested = true;
    }

    /// Returns whether provideState() was called since the last check, clearing the flag as a side effect.
    bool checkStateRequested() {
        bool savedResult = false;
        if (m_stateRequested) {
            savedResult = true;
            m_stateRequested = false;
        }
        return savedResult;
    }

protected:
    void doShutdown() override {
        m_contextManager.reset();
    }

private:
    /// Whether a state request has been observed since the last checkStateRequested() call.
    bool m_stateRequested = false;
    std::shared_ptr m_contextManager;
};

/// A test observer that mocks out the ChannelObserverInterface##onFocusChanged() call.
class TestClient : public ChannelObserverInterface {
public:
    /**
     * Constructor.
     */
    TestClient() : m_focusState(FocusState::NONE) {
    }

    /**
     * Implementation of the ChannelObserverInterface##onFocusChanged() callback.
     *
     * @param focusState The new focus state of the Channel observer.
     */
    void onFocusChanged(FocusState focusState) override {
        std::unique_lock lock(m_mutex);
        m_queue.push_back(focusState);
        m_focusState = focusState;
        m_wakeTrigger.notify_all();
    }

    /**
     * Waits for the ChannelObserverInterface##onFocusChanged() callback.
     *
     * @param timeout The amount of time to wait for the callback.
     * @return The next queued focus state, or @c FocusState::NONE if the timeout expired.
     *         (Doc fixed: this returns a FocusState, not a bool as previously documented.)
     */
    FocusState waitForFocusChange(std::chrono::milliseconds timeout) {
        FocusState ret;
        std::unique_lock lock(m_mutex);
        if (!m_wakeTrigger.wait_for(lock, timeout, [this]() { return !m_queue.empty(); })) {
            // Timed out with no focus change queued.
            ret = FocusState::NONE;
            return ret;
        }
        ret = m_queue.front();
        m_queue.pop_front();
        return ret;
    }

private:
    /// The focus state of the observer.
    FocusState m_focusState;

    /// A lock to guard against focus state changes.
    std::mutex m_mutex;

    /// Trigger to wake up waitForNext calls.
    std::condition_variable m_wakeTrigger;

    /// Queue of received directives that have not been waited on.
    std::deque m_queue;
};

/// Fixture wiring an AudioInputProcessor to a real AVS connection plus the test doubles above.
class AudioInputProcessorTest : public ::testing::Test {
protected:
    void SetUp() override {
        m_context = ACLTestContext::create(g_configPath);
        ASSERT_TRUE(m_context);

        m_exceptionEncounteredSender = std::make_shared();
        DirectiveHandlerConfiguration handlerConfig;
        handlerConfig[SET_MUTE_PAIR] = BlockingPolicy::NON_BLOCKING;
        handlerConfig[SPEAK_PAIR] = BlockingPolicy::BLOCKING;
        m_directiveHandler = std::make_shared(handlerConfig);
        m_directiveSequencer = DirectiveSequencer::create(m_exceptionEncounteredSender);
        ASSERT_NE(nullptr, m_directiveSequencer);
        m_messageInterpreter = std::make_shared(
            m_exceptionEncounteredSender, m_directiveSequencer, m_context->getAttachmentManager());

        // Audio format the wake word engines and AIP agree on (16 kHz mono 16-bit little endian LPCM).
        m_compatibleAudioFormat.sampleRateHz = COMPATIBLE_SAMPLE_RATE;
        m_compatibleAudioFormat.sampleSizeInBits = COMPATIBLE_SAMPLE_SIZE_IN_BITS;
        m_compatibleAudioFormat.numChannels = COMPATIBLE_NUM_CHANNELS;
        m_compatibleAudioFormat.endianness = COMPATIBLE_ENDIANNESS;
        m_compatibleAudioFormat.encoding = COMPATIBLE_ENCODING;

        size_t nWords = 1024 * 1024;
        size_t wordSize = 2;
        size_t maxReaders = 3;
        size_t bufferSize = AudioInputStream::calculateBufferSize(nWords, wordSize, maxReaders);
        // NOTE(review): despite the m_ prefix these two are locals; the stream is kept in m_AudioBuffer below.
        auto m_Buffer = std::make_shared(bufferSize);
        auto m_Sds = avsCommon::avs::AudioInputStream::create(m_Buffer, wordSize, maxReaders);
        ASSERT_NE(nullptr, m_Sds);
        m_AudioBuffer = std::move(m_Sds);
        m_AudioBufferWriter =
            m_AudioBuffer->createWriter(avsCommon::avs::AudioInputStream::Writer::Policy::NONBLOCKABLE);
        ASSERT_NE(nullptr, m_AudioBufferWriter);

        // Set up tap and hold to talk buttons.
        bool alwaysReadable = true;
        bool canOverride = true;
        bool canBeOverridden = true;
        m_HoldToTalkAudioProvider = std::make_shared(
            m_AudioBuffer,
            m_compatibleAudioFormat,
            ASRProfile::CLOSE_TALK,
            !alwaysReadable,
            canOverride,
            !canBeOverridden);
        m_TapToTalkAudioProvider = std::make_shared(
            m_AudioBuffer,
            m_compatibleAudioFormat,
            ASRProfile::NEAR_FIELD,
            alwaysReadable,
            canOverride,
            !canBeOverridden);

        m_tapToTalkButton = std::make_shared();
        m_holdToTalkButton = std::make_shared();
        m_focusManager = std::make_shared(FocusManager::DEFAULT_AUDIO_CHANNELS);
        m_dialogUXStateAggregator = std::make_shared();

        m_stateProvider = std::make_shared(m_context->getContextManager());
        ASSERT_TRUE(m_context->getContextManager());
        m_context->getContextManager()->setStateProvider(VOLUME_STATE_PAIR, m_stateProvider);

        // Set up connection and connect
        m_avsConnectionManager = std::make_shared(
            m_context->getMessageRouter(), false, m_context->getConnectionStatusObserver(), m_messageInterpreter);
        ASSERT_TRUE(m_avsConnectionManager);
        connect();

        m_userInactivityMonitor = UserInactivityMonitor::create(m_avsConnectionManager, m_exceptionEncounteredSender);
        m_AudioInputProcessor = AudioInputProcessor::create(
            m_directiveSequencer,
            m_avsConnectionManager,
            m_context->getContextManager(),
            m_focusManager,
            m_dialogUXStateAggregator,
            m_exceptionEncounteredSender,
            m_userInactivityMonitor);
        ASSERT_NE(nullptr, m_AudioInputProcessor);
        m_AudioInputProcessor->addObserver(m_dialogUXStateAggregator);

        // Hold the alerts channel in the foreground so tests can observe dialog backgrounding it.
        m_testClient = std::make_shared();
        ASSERT_TRUE(m_focusManager->acquireChannel(FocusManager::ALERTS_CHANNEL_NAME, m_testClient, ALARM_ACTIVITY_ID));
        ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

        m_StateObserver = std::make_shared();
        ASSERT_NE(nullptr, m_StateObserver);
        m_AudioInputProcessor->addObserver(m_StateObserver);
        ASSERT_TRUE(m_directiveSequencer->addDirectiveHandler(m_AudioInputProcessor));

#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
        m_wakeWordTrigger = std::make_shared(m_compatibleAudioFormat, m_AudioInputProcessor);
#ifdef KWD_KITTAI
        kwd::KittAiKeyWordDetector::KittAiConfiguration config;
        config = {g_inputPath + MODEL_FILE, MODEL_KEYWORD, KITTAI_SENSITIVITY};
        m_detector = kwd::KittAiKeyWordDetector::create(
            m_AudioBuffer,
            m_compatibleAudioFormat,
            {m_wakeWordTrigger},
            // Not using an empty initializer list here to account for a GCC 4.9.2 regression
            std::unordered_set>(),
            g_inputPath + RESOURCE_FILE,
            {config},
            2.0,
            false);
        ASSERT_TRUE(m_detector);
#elif KWD_SENSORY
        m_detector = kwd::SensoryKeywordDetector::create(
            m_AudioBuffer,
            m_compatibleAudioFormat,
            {m_wakeWordTrigger},
            // Not using an empty initializer list here to account for a GCC 4.9.2 regression
            std::unordered_set>(),
            g_inputPath + RESOURCE_FILE);
        ASSERT_TRUE(m_detector);
#endif
#endif
        ASSERT_TRUE(m_directiveSequencer->addDirectiveHandler(m_directiveHandler));
        ASSERT_TRUE(
            m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, NO_TIMEOUT_DURATION));
    }

    void TearDown() override {
        disconnect();
        // Note that these nullptr checks are needed to avoid segfaults if @c SetUp() failed.
        if (m_AudioInputProcessor) {
            m_AudioInputProcessor->shutdown();
        }
        if (m_directiveSequencer) {
            m_directiveSequencer->shutdown();
        }
        if (m_avsConnectionManager) {
            m_avsConnectionManager->shutdown();
        }
        if (m_stateProvider) {
            m_stateProvider->shutdown();
        }
        if (m_userInactivityMonitor) {
            m_userInactivityMonitor->shutdown();
        }
        m_context.reset();
    }

    /**
     * Connect to AVS.
     */
    void connect() {
        m_avsConnectionManager->enable();
        m_context->waitForConnected();
    }

    /**
     * Disconnect from AVS.
     */
    void disconnect() {
        if (m_avsConnectionManager) {
            m_avsConnectionManager->disable();
            m_context->waitForDisconnected();
        }
    }

    /**
     * Checks whether the next message sent on the connection is an event with the given name.
     *
     * @param connectionManager The test sender whose outgoing messages are inspected.
     * @param expectedName The event name to match against the outgoing event's header.
     * @return @c true if a message was sent within SHORT_TIMEOUT_DURATION and its event name matches.
     */
    bool checkSentEventName(std::shared_ptr connectionManager, std::string expectedName) {
        TestMessageSender::SendParams sendParams = connectionManager->waitForNext(SHORT_TIMEOUT_DURATION);
        if (TestMessageSender::SendParams::Type::SEND == sendParams.type) {
            std::string eventString;
            std::string eventHeader;
            std::string eventName;
            jsonUtils::retrieveValue(sendParams.request->getJsonContent(), "event", &eventString);
            jsonUtils::retrieveValue(eventString, "header", &eventHeader);
            jsonUtils::retrieveValue(eventHeader, "name", &eventName);
            if (eventName == expectedName) {
                return true;
            } else {
                return false;
            }
        } else {
            return false;
        }
    }

    /// Context for running ACL based tests.
    std::unique_ptr m_context;

    std::shared_ptr m_avsConnectionManager;
    std::shared_ptr m_directiveHandler;
    std::shared_ptr m_exceptionEncounteredSender;
    std::shared_ptr m_directiveSequencer;
    std::shared_ptr m_messageInterpreter;
    std::shared_ptr m_focusManager;
    std::shared_ptr m_dialogUXStateAggregator;
    std::shared_ptr m_testClient;
    std::shared_ptr m_userInactivityMonitor;
    std::shared_ptr m_AudioInputProcessor;
    std::shared_ptr m_StateObserver;
    std::shared_ptr m_tapToTalkButton;
    std::shared_ptr m_holdToTalkButton;
    std::shared_ptr m_stateProvider;
    std::unique_ptr m_AudioBufferWriter;
    std::shared_ptr m_AudioBuffer;
    std::shared_ptr m_TapToTalkAudioProvider;
    std::shared_ptr m_HoldToTalkAudioProvider;
    avsCommon::utils::AudioFormat m_compatibleAudioFormat;

#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
    std::shared_ptr m_wakeWordTrigger;
#ifdef KWD_KITTAI
    std::unique_ptr m_detector;
#elif KWD_SENSORY
    std::unique_ptr m_detector;
#endif
#endif
};

/**
 * Reads audio samples from a file, skipping a fixed-size header.
 *
 * NOTE(review): the template parameter list and the vector's element type argument appear to have been
 * stripped by text extraction; the element type is referred to as T in the body. Confirm against the
 * original sources.
 *
 * @param fileName Path of the file to read (e.g. a wav file).
 * @param headerPosition Number of bytes to skip before the first sample (e.g. RIFF_HEADER_SIZE).
 * @param errorOccurred Optional out-flag; set to true on any open/size/read error, false on success.
 * @return The samples read, or an empty vector on error.
 */
template
std::vector readAudioFromFile(const std::string& fileName, const int& headerPosition, bool* errorOccurred) {
    std::ifstream inputFile(fileName.c_str(), std::ifstream::binary);
    if (!inputFile.good()) {
        std::cout << "Couldn't open audio file!" << std::endl;
        if (errorOccurred) {
            *errorOccurred = true;
        }
        return {};
    }
    inputFile.seekg(0, std::ios::end);
    int fileLengthInBytes = inputFile.tellg();
    if (fileLengthInBytes <= headerPosition) {
        std::cout << "File should be larger than header position" << std::endl;
        if (errorOccurred) {
            *errorOccurred = true;
        }
        return {};
    }
    inputFile.seekg(headerPosition, std::ios::beg);

    int numSamples = (fileLengthInBytes - headerPosition) / sizeof(T);
    std::vector retVal(numSamples, 0);
    inputFile.read((char*)&retVal[0], numSamples * sizeof(T));
    if (static_cast(inputFile.gcount()) != numSamples * sizeof(T)) {
        std::cout << "Error reading audio file" << std::endl;
        if (errorOccurred) {
            *errorOccurred = true;
        }
        return {};
    }
    inputFile.close();
    if (errorOccurred) {
        *errorOccurred = false;
    }
    return retVal;
}

/**
 * Test AudioInputProcessor's ability to handle a simple interaction triggered by a wakeword.
 *
 * To do this, audio of "Alexa, tell me a joke" is fed into a stream that is being read by a wake word engine. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute and Speak
 * directive.
 */
#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
TEST_F(AudioInputProcessorTest, wakeWordJoke) {
    // Put audio onto the SDS saying "Alexa, Tell me a joke".
    bool error;
    std::string file = g_inputPath + ALEXA_JOKE_AUDIO_FILE;
    std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params =
        m_directiveHandler->waitForNext(std::chrono::seconds(LONG_TIMEOUT_DURATION));
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }
}
#endif

/**
 * Test AudioInputProcessor's ability to handle a recognize triggered by a wakeword followed by silence.
 *
 * To do this, audio of "Alexa, ........." is fed into a stream that is being read by a wake word engine. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with no directives.
 */
#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
TEST_F(AudioInputProcessorTest, wakeWordSilence) {
    // Put audio onto the SDS saying "Alexa ......".
    bool error;
    std::string file = g_inputPath + ALEXA_SILENCE_AUDIO_FILE;
    std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // Check that a recognize event was sent
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that no prehandle or handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
}
#endif

/**
 * Test AudioInputProcessor's ability to handle a multiturn interaction triggered by a wakeword.
 *
 * To do this, audio of "Alexa, wikipedia" is fed into a stream that is being read by a wake word engine. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute, Speak,
 * and ExpectSpeech directive. Audio of "Lions" is then fed into the stream and another recognize event is sent.
 */
#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
TEST_F(AudioInputProcessorTest, wakeWordMultiturn) {
    // Put audio onto the SDS saying "Alexa, wikipedia".
    bool error;
    std::string file = g_inputPath + ALEXA_WIKI_AUDIO_FILE;
    std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params =
        m_directiveHandler->waitForNext(std::chrono::seconds(LONG_TIMEOUT_DURATION));
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }

    // Check that AIP is now in EXPECTING_SPEECH state.
    ASSERT_TRUE(m_StateObserver->checkState(
        AudioInputProcessorObserverInterface::State::EXPECTING_SPEECH, LONG_TIMEOUT_DURATION));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Lions".
    bool secondError;
    std::string secondFile = g_inputPath + LIONS_AUDIO_FILE;
    std::vector secondAudioData = readAudioFromFile(secondFile, RIFF_HEADER_SIZE, &secondError);
    ASSERT_FALSE(secondError);
    m_AudioBufferWriter->write(secondAudioData.data(), secondAudioData.size());

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }
}
#endif

/**
 * Test AudioInputProcessor's ability to handle a simple interaction triggered by a wakeword but no user response.
 *
 * To do this, audio of "Alexa, wikipedia" is fed into a stream that is being read by a wake word engine. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute, Speak,
 * and ExpectSpeech directive. Audio of "...." is then fed into the stream and another recognize event is sent
 * but no directives are given in response.
 */
#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
TEST_F(AudioInputProcessorTest, wakeWordMultiturnWithoutUserResponse) {
    // Put audio onto the SDS saying "Alexa, wikipedia".
    bool error;
    std::string file = g_inputPath + ALEXA_WIKI_AUDIO_FILE;
    std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // Check that a recognize event was sent
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that AIP is in BUSY state.
ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION)); // Check that AIP is in an IDLE state. ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION)); // The test channel client has been notified the alarm channel has been foregrounded. ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND); // Check that prehandle and handle for setMute and Speak has reached the test SS TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(std::chrono::seconds(LONG_TIMEOUT_DURATION)); ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT); while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) { if (params.isHandle() && params.directive->getName() == NAME_SPEAK) { std::this_thread::sleep_for(std::chrono::seconds(2)); params.result->setCompleted(); } params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION); } // Check that AIP is now in EXPECTING_SPEECH state. ASSERT_TRUE(m_StateObserver->checkState( AudioInputProcessorObserverInterface::State::EXPECTING_SPEECH, LONG_TIMEOUT_DURATION)); // Put audio onto the SDS saying ".......". bool secondError; std::string secondFile = g_inputPath + SILENCE_AUDIO_FILE; std::vector secondAudioData = readAudioFromFile(secondFile, RIFF_HEADER_SIZE, &secondError); ASSERT_FALSE(secondError); m_AudioBufferWriter->write(secondAudioData.data(), secondAudioData.size()); // Check that AIP is now in RECOGNIZING state. ASSERT_TRUE( m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION)); // Check that a recognize event was sent. ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE)); // The test channel client has been notified the alarm channel has been backgrounded. 
ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND); // Check that AIP is in BUSY state. ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION)); // Check that AIP is in an IDLE state. ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION)); // Check that the test context provider was asked to provide context for the event. ASSERT_TRUE(m_stateProvider->checkStateRequested()); // The test channel client has been notified the alarm channel has been foregrounded. ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND); // Check that a recognize event was not sent. ASSERT_FALSE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE)); // Check that the test context provider was asked to provide context for the event. ASSERT_FALSE(m_stateProvider->checkStateRequested()); // The test channel client has not changed. ASSERT_EQ(m_testClient->waitForFocusChange(SHORT_TIMEOUT_DURATION), FocusState::NONE); } #endif /** * Test AudioInputProcessor's ability to handle a simple interation triggered by a tap to talk button. * * To do this, audio of "Tell me a joke" is fed into a stream after button sends recognize to AudioInputProcessor. The * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute and Speak * directive. */ TEST_F(AudioInputProcessorTest, DISABLED_tapToTalkJoke) { // Signal to the AIP to start recognizing. ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, m_TapToTalkAudioProvider)); // Check that AIP is now in RECOGNIZING state. ASSERT_TRUE( m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION)); // Put audio onto the SDS saying "Tell me a joke". 
bool error; std::string file = g_inputPath + JOKE_AUDIO_FILE; std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error); ASSERT_FALSE(error); ASSERT_FALSE(audioData.empty()); m_AudioBufferWriter->write(audioData.data(), audioData.size()); // The test channel client has been notified the alarm channel has been backgrounded. ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND); // Check that AIP is in BUSY state. ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION)); // Check that AIP is in an IDLE state. ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION)); // Check that the test context provider was asked to provide context for the event. ASSERT_TRUE(m_stateProvider->checkStateRequested()); // The test channel client has been notified the alarm channel has been foregrounded. ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND); // Check that a recognize event was sent. ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE)); // Check that prehandle and handle for setMute and Speak has reached the test SS. 
TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION); ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT); while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) { if (params.isHandle() && params.directive->getName() == NAME_SPEAK) { params.result->setCompleted(); } params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION); } } TEST_F(AudioInputProcessorTest, tapToTalkTimeOpus) { m_compatibleAudioFormat.sampleRateHz = COMPATIBLE_SAMPLE_RATE_OPUS_32; m_compatibleAudioFormat.numChannels = COMPATIBLE_NUM_CHANNELS; m_compatibleAudioFormat.endianness = COMPATIBLE_ENDIANNESS; m_compatibleAudioFormat.encoding = avsCommon::utils::AudioFormat::Encoding::OPUS; bool alwaysReadable = true; bool canOverride = true; bool canBeOverridden = true; std::shared_ptr tapToTalkAudioProvider; tapToTalkAudioProvider = std::make_shared( m_AudioBuffer, m_compatibleAudioFormat, ASRProfile::NEAR_FIELD, alwaysReadable, canOverride, !canBeOverridden); // Signal to the AIP to start recognizing. ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, tapToTalkAudioProvider)); // Check that AIP is now in RECOGNIZING state. ASSERT_TRUE( m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION)); // Put audio onto the SDS saying "What time is it?". bool error; std::string file = g_inputPath + TIME_AUDIO_FILE_OPUS; int headerSize = 0; std::vector audioData = readAudioFromFile(file, headerSize, &error); ASSERT_FALSE(audioData.empty()); m_AudioBufferWriter->write(audioData.data(), audioData.size()); // The test channel client has been notified the alarm channel has been backgrounded. ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND); // Check that AIP is in BUSY state. 
ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION)); // Check that AIP is in an IDLE state. ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION)); // Check that the test context provider was asked to provide context for the event. ASSERT_TRUE(m_stateProvider->checkStateRequested()); // The test channel client has been notified the alarm channel has been foregrounded. ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND); // Check that a recognize event was sent. ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE)); } /** * Test AudioInputProcessor's ability to handle a silent interation triggered by a tap to talk button. * * To do this, audio of "....." is fed into a stream after button sends recognize to AudioInputProcessor. The * AudioInputProcessor is then observed to send a Recognize event to AVS which responds no directives. */ TEST_F(AudioInputProcessorTest, tapToTalkSilence) { // Signal to the AIP to start recognizing. ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, m_TapToTalkAudioProvider)); // Check that AIP is now in RECOGNIZING state. ASSERT_TRUE( m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION)); // Put audio onto the SDS saying ".......". bool error; std::string file = g_inputPath + SILENCE_AUDIO_FILE; std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error); ASSERT_FALSE(error); ASSERT_FALSE(audioData.empty()); m_AudioBufferWriter->write(audioData.data(), audioData.size()); // The test channel client has been notified the alarm channel has been backgrounded. ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND); // Check that AIP is in BUSY state. 
ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that no directives arrived to the fake SS (silence produces no AVS response).
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
}

/**
 * Test AudioInputProcessor's ability to handle no audio being written triggered by a tap to talk button.
 *
 * To do this, no audio is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with no directive.
 */
TEST_F(AudioInputProcessorTest, tapToTalkNoAudio) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, m_TapToTalkAudioProvider));

    // Put no audio onto the SDS.

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that a recognize event was sent (the event starts even though no audio ever arrives).
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has not changed.
    ASSERT_EQ(m_testClient->waitForFocusChange(SHORT_TIMEOUT_DURATION), FocusState::NONE);

    // Check that no directives arrived to the fake SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
}

/**
 * Test AudioInputProcessor's ability to handle an interaction triggered by a tap to talk button with wake word.
 *
 * To do this, audio of "Alexa, Tell me a joke" is fed into a stream after button sends recognize to
 * AudioInputProcessor. The AudioInputProcessor is then observed to send only one Recognize event to AVS which responds
 * with a SetMute and Speak directive.
 */
#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
TEST_F(AudioInputProcessorTest, tapToTalkWithWakeWordConflict) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, m_TapToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Alexa, Tell me a joke".
    bool error;
    std::string file = g_inputPath + ALEXA_JOKE_AUDIO_FILE;
    std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        // Only Speak directives need explicit completion; everything else is drained.
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }
}
#endif

/**
 * Test AudioInputProcessor's ability to handle a multiturn interaction triggered by a tap to talk button.
 *
 * To do this, audio of "Wikipedia" is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute, Speak,
 * and ExpectSpeech directive. Audio of "Lions" is then fed into the stream and another recognize event is sent.
 */
TEST_F(AudioInputProcessorTest, tapToTalkMultiturn) {
    // Signal to the AIP to start recognizing.
ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, m_TapToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Wikipedia".
    bool error;
    std::string file = g_inputPath + WIKI_AUDIO_FILE;
    std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            // Give the Speak directive some simulated playback time before reporting completion.
            std::this_thread::sleep_for(std::chrono::seconds(2));
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION);
    }

    // Check that AIP is now in EXPECTING_SPEECH state.
    ASSERT_TRUE(m_StateObserver->checkState(
        AudioInputProcessorObserverInterface::State::EXPECTING_SPEECH, SHORT_TIMEOUT_DURATION));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, SHORT_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Lions".
    bool secondError;
    std::string secondFile = g_inputPath + LIONS_AUDIO_FILE;
    std::vector secondAudioData = readAudioFromFile(secondFile, RIFF_HEADER_SIZE, &secondError);
    ASSERT_FALSE(secondError);
    m_AudioBufferWriter->write(secondAudioData.data(), secondAudioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }
}

/**
 * Test AudioInputProcessor's ability to handle a multiturn interaction triggered by a tap to talk button but no user
 * response.
 *
 * To do this, audio of "Wikipedia" is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute, Speak,
 * and ExpectSpeech directive. Audio of "...." is then fed into the stream and another recognize event is sent
 * but no directives are given in response.
 */
TEST_F(AudioInputProcessorTest, tapToTalkMultiturnWithoutUserResponse) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, m_TapToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Wikipedia".
    bool error;
    std::string file = g_inputPath + WIKI_AUDIO_FILE;
    std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that AIP is in BUSY state.
ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Keep answering ExpectSpeech turns with silence until AVS stops sending directives.
    bool expectSpeechFound = true;
    TestDirectiveHandler::DirectiveParams params;
    while (expectSpeechFound) {
        // Check that AIP is in an IDLE state before starting.
        ASSERT_TRUE(
            m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

        // Check that prehandle and handle for setMute and Speak has reached the test SS.
        params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION);
        ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
        while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
            if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
                // Give the Speak directive some simulated playback time before reporting completion.
                std::this_thread::sleep_for(std::chrono::seconds(2));
                params.result->setCompleted();
            }
            params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION);
        }

        // Check that AIP is now in EXPECTING_SPEECH state.
        ASSERT_TRUE(m_StateObserver->checkState(
            AudioInputProcessorObserverInterface::State::EXPECTING_SPEECH, SHORT_TIMEOUT_DURATION));

        // Put audio onto the SDS saying ".......".
        bool secondError;
        std::string secondFile = g_inputPath + SILENCE_AUDIO_FILE;
        std::vector secondAudioData = readAudioFromFile(secondFile, RIFF_HEADER_SIZE, &secondError);
        ASSERT_FALSE(secondError);
        m_AudioBufferWriter->write(secondAudioData.data(), secondAudioData.size());

        // Check that AIP is now in RECOGNIZING state.
        ASSERT_TRUE(m_StateObserver->checkState(
            AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

        // Check that a recognize event was sent.
        ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

        // The test channel client has been notified the alarm channel has been backgrounded.
        ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

        // Check that AIP is in BUSY state.
        ASSERT_TRUE(
            m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

        // Check that AIP is in an IDLE state.
        ASSERT_TRUE(
            m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

        // Check that the test context provider was asked to provide context for the event.
        ASSERT_TRUE(m_stateProvider->checkStateRequested());

        // The test channel client has been notified the alarm channel has been foregrounded.
        ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

        // Check that the test context provider was not asked to provide context again.
        ASSERT_FALSE(m_stateProvider->checkStateRequested());

        // Exit once AVS stops responding with further directives (no more ExpectSpeech turns).
        params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION);
        if (params.type == TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
            expectSpeechFound = false;
        }
    }

    // The test channel client has not changed.
    ASSERT_EQ(m_testClient->waitForFocusChange(SHORT_TIMEOUT_DURATION), FocusState::NONE);

    // Check that a recognize event was not sent.
    ASSERT_FALSE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that no directives arrived to the fake SS.
    ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
}

/**
 * Test AudioInputProcessor's ability to handle a cancel partway through an interaction.
 *
 * To do this, audio of "Tell me a joke" is fed into a stream after button sends recognize to AudioInputProcessor. The
 * button then sends a reset command and no recognize event is sent.
 */
TEST_F(AudioInputProcessorTest, tapToTalkCancel) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, m_TapToTalkAudioProvider));

    // Cancel the interaction.
    m_AudioInputProcessor->resetState();

    // Check that AIP was briefly in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Tell me a joke".
    bool error;
    std::string file = g_inputPath + JOKE_AUDIO_FILE;
    std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that no directives arrived to the fake SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
}

/**
 * Test AudioInputProcessor's ability to handle a simple interaction triggered by a hold to talk button.
 *
 * To do this, audio of "Tell me a joke" is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute and Speak
 * directive.
 */
TEST_F(AudioInputProcessorTest, holdToTalkJoke) {
    // Signal to the AIP to start recognizing.
    ASSERT_NE(nullptr, m_HoldToTalkAudioProvider);
    ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Tell me a joke".
    bool error;
    std::string file = g_inputPath + JOKE_AUDIO_FILE;
    std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Stop holding the button.
    ASSERT_TRUE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        // Only Speak directives need explicit completion; everything else is drained.
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }
}

/**
 * Test AudioInputProcessor's ability to handle a multiturn interaction triggered by a hold to talk button.
 *
 * To do this, audio of "Wikipedia" is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute, Speak,
 * and ExpectSpeech directive. Audio of "Lions" is then fed into the stream and another recognize event is sent.
 */
TEST_F(AudioInputProcessorTest, holdToTalkMultiturn) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Wikipedia".
    bool error;
    std::string file = g_inputPath + WIKI_AUDIO_FILE;
    std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Stop holding the button.
    ASSERT_TRUE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            // Give the Speak directive some simulated playback time before reporting completion.
            std::this_thread::sleep_for(std::chrono::seconds(2));
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION);
    }

    // Check that AIP is now in EXPECTING_SPEECH state.
    ASSERT_TRUE(m_StateObserver->checkState(
        AudioInputProcessorObserverInterface::State::EXPECTING_SPEECH, LONG_TIMEOUT_DURATION));

    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider));

    // Put audio onto the SDS of "Lions".
    bool secondError;
    file = g_inputPath + LIONS_AUDIO_FILE;
    std::vector secondAudioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &secondError);
    ASSERT_FALSE(secondError);
    m_AudioBufferWriter->write(secondAudioData.data(), secondAudioData.size());

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Stop holding the button.
    ASSERT_TRUE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    // NOTE(review): the result of the first waitForNext() below is immediately overwritten by the second call,
    // silently discarding one directive event — confirm this double wait is intentional.
    params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }
}

/**
 * Test AudioInputProcessor's ability to handle a multiturn interaction triggered by a hold to talk button but no user
 * response.
 *
 * To do this, audio of "Wikipedia" is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute, Speak,
 * and ExpectSpeech directive. Audio of "...."
is then fed into the stream and another recognize event is sent * but no directives are given in response. */ TEST_F(AudioInputProcessorTest, holdToTalkMultiTurnWithSilence) { // Signal to the AIP to start recognizing. ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider)); // Check that AIP is now in RECOGNIZING state. ASSERT_TRUE( m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION)); // Put audio onto the SDS saying "Wikipedia". bool error; std::string file = g_inputPath + WIKI_AUDIO_FILE; std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error); ASSERT_FALSE(error); ASSERT_FALSE(audioData.empty()); m_AudioBufferWriter->write(audioData.data(), audioData.size()); // The test channel client has been notified the alarm channel has been backgrounded. ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND); // Stop holding the button. ASSERT_TRUE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor)); // Check that AIP is in BUSY state. ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION)); // Check that the test context provider was asked to provide context for the event. ASSERT_TRUE(m_stateProvider->checkStateRequested()); // The test channel client has been notified the alarm channel has been foregrounded. ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND); // Check that a recognize event was sent. ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE)); bool expectSpeechFound = true; TestDirectiveHandler::DirectiveParams params; while (expectSpeechFound) { // Check that AIP is in an IDLE state before starting. 
ASSERT_TRUE( m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION)); // Check that prehandle and handle for setMute and Speak has reached the test SS. params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION); ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT); while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) { if (params.isHandle() && params.directive->getName() == NAME_SPEAK) { std::this_thread::sleep_for(std::chrono::seconds(2)); params.result->setCompleted(); } params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION); } // Check that AIP is now in EXPECTING_SPEECH state. ASSERT_TRUE(m_StateObserver->checkState( AudioInputProcessorObserverInterface::State::EXPECTING_SPEECH, SHORT_TIMEOUT_DURATION)); // Signal to the AIP to start recognizing. ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider)); // Put audio onto the SDS saying ".......". bool secondError; std::string secondFile = g_inputPath + SILENCE_AUDIO_FILE; std::vector secondAudioData = readAudioFromFile(secondFile, RIFF_HEADER_SIZE, &secondError); ASSERT_FALSE(secondError); m_AudioBufferWriter->write(secondAudioData.data(), secondAudioData.size()); // Check that AIP is now in RECOGNIZING state. ASSERT_TRUE(m_StateObserver->checkState( AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION)); // Stop holding the button. ASSERT_TRUE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor)); // Check that a recognize event was sent. ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE)); // The test channel client has been notified the alarm channel has been backgrounded. ASSERT_EQ(m_testClient->waitForFocusChange(NO_TIMEOUT_DURATION), FocusState::BACKGROUND); // Check that AIP is in BUSY state. 
ASSERT_TRUE( m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION)); // Check that AIP is in an IDLE state. ASSERT_TRUE( m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION)); // Check that the test context provider was asked to provide context for the event. ASSERT_TRUE(m_stateProvider->checkStateRequested()); // The test channel client has been notified the alarm channel has been foregrounded. ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND); // Check that the test context provider was asked to provide context for the event. ASSERT_FALSE(m_stateProvider->checkStateRequested()); params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION); if (params.type == TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) { expectSpeechFound = false; } } // The test channel client has not changed. ASSERT_EQ(m_testClient->waitForFocusChange(SHORT_TIMEOUT_DURATION), FocusState::NONE); // Check that a recognize event was not sent. ASSERT_FALSE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE)); // Check that no directives arrived to the fake SS. ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT); } /** * Test AudioInputProcessor's ability to handle a multiturn interation triggered by a hold to talk button that times * out. * * To do this, audio of "Wikipedia" is fed into a stream after button sends recognize to AudioInputProcessor. The * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute, Speak, * and ExpectSpeech directive. The button does not trigger another recognize so no recognize event is sent * and no directives are given in response. ExpectSpeechTimedOut event is observed to be sent. */ TEST_F(AudioInputProcessorTest, holdToTalkMultiturnWithTimeOut) { // Signal to the AIP to start recognizing. 
ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider)); // Check that AIP is now in RECOGNIZING state. ASSERT_TRUE( m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION)); // Put audio onto the SDS saying "Wikipedia". bool error; std::string file = g_inputPath + WIKI_AUDIO_FILE; std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error); ASSERT_FALSE(error); ASSERT_FALSE(audioData.empty()); m_AudioBufferWriter->write(audioData.data(), audioData.size()); // The test channel client has been notified the alarm channel has been backgrounded. ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND); // Stop holding the button. ASSERT_TRUE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor)); // Check that AIP is in BUSY state. ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION)); // Check that AIP is in an IDLE state. ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION)); // Check that the test context provider was asked to provide context for the event. ASSERT_TRUE(m_stateProvider->checkStateRequested()); // The test channel client has been notified the alarm channel has been foregrounded. ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND); // Check that a recognize event was sent. ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE)); // Check that prehandle and handle for setMute and Speak has reached the test SS. 
TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION); ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT); while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) { if (params.isHandle() && params.directive->getName() == NAME_SPEAK) { std::this_thread::sleep_for(std::chrono::seconds(2)); params.result->setCompleted(); } params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION); } // Do not signal to the AIP to start recognizing. // Check that AIP is now in EXPECTING_SPEECH state. ASSERT_TRUE(m_StateObserver->checkState( AudioInputProcessorObserverInterface::State::EXPECTING_SPEECH, LONG_TIMEOUT_DURATION)); // The test channel client has stayed foregrounded. ASSERT_EQ(m_testClient->waitForFocusChange(SHORT_TIMEOUT_DURATION), FocusState::NONE); // Check that AIP is in an IDLE state. ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION)); // Check that ExpectSpeechTimeOut event has been sent. ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_EXPECT_SPEECH_TIMED_OUT)); } /** * Test AudioInputProcessor's ability to handle no audio being written triggered by a hold to talk button. * * To do this, no audio is fed into a stream after button sends recognize to AudioInputProcessor. The * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with no directive. */ TEST_F(AudioInputProcessorTest, holdToTalkNoAudio) { // Signal to the AIP to start recognizing. ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider)); // Check that AIP is now in RECOGNIZING state. ASSERT_TRUE( m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION)); // Write nothing to the SDS. // The test channel client has been notified the alarm channel has been backgrounded. 
ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND); // Stop holding the button. ASSERT_TRUE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor)); // Check that AIP is in BUSY state. ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION)); // Check that AIP is in an IDLE state. ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION)); // Check that the test context provider was asked to provide context for the event. ASSERT_TRUE(m_stateProvider->checkStateRequested()); // The test channel client has been notified the alarm channel has been foregrounded. ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND); // Check that a recognize event was sent ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE)); // Check that no prehandle or handle for setMute and Speak has reached the test SS. TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION); ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT); } /** * Test AudioInputProcessor's ability to handle a cancel partway through a hold to talk interaction. * * To do this, audio of "Tell me a joke" is fed into a stream after button sends recognize to AudioInputProcessor. The * button then sends a cancel command and no recognize event is sent. */ TEST_F(AudioInputProcessorTest, holdToTalkCancel) { // Signal to the AIP to start recognizing. ASSERT_NE(nullptr, m_HoldToTalkAudioProvider); ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider)); // Check that AIP is now in RECOGNIZING state. ASSERT_TRUE( m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION)); // Cancel the recognize. 
m_AudioInputProcessor->resetState(); // Put audio onto the SDS saying "Tell me a joke". bool error; std::string file = g_inputPath + JOKE_AUDIO_FILE; std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error); ASSERT_FALSE(error); ASSERT_FALSE(audioData.empty()); m_AudioBufferWriter->write(audioData.data(), audioData.size()); // Stop holding the button. ASSERT_FALSE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor)); // Check that AIP is in an IDLE state. ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION)); // Check that the test context provider was not asked to provide context for the event. ASSERT_TRUE(m_stateProvider->checkStateRequested()); // Check that no recognize event was sent. ASSERT_FALSE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE)); // Check that no prehandle or handle for setMute and Speak has reached the test SS. TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION); ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT); } /** * Test AudioInputProcessor's ability to not handle audio when no recognize occurs. * * To do this, audio of "Tell me a joke" is fed into a stream that is being read by a wake word engine. The * lack of the wakeword or button-initiated recognize results in no recognize event being sent. */ TEST_F(AudioInputProcessorTest, audioWithoutAnyTrigger) { // Put audio onto the SDS saying "Tell me a joke" without a trigger. bool error; std::string file = g_inputPath + JOKE_AUDIO_FILE; std::vector audioData = readAudioFromFile(file, RIFF_HEADER_SIZE, &error); ASSERT_FALSE(error); m_AudioBufferWriter->write(audioData.data(), audioData.size()); // Check that AIP is still in an IDLE state. 
ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, SHORT_TIMEOUT_DURATION)); // Check that the test context provider was asked to provide context as the post-connect objects would have fetched // context to send StateSynchronizer event. ASSERT_TRUE(m_stateProvider->checkStateRequested()); // The test channel client has been not notified the alarm channel has been foregrounded. ASSERT_EQ(m_testClient->waitForFocusChange(SHORT_TIMEOUT_DURATION), FocusState::NONE); // Check that no prehandle or handle has reached the test SS. TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION); ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT); } } // namespace test } // namespace integration } // namespace alexaClientSDK int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); if (argc < 3) { std::cerr << "USAGE: " << std::string(argv[0]) << " " << std::endl; return 1; } else { alexaClientSDK::integration::test::g_configPath = std::string(argv[1]); alexaClientSDK::integration::test::g_inputPath = std::string(argv[2]); return RUN_ALL_TESTS(); } }