SmartAudio/package/avs/avs-sdk/files/avs-device-sdk/Integration/test/AudioInputProcessorIntegrat...

/*
 * Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *     http://aws.amazon.com/apache2.0/
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

/// @file AudioInputProcessorIntegrationTest.cpp

#include <chrono>
#include <deque>
#include <fstream>
#include <future>
#include <mutex>
#include <string>
#include <unordered_map>

#include <gtest/gtest.h>

#include <ACL/AVSConnectionManager.h>
#include <ADSL/DirectiveSequencer.h>
#include <ADSL/MessageInterpreter.h>
#include <AFML/Channel.h>
#include <AFML/FocusManager.h>
#include <AIP/AudioInputProcessor.h>
#include <AIP/AudioProvider.h>
#include <AIP/Initiator.h>
#include <AVSCommon/AVS/Attachment/InProcessAttachmentWriter.h>
#include <AVSCommon/AVS/BlockingPolicy.h>
#include <AVSCommon/AVS/MessageRequest.h>
#include <AVSCommon/SDKInterfaces/ChannelObserverInterface.h>
#include <AVSCommon/SDKInterfaces/DirectiveHandlerInterface.h>
#include <AVSCommon/SDKInterfaces/DirectiveHandlerResultInterface.h>
#include <AVSCommon/SDKInterfaces/ExceptionEncounteredSenderInterface.h>
#include <AVSCommon/SDKInterfaces/KeyWordObserverInterface.h>
#include <AVSCommon/Utils/JSON/JSONUtils.h>
#include <AVSCommon/Utils/Logger/LogEntry.h>

#include "Integration/ACLTestContext.h"
#include "Integration/ObservableMessageRequest.h"
#include "Integration/AipStateObserver.h"
#include "Integration/TestMessageSender.h"
#include "Integration/TestDirectiveHandler.h"
#include "Integration/TestExceptionEncounteredSender.h"
#include "System/UserInactivityMonitor.h"

// If the tests are created with both Kittai and Sensory, Kittai is chosen.
#ifdef KWD_KITTAI
#include "KittAi/KittAiKeyWordDetector.h"
#elif KWD_SENSORY
#include "Sensory/SensoryKeywordDetector.h"
#endif

namespace alexaClientSDK {
namespace integration {
namespace test {

using namespace alexaClientSDK::acl;
using namespace alexaClientSDK::adsl;
using namespace alexaClientSDK::avsCommon;
using namespace alexaClientSDK::avsCommon::avs;
using namespace alexaClientSDK::avsCommon::utils;
using namespace alexaClientSDK::avsCommon::avs::attachment;
using namespace alexaClientSDK::avsCommon::sdkInterfaces;
using namespace capabilityAgents::aip;
using namespace capabilityAgents::system;
using namespace sdkInterfaces;
using namespace avsCommon::utils::sds;
using namespace avsCommon::utils::json;
using namespace afml;
using namespace contextManager;

// This is a 16 bit 16 kHz little endian linear PCM audio file of "Tell me a Joke" to be recognized.
static const std::string JOKE_AUDIO_FILE = "/recognize_joke_test.wav";
// This is a 16 bit 16 kHz little endian linear PCM audio file of "Wikipedia" to be recognized.
static const std::string WIKI_AUDIO_FILE = "/recognize_wiki_test.wav";
// This is a 16 bit 16 kHz little endian linear PCM audio file of "Lions" to be recognized.
static const std::string LIONS_AUDIO_FILE = "/recognize_lions_test.wav";
// This is a 16 bit 16 kHz little endian linear PCM audio file of silence to be recognized.
static const std::string SILENCE_AUDIO_FILE = "/recognize_silence_test.wav";
// This is a 16 bit 16 kHz little endian linear PCM audio file of "Alexa, Tell me a Joke" to be recognized.
static const std::string ALEXA_JOKE_AUDIO_FILE = "/alexa_recognize_joke_test.wav";
// This is a 16 bit 16 kHz little endian linear PCM audio file of "Alexa, Wikipedia" to be recognized.
static const std::string ALEXA_WIKI_AUDIO_FILE = "/alexa_recognize_wiki_test.wav";
// This is a 16 bit 16 kHz little endian linear PCM audio file of "Alexa" then silence to be recognized.
static const std::string ALEXA_SILENCE_AUDIO_FILE = "/alexa_recognize_silence_test.wav";
// This is a 32KHz little endian OPUS audio file with Constant Bit rate of "What time is it?" to be recognized.
static const std::string TIME_AUDIO_FILE_OPUS = "/utterance_time_success.opus";
// This string to be used for Speak Directives which use the NAMESPACE_SPEECH_SYNTHESIZER namespace.
static const std::string NAME_VOLUME_STATE = "VolumeState";
// This string to be used for Speak Directives which use the NAMESPACE_SPEECH_SYNTHESIZER namespace.
static const std::string NAME_SPEAK = "Speak";
// This string to be used for ExpectSpeech Directives which use the NAMESPACE_SPEECH_RECOGNIZER namespace.
static const std::string NAME_EXPECT_SPEECH = "ExpectSpeech";
// This string to be used for ExpectSpeechTimedOut Events which use the NAMESPACE_SPEECH_RECOGNIZER namespace.
static const std::string NAME_EXPECT_SPEECH_TIMED_OUT = "ExpectSpeechTimedOut";
// This string to be used for SetMute Directives which use the NAMESPACE_SPEAKER namespace.
static const std::string NAME_SET_MUTE = "SetMute";
// This string to be used for Play Directives which use the NAMESPACE_AUDIO_PLAYER namespace.
static const std::string NAME_PLAY = "Play";
// This string to be used for StopCapture Directives which use the NAMESPACE_SPEECH_RECOGNIZER namespace.
static const std::string NAME_STOP_CAPTURE = "StopCapture";
// This string to be used for Recognize Directives which use the NAMESPACE_SPEECH_RECOGNIZER namespace.
static const std::string NAME_RECOGNIZE = "Recognize";
// This String to be used to register the SpeechRecognizer namespace to a DirectiveHandler.
static const std::string NAMESPACE_SPEECH_RECOGNIZER = "SpeechRecognizer";
// This String to be used to register the SpeechSynthesizer namespace to a DirectiveHandler.
static const std::string NAMESPACE_SPEECH_SYNTHESIZER = "SpeechSynthesizer";
// This String to be used to register the AudioPlayer namespace to a DirectiveHandler.
static const std::string NAMESPACE_AUDIO_PLAYER = "AudioPlayer";
// This String to be used to register the Speaker namespace to a DirectiveHandler.
static const std::string NAMESPACE_SPEAKER = "Speaker";
// This pair connects a Speak name and SpeechSynthesizer namespace for use in DirectiveHandler registration.
static const NamespaceAndName SPEAK_PAIR = {NAMESPACE_SPEECH_SYNTHESIZER, NAME_SPEAK};
// This pair connects a ExpectSpeech name and SpeechRecognizer namespace for use in DirectiveHandler registration.
static const NamespaceAndName EXPECT_SPEECH_PAIR = {NAMESPACE_SPEECH_RECOGNIZER, NAME_EXPECT_SPEECH};
// This pair connects a SetMute name and Speaker namespace for use in DirectiveHandler registration.
static const NamespaceAndName SET_MUTE_PAIR = {NAMESPACE_SPEAKER, NAME_SET_MUTE};
// This pair connects a SetMute name and Speaker namespace for use in DirectiveHandler registration.
static const NamespaceAndName VOLUME_STATE_PAIR = {NAMESPACE_SPEAKER, NAME_VOLUME_STATE};
// This pair connects a Play name and AudioPlayer namespace for use in DirectiveHandler registration.
static const NamespaceAndName PLAY_PAIR = {NAMESPACE_AUDIO_PLAYER, NAME_PLAY};
// This pair connects a StopCapture name and SpeechRecognizer namespace for use in DirectiveHandler registration.
static const NamespaceAndName STOP_CAPTURE_PAIR = {NAMESPACE_SPEECH_RECOGNIZER, NAME_STOP_CAPTURE};
/// Sample dialog activity id.
static const std::string DIALOG_ACTIVITY_ID = "Dialog";
/// Sample alerts activity id.
static const std::string ALARM_ACTIVITY_ID = "Alarms";
/// Sample content activity id.
static const std::string CONTENT_ACTIVITY_ID = "Content";

// This Integer to be used to specify a timeout in seconds for long operations.
static const std::chrono::seconds LONG_TIMEOUT_DURATION(10);
// This Integer to be used when it is expected the duration will timeout but some wait time is still desired.
static const std::chrono::seconds SHORT_TIMEOUT_DURATION(2);
// This Integer to be used when no timeout is desired.
static const std::chrono::seconds NO_TIMEOUT_DURATION(0);
// The length of RIFF container format which is the header of a wav file.
static const int RIFF_HEADER_SIZE = 44;
/// The compatible sample rate for OPUS 32KHz.
static const unsigned int COMPATIBLE_SAMPLE_RATE_OPUS_32 = 32000;
#ifdef KWD_KITTAI
/// The name of the resource file required for Kitt.ai.
static const std::string RESOURCE_FILE = "/KittAiModels/common.res";
/// The name of the Alexa model file for Kitt.ai.
static const std::string MODEL_FILE = "/KittAiModels/alexa.umdl";
/// The keyword associated with alexa.umdl.
static const std::string MODEL_KEYWORD = "ALEXA";
#elif KWD_SENSORY
/// The name of the resource file required for Sensory
static const std::string RESOURCE_FILE = "/SensoryModels/spot-alexa-rpi-31000.snsr";
#endif

/// JSON key to get the directive object of a message.
static const std::string JSON_MESSAGE_DIRECTIVE_KEY = "directive";
/// JSON key to get the header object of a message.
static const std::string JSON_MESSAGE_HEADER_KEY = "header";
/// JSON key to get the namespace value of a header.
static const std::string JSON_MESSAGE_NAMESPACE_KEY = "namespace";
/// JSON key to get the name value of a header.
static const std::string JSON_MESSAGE_NAME_KEY = "name";
/// JSON key to get the messageId value of a header.
static const std::string JSON_MESSAGE_MESSAGE_ID_KEY = "messageId";
/// JSON key to get the dialogRequestId value of a header.
static const std::string JSON_MESSAGE_DIALOG_REQUEST_ID_KEY = "dialogRequestId";
/// JSON key to get the payload object of a message.
static const std::string JSON_MESSAGE_PAYLOAD_KEY = "payload";

/**
 * The sensitivity to the keyword in the model. Set to 0.6 as this is what was described as optimal on the Kitt.ai
 * Github page.
 */
#ifdef KWD_KITTAI
static const double KITTAI_SENSITIVITY = 0.6;
#endif
/// The compatible encoding for Kitt.ai.
static const avsCommon::utils::AudioFormat::Encoding COMPATIBLE_ENCODING =
    avsCommon::utils::AudioFormat::Encoding::LPCM;
/// The compatible endianness for Kitt.ai.
static const avsCommon::utils::AudioFormat::Endianness COMPATIBLE_ENDIANNESS =
    avsCommon::utils::AudioFormat::Endianness::LITTLE;
/// The compatible sample rate for Kitt.ai.
static const unsigned int COMPATIBLE_SAMPLE_RATE = 16000;
/// The compatible bits per sample for Kitt.ai.
static const unsigned int COMPATIBLE_SAMPLE_SIZE_IN_BITS = 16;
/// The compatible number of channels for Kitt.ai
static const unsigned int COMPATIBLE_NUM_CHANNELS = 1;

/// String to identify log entries originating from this file.
static const std::string TAG("AlexaDirectiveSequencerLibraryTest");

/// Path to the AlexaClientSDKConfig.json file (from command line arguments).
static std::string g_configPath;
/// Path to resources (e.g. audio files) for tests (from command line arguments).
static std::string g_inputPath;

/**
 * Create a LogEntry using this file's TAG and the specified event string.
 *
 * @param The event string for this @c LogEntry.
 */
#define LX(event) ::alexaClientSDK::avsCommon::utils::logger::LogEntry(TAG, event)

class tapToTalkButton {
public:
    bool startRecognizing(std::shared_ptr<AudioInputProcessor> aip, std::shared_ptr<AudioProvider> audioProvider) {
        return aip->recognize(*audioProvider, Initiator::TAP).get();
    }
};

class holdToTalkButton {
public:
    bool startRecognizing(std::shared_ptr<AudioInputProcessor> aip, std::shared_ptr<AudioProvider> audioProvider) {
        return aip->recognize(*audioProvider, Initiator::PRESS_AND_HOLD).get();
    }

    bool stopRecognizing(std::shared_ptr<AudioInputProcessor> aip) {
        return aip->stopCapture().get();
    }
};

#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
class wakeWordTrigger : public KeyWordObserverInterface {
public:
    wakeWordTrigger(AudioFormat compatibleAudioFormat, std::shared_ptr<AudioInputProcessor> aip) {
        m_compatibleAudioFormat = compatibleAudioFormat;
        m_aip = aip;
    }
    void onKeyWordDetected(
        std::shared_ptr<AudioInputStream> stream,
        std::string keyword,
        AudioInputStream::Index beginIndex,
        AudioInputStream::Index endIndex,
        std::shared_ptr<const std::vector<char>> KWDMetadata = nullptr) {
        keyWordDetected = true;
        ASSERT_NE(nullptr, stream);
        bool alwaysReadable = true;
        bool canOverride = false;
        bool canBeOverridden = true;
        auto audioProvider = AudioProvider(
            stream, m_compatibleAudioFormat, ASRProfile::NEAR_FIELD, alwaysReadable, !canOverride, canBeOverridden);

        if (m_aip) {
            AudioInputStream::Index aipBegin = AudioInputProcessor::INVALID_INDEX;
            AudioInputStream::Index aipEnd = AudioInputProcessor::INVALID_INDEX;

            if (endIndex != KeyWordObserverInterface::UNSPECIFIED_INDEX) {
                if (beginIndex != KeyWordObserverInterface::UNSPECIFIED_INDEX) {
                    // If we know where the keyword starts and ends, pass both of those along to AIP.
                    aipBegin = beginIndex;
                    aipEnd = endIndex;
                } else {
                    // If we only know where the keyword ends, AIP should begin recording there.
                    aipBegin = endIndex;
                }
            }
// Else we don't have any indices to pass along; AIP will begin recording ASAP.
#ifdef KWD_KITTAI
            m_aip->recognize(audioProvider, Initiator::TAP, aipBegin, aipEnd, keyword);
#elif KWD_SENSORY
            m_aip->recognize(audioProvider, Initiator::WAKEWORD, aipBegin, aipEnd, keyword);
#endif
        }
    }

    bool keyWordDetected = false;
    AudioFormat m_compatibleAudioFormat;
    std::shared_ptr<AudioInputProcessor> m_aip;
};
#endif

class testStateProvider
        : public StateProviderInterface
        , public RequiresShutdown {
public:
    testStateProvider(std::shared_ptr<avsCommon::sdkInterfaces::ContextManagerInterface> contextManager) :
            RequiresShutdown("testStateProvider") {
        m_contextManager = contextManager;
    }
    ~testStateProvider() {
    }
    void provideState(const NamespaceAndName& nsname, const unsigned int stateRequestToken) override {
        std::ostringstream context;
        context << R"({)"
                   R"("volume":)"
                << 50 << R"(,)"
                << R"("muted":)" << false << R"(})";

        m_contextManager->setState(
            VOLUME_STATE_PAIR, context.str(), avsCommon::avs::StateRefreshPolicy::ALWAYS, stateRequestToken);
        m_stateRequested = true;
    }
    bool checkStateRequested() {
        bool savedResult = false;
        if (m_stateRequested) {
            savedResult = true;
            m_stateRequested = false;
        }
        return savedResult;
    }

protected:
    void doShutdown() override {
        m_contextManager.reset();
    }

private:
    bool m_stateRequested = false;
    std::shared_ptr<avsCommon::sdkInterfaces::ContextManagerInterface> m_contextManager;
};

/// A test observer that mocks out the ChannelObserverInterface##onFocusChanged() call.
class TestClient : public ChannelObserverInterface {
public:
    /**
     * Constructor.
     */
    TestClient() : m_focusState(FocusState::NONE) {
    }

    /**
     * Implementation of the ChannelObserverInterface##onFocusChanged() callback.
     *
     * @param focusState The new focus state of the Channel observer.
     */
    void onFocusChanged(FocusState focusState) override {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_queue.push_back(focusState);
        m_focusState = focusState;
        m_wakeTrigger.notify_all();
    }

    /**
     * Waits for the ChannelObserverInterface##onFocusChanged() callback.
     *
     * @param timeout The amount of time to wait for the callback.
     * @param focusChanged An output parameter that notifies the caller whether a callback occurred.
     * @return Returns @c true if the callback occured within the timeout period and @c false otherwise.
     */
    FocusState waitForFocusChange(std::chrono::milliseconds timeout) {
        FocusState ret;
        std::unique_lock<std::mutex> lock(m_mutex);
        if (!m_wakeTrigger.wait_for(lock, timeout, [this]() { return !m_queue.empty(); })) {
            ret = FocusState::NONE;
            return ret;
        }
        ret = m_queue.front();
        m_queue.pop_front();
        return ret;
    }

private:
    /// The focus state of the observer.
    FocusState m_focusState;

    /// A lock to guard against focus state changes.
    std::mutex m_mutex;

    /// Trigger to wake up waitForNext calls.
    std::condition_variable m_wakeTrigger;
    /// Queue of received directives that have not been waited on.
    std::deque<FocusState> m_queue;
};

class AudioInputProcessorTest : public ::testing::Test {
protected:
    void SetUp() override {
        m_context = ACLTestContext::create(g_configPath);
        ASSERT_TRUE(m_context);

        m_exceptionEncounteredSender = std::make_shared<TestExceptionEncounteredSender>();

        DirectiveHandlerConfiguration handlerConfig;
        handlerConfig[SET_MUTE_PAIR] = BlockingPolicy::NON_BLOCKING;
        handlerConfig[SPEAK_PAIR] = BlockingPolicy::BLOCKING;
        m_directiveHandler = std::make_shared<TestDirectiveHandler>(handlerConfig);

        m_directiveSequencer = DirectiveSequencer::create(m_exceptionEncounteredSender);
        ASSERT_NE(nullptr, m_directiveSequencer);
        m_messageInterpreter = std::make_shared<MessageInterpreter>(
            m_exceptionEncounteredSender, m_directiveSequencer, m_context->getAttachmentManager());

        m_compatibleAudioFormat.sampleRateHz = COMPATIBLE_SAMPLE_RATE;
        m_compatibleAudioFormat.sampleSizeInBits = COMPATIBLE_SAMPLE_SIZE_IN_BITS;
        m_compatibleAudioFormat.numChannels = COMPATIBLE_NUM_CHANNELS;
        m_compatibleAudioFormat.endianness = COMPATIBLE_ENDIANNESS;
        m_compatibleAudioFormat.encoding = COMPATIBLE_ENCODING;

        size_t nWords = 1024 * 1024;
        size_t wordSize = 2;
        size_t maxReaders = 3;
        size_t bufferSize = AudioInputStream::calculateBufferSize(nWords, wordSize, maxReaders);

        auto m_Buffer = std::make_shared<avsCommon::avs::AudioInputStream::Buffer>(bufferSize);
        auto m_Sds = avsCommon::avs::AudioInputStream::create(m_Buffer, wordSize, maxReaders);
        ASSERT_NE(nullptr, m_Sds);
        m_AudioBuffer = std::move(m_Sds);
        m_AudioBufferWriter =
            m_AudioBuffer->createWriter(avsCommon::avs::AudioInputStream::Writer::Policy::NONBLOCKABLE);
        ASSERT_NE(nullptr, m_AudioBufferWriter);

        // Set up tap and hold to talk buttons.
        bool alwaysReadable = true;
        bool canOverride = true;
        bool canBeOverridden = true;
        m_HoldToTalkAudioProvider = std::make_shared<AudioProvider>(
            m_AudioBuffer,
            m_compatibleAudioFormat,
            ASRProfile::CLOSE_TALK,
            !alwaysReadable,
            canOverride,
            !canBeOverridden);
        m_TapToTalkAudioProvider = std::make_shared<AudioProvider>(
            m_AudioBuffer,
            m_compatibleAudioFormat,
            ASRProfile::NEAR_FIELD,
            alwaysReadable,
            canOverride,
            !canBeOverridden);

        m_tapToTalkButton = std::make_shared<tapToTalkButton>();
        m_holdToTalkButton = std::make_shared<holdToTalkButton>();
        m_focusManager = std::make_shared<FocusManager>(FocusManager::DEFAULT_AUDIO_CHANNELS);
        m_dialogUXStateAggregator = std::make_shared<avsCommon::avs::DialogUXStateAggregator>();

        m_stateProvider = std::make_shared<testStateProvider>(m_context->getContextManager());
        ASSERT_TRUE(m_context->getContextManager());

        m_context->getContextManager()->setStateProvider(VOLUME_STATE_PAIR, m_stateProvider);

        // Set up connection and connect
        m_avsConnectionManager = std::make_shared<TestMessageSender>(
            m_context->getMessageRouter(), false, m_context->getConnectionStatusObserver(), m_messageInterpreter);
        ASSERT_TRUE(m_avsConnectionManager);

        connect();

        m_userInactivityMonitor = UserInactivityMonitor::create(m_avsConnectionManager, m_exceptionEncounteredSender);
        m_AudioInputProcessor = AudioInputProcessor::create(
            m_directiveSequencer,
            m_avsConnectionManager,
            m_context->getContextManager(),
            m_focusManager,
            m_dialogUXStateAggregator,
            m_exceptionEncounteredSender,
            m_userInactivityMonitor);
        ASSERT_NE(nullptr, m_AudioInputProcessor);
        m_AudioInputProcessor->addObserver(m_dialogUXStateAggregator);

        m_testClient = std::make_shared<TestClient>();
        ASSERT_TRUE(m_focusManager->acquireChannel(FocusManager::ALERTS_CHANNEL_NAME, m_testClient, ALARM_ACTIVITY_ID));
        ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

        m_StateObserver = std::make_shared<AipStateObserver>();
        ASSERT_NE(nullptr, m_StateObserver);
        m_AudioInputProcessor->addObserver(m_StateObserver);

        ASSERT_TRUE(m_directiveSequencer->addDirectiveHandler(m_AudioInputProcessor));

#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
        m_wakeWordTrigger = std::make_shared<wakeWordTrigger>(m_compatibleAudioFormat, m_AudioInputProcessor);

#ifdef KWD_KITTAI
        kwd::KittAiKeyWordDetector::KittAiConfiguration config;
        config = {g_inputPath + MODEL_FILE, MODEL_KEYWORD, KITTAI_SENSITIVITY};
        m_detector = kwd::KittAiKeyWordDetector::create(
            m_AudioBuffer,
            m_compatibleAudioFormat,
            {m_wakeWordTrigger},
            // Not using an empty initializer list here to account for a GCC 4.9.2 regression
            std::unordered_set<std::shared_ptr<KeyWordDetectorStateObserverInterface>>(),
            g_inputPath + RESOURCE_FILE,
            {config},
            2.0,
            false);
        ASSERT_TRUE(m_detector);
#elif KWD_SENSORY
        m_detector = kwd::SensoryKeywordDetector::create(
            m_AudioBuffer,
            m_compatibleAudioFormat,
            {m_wakeWordTrigger},
            // Not using an empty initializer list here to account for a GCC 4.9.2 regression
            std::unordered_set<std::shared_ptr<KeyWordDetectorStateObserverInterface>>(),
            g_inputPath + RESOURCE_FILE);
        ASSERT_TRUE(m_detector);
#endif
#endif
        ASSERT_TRUE(m_directiveSequencer->addDirectiveHandler(m_directiveHandler));

        ASSERT_TRUE(
            m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, NO_TIMEOUT_DURATION));
    }

    void TearDown() override {
        disconnect();
        // Note that these nullptr checks are needed to avoid segaults if @c SetUp() failed.
        if (m_AudioInputProcessor) {
            m_AudioInputProcessor->shutdown();
        }
        if (m_directiveSequencer) {
            m_directiveSequencer->shutdown();
        }
        if (m_avsConnectionManager) {
            m_avsConnectionManager->shutdown();
        }
        if (m_stateProvider) {
            m_stateProvider->shutdown();
        }
        if (m_userInactivityMonitor) {
            m_userInactivityMonitor->shutdown();
        }
        m_context.reset();
    }

    /**
     * Connect to AVS.
     */
    void connect() {
        m_avsConnectionManager->enable();
        m_context->waitForConnected();
    }

    /**
     * Disconnect from AVS.
     */
    void disconnect() {
        if (m_avsConnectionManager) {
            m_avsConnectionManager->disable();
            m_context->waitForDisconnected();
        }
    }

    bool checkSentEventName(std::shared_ptr<TestMessageSender> connectionManager, std::string expectedName) {
        TestMessageSender::SendParams sendParams = connectionManager->waitForNext(SHORT_TIMEOUT_DURATION);
        if (TestMessageSender::SendParams::Type::SEND == sendParams.type) {
            std::string eventString;
            std::string eventHeader;
            std::string eventName;
            jsonUtils::retrieveValue(sendParams.request->getJsonContent(), "event", &eventString);
            jsonUtils::retrieveValue(eventString, "header", &eventHeader);
            jsonUtils::retrieveValue(eventHeader, "name", &eventName);
            if (eventName == expectedName) {
                return true;
            } else {
                return false;
            }
        } else {
            return false;
        }
    }

    /// Context for running ACL based tests.
    std::unique_ptr<ACLTestContext> m_context;

    std::shared_ptr<TestMessageSender> m_avsConnectionManager;
    std::shared_ptr<TestDirectiveHandler> m_directiveHandler;
    std::shared_ptr<TestExceptionEncounteredSender> m_exceptionEncounteredSender;
    std::shared_ptr<DirectiveSequencerInterface> m_directiveSequencer;
    std::shared_ptr<MessageInterpreter> m_messageInterpreter;
    std::shared_ptr<afml::FocusManager> m_focusManager;
    std::shared_ptr<avsCommon::avs::DialogUXStateAggregator> m_dialogUXStateAggregator;
    std::shared_ptr<TestClient> m_testClient;
    std::shared_ptr<UserInactivityMonitor> m_userInactivityMonitor;
    std::shared_ptr<AudioInputProcessor> m_AudioInputProcessor;
    std::shared_ptr<AipStateObserver> m_StateObserver;
    std::shared_ptr<tapToTalkButton> m_tapToTalkButton;
    std::shared_ptr<holdToTalkButton> m_holdToTalkButton;
    std::shared_ptr<testStateProvider> m_stateProvider;
    std::unique_ptr<AudioInputStream::Writer> m_AudioBufferWriter;
    std::shared_ptr<AudioInputStream> m_AudioBuffer;
    std::shared_ptr<AudioProvider> m_TapToTalkAudioProvider;
    std::shared_ptr<AudioProvider> m_HoldToTalkAudioProvider;
    avsCommon::utils::AudioFormat m_compatibleAudioFormat;
#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
    std::shared_ptr<wakeWordTrigger> m_wakeWordTrigger;
#ifdef KWD_KITTAI
    std::unique_ptr<kwd::KittAiKeyWordDetector> m_detector;
#elif KWD_SENSORY
    std::unique_ptr<kwd::SensoryKeywordDetector> m_detector;
#endif
#endif
};

template <typename T>
std::vector<T> readAudioFromFile(const std::string& fileName, const int& headerPosition, bool* errorOccurred) {
    std::ifstream inputFile(fileName.c_str(), std::ifstream::binary);
    if (!inputFile.good()) {
        std::cout << "Couldn't open audio file!" << std::endl;
        if (errorOccurred) {
            *errorOccurred = true;
        }
        return {};
    }
    inputFile.seekg(0, std::ios::end);
    int fileLengthInBytes = inputFile.tellg();

    if (fileLengthInBytes <= headerPosition) {
        std::cout << "File should be larger than header position" << std::endl;
        if (errorOccurred) {
            *errorOccurred = true;
        }
        return {};
    }

    inputFile.seekg(headerPosition, std::ios::beg);

    int numSamples = (fileLengthInBytes - headerPosition) / sizeof(T);

    std::vector<T> retVal(numSamples, 0);

    inputFile.read((char*)&retVal[0], numSamples * sizeof(T));

    if (static_cast<size_t>(inputFile.gcount()) != numSamples * sizeof(T)) {
        std::cout << "Error reading audio file" << std::endl;
        if (errorOccurred) {
            *errorOccurred = true;
        }
        return {};
    }

    inputFile.close();
    if (errorOccurred) {
        *errorOccurred = false;
    }
    return retVal;
}

/**
 * Test AudioInputProcessor's ability to handle a simple interation triggered by a wakeword.
 *
 * To do this, audio of "Alexa, tell me a joke" is fed into a stream that is being read by a wake word engine. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute and Speak
 * directive.
 */
#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
TEST_F(AudioInputProcessorTest, wakeWordJoke) {
    // Put audio onto the SDS saying "Alexa, Tell me a joke".
    bool error;
    std::string file = g_inputPath + ALEXA_JOKE_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params =
        m_directiveHandler->waitForNext(std::chrono::seconds(LONG_TIMEOUT_DURATION));
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }
}
#endif

/**
 * Test AudioInputProcessor's ability to handle a recognize triggered by a wakeword followed by silence .
 *
 * To do this, audio of "Alexa, ........." is fed into a stream that is being read by a wake word engine. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with no directives.
 */
#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
TEST_F(AudioInputProcessorTest, wakeWordSilence) {
    // Put audio onto the SDS saying "Alexa ......".
    bool error;
    std::string file = g_inputPath + ALEXA_SILENCE_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // Check that a recognize event was sent
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that no prehandle or handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
}
#endif

/**
 * Test AudioInputProcessor's ability to handle a multiturn interation triggered by a wakeword.
 *
 * To do this, audio of "Alexa, wikipedia" is fed into a stream that is being read by a wake word engine. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute, Speak,
 * and ExpectSpeech directive. Audio of "Lions" is then fed into the stream and another recognize event is sent.
 */
#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
TEST_F(AudioInputProcessorTest, wakeWordMultiturn) {
    // Put audio onto the SDS saying "Alexa, wikipedia".
    bool error;
    std::string file = g_inputPath + ALEXA_WIKI_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params =
        m_directiveHandler->waitForNext(std::chrono::seconds(LONG_TIMEOUT_DURATION));
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }

    // Check that AIP is now in EXPECTING_SPEECH state.
    ASSERT_TRUE(m_StateObserver->checkState(
        AudioInputProcessorObserverInterface::State::EXPECTING_SPEECH, LONG_TIMEOUT_DURATION));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Lions".
    bool secondError;
    std::string secondFile = g_inputPath + LIONS_AUDIO_FILE;
    std::vector<int16_t> secondAudioData = readAudioFromFile<int16_t>(secondFile, RIFF_HEADER_SIZE, &secondError);
    ASSERT_FALSE(secondError);
    m_AudioBufferWriter->write(secondAudioData.data(), secondAudioData.size());

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }
}
#endif

/**
 * Test AudioInputProcessor's ability to handle a simple interation triggered by a wakeword but no user response.
 *
 * To do this, audio of "Alexa, wikipedia" is fed into a stream that is being read by a wake word engine. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute, Speak,
 * and ExpectSpeech directive. Audio of "...." is then fed into the stream and another recognize event is sent
 * but no directives are given in response.
 */
#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
TEST_F(AudioInputProcessorTest, wakeWordMultiturnWithoutUserResponse) {
    // Put audio onto the SDS saying "Alexa, wikipedia".
    bool error;
    std::string file = g_inputPath + ALEXA_WIKI_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // Check that a recognize event was sent
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that prehandle and handle for setMute and Speak has reached the test SS
    TestDirectiveHandler::DirectiveParams params =
        m_directiveHandler->waitForNext(std::chrono::seconds(LONG_TIMEOUT_DURATION));

    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            std::this_thread::sleep_for(std::chrono::seconds(2));
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }

    // Check that AIP is now in EXPECTING_SPEECH state.
    ASSERT_TRUE(m_StateObserver->checkState(
        AudioInputProcessorObserverInterface::State::EXPECTING_SPEECH, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying ".......".
    bool secondError;
    std::string secondFile = g_inputPath + SILENCE_AUDIO_FILE;
    std::vector<int16_t> secondAudioData = readAudioFromFile<int16_t>(secondFile, RIFF_HEADER_SIZE, &secondError);
    ASSERT_FALSE(secondError);
    m_AudioBufferWriter->write(secondAudioData.data(), secondAudioData.size());

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was not sent.
    ASSERT_FALSE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_FALSE(m_stateProvider->checkStateRequested());

    // The test channel client has not changed.
    ASSERT_EQ(m_testClient->waitForFocusChange(SHORT_TIMEOUT_DURATION), FocusState::NONE);
}
#endif

/**
 * Test AudioInputProcessor's ability to handle a simple interation triggered by a tap to talk button.
 *
 * To do this, audio of "Tell me a joke" is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute and Speak
 * directive.
 */
TEST_F(AudioInputProcessorTest, DISABLED_tapToTalkJoke) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, m_TapToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Tell me a joke".
    bool error;
    std::string file = g_inputPath + JOKE_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }
}

TEST_F(AudioInputProcessorTest, tapToTalkTimeOpus) {
    m_compatibleAudioFormat.sampleRateHz = COMPATIBLE_SAMPLE_RATE_OPUS_32;
    m_compatibleAudioFormat.numChannels = COMPATIBLE_NUM_CHANNELS;
    m_compatibleAudioFormat.endianness = COMPATIBLE_ENDIANNESS;
    m_compatibleAudioFormat.encoding = avsCommon::utils::AudioFormat::Encoding::OPUS;

    bool alwaysReadable = true;
    bool canOverride = true;
    bool canBeOverridden = true;
    std::shared_ptr<AudioProvider> tapToTalkAudioProvider;
    tapToTalkAudioProvider = std::make_shared<AudioProvider>(
        m_AudioBuffer, m_compatibleAudioFormat, ASRProfile::NEAR_FIELD, alwaysReadable, canOverride, !canBeOverridden);
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, tapToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "What time is it?".
    bool error;
    std::string file = g_inputPath + TIME_AUDIO_FILE_OPUS;
    int headerSize = 0;
    std::vector<uint8_t> audioData = readAudioFromFile<uint8_t>(file, headerSize, &error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));
}

/**
 * Test AudioInputProcessor's ability to handle a silent interation triggered by a tap to talk button.
 *
 * To do this, audio of "....." is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds no directives.
 */
TEST_F(AudioInputProcessorTest, tapToTalkSilence) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, m_TapToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying ".......".
    bool error;
    std::string file = g_inputPath + SILENCE_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that no directives arrived to the fake SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
}

/**
 * Test AudioInputProcessor's ability to handle no audio being written triggered by a tap to talk button.
 *
 * To do this, no audio is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with no directive.
 */
TEST_F(AudioInputProcessorTest, tapToTalkNoAudio) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, m_TapToTalkAudioProvider));

    // Put no audio onto the SDS.

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that a recognize event was sent
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has not changed.
    ASSERT_EQ(m_testClient->waitForFocusChange(SHORT_TIMEOUT_DURATION), FocusState::NONE);

    // Check that no directives arrived to the fake SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
}

/**
 * Test AudioInputProcessor's ability to handle an interation triggered by a tap to talk button with wake word.
 *
 * To do this, audio of "Alexa, Tell me a joke" is fed into a stream after button sends recognize to
 * AudioInputProcessor. The AudioInputProcessor is then observed to send only one Recognize event to AVS which responds
 * with a SetMute and Speak directive.
 */
#if defined(KWD_KITTAI) || defined(KWD_SENSORY)
TEST_F(AudioInputProcessorTest, tapToTalkWithWakeWordConflict) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, m_TapToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Alexa, Tell me a joke".
    bool error;
    std::string file = g_inputPath + ALEXA_JOKE_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }
}
#endif

/**
 * Test AudioInputProcessor's ability to handle a multiturn interation triggered by a tap to talk button.
 *
 * To do this, audio of "Wikipedia" is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute, Speak,
 * and ExpectSpeech directive. Audio of "Lions" is then fed into the stream and another recognize event is sent.
 */
TEST_F(AudioInputProcessorTest, tapToTalkMultiturn) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, m_TapToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Wikipedia".
    bool error;
    std::string file = g_inputPath + WIKI_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            std::this_thread::sleep_for(std::chrono::seconds(2));
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION);
    }

    // Check that AIP is now in EXPECTING_SPEECH state.
    ASSERT_TRUE(m_StateObserver->checkState(
        AudioInputProcessorObserverInterface::State::EXPECTING_SPEECH, SHORT_TIMEOUT_DURATION));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, SHORT_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Lions".
    bool secondError;
    std::string secondFile = g_inputPath + LIONS_AUDIO_FILE;
    std::vector<int16_t> secondAudioData = readAudioFromFile<int16_t>(secondFile, RIFF_HEADER_SIZE, &secondError);
    ASSERT_FALSE(secondError);
    m_AudioBufferWriter->write(secondAudioData.data(), secondAudioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }
}

/**
 * Test AudioInputProcessor's ability to handle a multiturn interation triggered by a tap to talk button but no user
 * response.
 *
 * To do this, audio of "Wikipedia" is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute, Speak,
 * and ExpectSpeech directive. Audio of "...." is then fed into the stream and another recognize event is sent
 * but no directives are given in response.
 */
TEST_F(AudioInputProcessorTest, tapToTalkMultiturnWithoutUserResponse) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, m_TapToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Wikipedia".
    bool error;
    std::string file = g_inputPath + WIKI_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    bool expectSpeechFound = true;
    TestDirectiveHandler::DirectiveParams params;
    while (expectSpeechFound) {
        // Check that AIP is in an IDLE state before starting.
        ASSERT_TRUE(
            m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

        // Check that prehandle and handle for setMute and Speak has reached the test SS.
        params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION);
        ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
        while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
            if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
                std::this_thread::sleep_for(std::chrono::seconds(2));
                params.result->setCompleted();
            }
            params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION);
        }

        // Check that AIP is now in EXPECTING_SPEECH state.
        ASSERT_TRUE(m_StateObserver->checkState(
            AudioInputProcessorObserverInterface::State::EXPECTING_SPEECH, SHORT_TIMEOUT_DURATION));

        // Put audio onto the SDS saying ".......".
        bool secondError;
        std::string secondFile = g_inputPath + SILENCE_AUDIO_FILE;
        std::vector<int16_t> secondAudioData = readAudioFromFile<int16_t>(secondFile, RIFF_HEADER_SIZE, &secondError);
        ASSERT_FALSE(secondError);
        m_AudioBufferWriter->write(secondAudioData.data(), secondAudioData.size());

        // Check that AIP is now in RECOGNIZING state.
        ASSERT_TRUE(m_StateObserver->checkState(
            AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

        // Check that a recognize event was sent.
        ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

        // The test channel client has been notified the alarm channel has been backgrounded.
        ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

        // Check that AIP is in BUSY state.
        ASSERT_TRUE(
            m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

        // Check that AIP is in an IDLE state.
        ASSERT_TRUE(
            m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

        // Check that the test context provider was asked to provide context for the event.
        ASSERT_TRUE(m_stateProvider->checkStateRequested());

        // The test channel client has been notified the alarm channel has been foregrounded.
        ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

        // Check that the test context provider was asked to provide context for the event.
        ASSERT_FALSE(m_stateProvider->checkStateRequested());
        params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION);
        if (params.type == TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
            expectSpeechFound = false;
        }
    }
    // The test channel client has not changed.
    ASSERT_EQ(m_testClient->waitForFocusChange(SHORT_TIMEOUT_DURATION), FocusState::NONE);

    // Check that a recognize event was not sent.
    ASSERT_FALSE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that no directives arrived to the fake SS.
    ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
}

/**
 * Test AudioInputProcessor's ability to handle a cancel partway through an interaction.
 *
 * To do this, audio of "Tell me a joke" is fed into a stream after button sends recognize to AudioInputProcessor. The
 * button then sends a reset command and no recognize event is sent.
 */
TEST_F(AudioInputProcessorTest, tapToTalkCancel) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_tapToTalkButton->startRecognizing(m_AudioInputProcessor, m_TapToTalkAudioProvider));

    // Cancel the interaction.
    m_AudioInputProcessor->resetState();

    // Check that AIP was briefly in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Tell me a joke".
    bool error;
    std::string file = g_inputPath + JOKE_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that no directives arrived to the fake SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
}

/**
 * Test AudioInputProcessor's ability to handle a simple interation triggered by a hold to talk button.
 *
 * To do this, audio of "Tell me a joke" is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute and Speak
 * directive.
 */
TEST_F(AudioInputProcessorTest, holdToTalkJoke) {
    // Signal to the AIP to start recognizing.
    ASSERT_NE(nullptr, m_HoldToTalkAudioProvider);
    ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Tell me a joke".
    bool error;
    std::string file = g_inputPath + JOKE_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Stop holding the button.
    ASSERT_TRUE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }
}

/**
 * Test AudioInputProcessor's ability to handle a multiturn interation triggered by a hold to talk button.
 *
 * To do this, audio of "Wikipedia" is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute, Speak,
 * and ExpectSpeech directive. Audio of "Lions" is then fed into the stream and another recognize event is sent.
 */
TEST_F(AudioInputProcessorTest, holdToTalkMultiturn) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Wikipedia".
    bool error;
    std::string file = g_inputPath + WIKI_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Stop holding the button.
    ASSERT_TRUE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            std::this_thread::sleep_for(std::chrono::seconds(2));
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION);
    }

    // Check that AIP is now in EXPECTING_SPEECH state.
    ASSERT_TRUE(m_StateObserver->checkState(
        AudioInputProcessorObserverInterface::State::EXPECTING_SPEECH, LONG_TIMEOUT_DURATION));

    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider));

    // Put audio onto the SDS of "Lions".
    bool secondError;
    file = g_inputPath + LIONS_AUDIO_FILE;
    std::vector<int16_t> secondAudioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &secondError);
    ASSERT_FALSE(secondError);
    m_AudioBufferWriter->write(secondAudioData.data(), secondAudioData.size());

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Stop holding the button.
    ASSERT_TRUE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    }
}

/**
 * Test AudioInputProcessor's ability to handle a multiturn interation triggered by a hold to talk button but no user
 * response.
 *
 * To do this, audio of "Wikipedia" is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute, Speak,
 * and ExpectSpeech directive. Audio of "...." is then fed into the stream and another recognize event is sent
 * but no directives are given in response.
 */
TEST_F(AudioInputProcessorTest, holdToTalkMultiTurnWithSilence) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Wikipedia".
    bool error;
    std::string file = g_inputPath + WIKI_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Stop holding the button.
    ASSERT_TRUE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    bool expectSpeechFound = true;
    TestDirectiveHandler::DirectiveParams params;
    while (expectSpeechFound) {
        // Check that AIP is in an IDLE state before starting.
        ASSERT_TRUE(
            m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

        // Check that prehandle and handle for setMute and Speak has reached the test SS.
        params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);

        ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);

        while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
            if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
                std::this_thread::sleep_for(std::chrono::seconds(2));
                params.result->setCompleted();
            }
            params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION);
        }

        // Check that AIP is now in EXPECTING_SPEECH state.
        ASSERT_TRUE(m_StateObserver->checkState(
            AudioInputProcessorObserverInterface::State::EXPECTING_SPEECH, SHORT_TIMEOUT_DURATION));

        // Signal to the AIP to start recognizing.
        ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider));

        // Put audio onto the SDS saying ".......".
        bool secondError;
        std::string secondFile = g_inputPath + SILENCE_AUDIO_FILE;
        std::vector<int16_t> secondAudioData = readAudioFromFile<int16_t>(secondFile, RIFF_HEADER_SIZE, &secondError);
        ASSERT_FALSE(secondError);
        m_AudioBufferWriter->write(secondAudioData.data(), secondAudioData.size());

        // Check that AIP is now in RECOGNIZING state.
        ASSERT_TRUE(m_StateObserver->checkState(
            AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

        // Stop holding the button.
        ASSERT_TRUE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor));

        // Check that a recognize event was sent.
        ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

        // The test channel client has been notified the alarm channel has been backgrounded.
        ASSERT_EQ(m_testClient->waitForFocusChange(NO_TIMEOUT_DURATION), FocusState::BACKGROUND);

        // Check that AIP is in BUSY state.
        ASSERT_TRUE(
            m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

        // Check that AIP is in an IDLE state.
        ASSERT_TRUE(
            m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

        // Check that the test context provider was asked to provide context for the event.
        ASSERT_TRUE(m_stateProvider->checkStateRequested());

        // The test channel client has been notified the alarm channel has been foregrounded.
        ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

        // Check that the test context provider was asked to provide context for the event.
        ASSERT_FALSE(m_stateProvider->checkStateRequested());
        params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
        if (params.type == TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
            expectSpeechFound = false;
        }
    }
    // The test channel client has not changed.
    ASSERT_EQ(m_testClient->waitForFocusChange(SHORT_TIMEOUT_DURATION), FocusState::NONE);

    // Check that a recognize event was not sent.
    ASSERT_FALSE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that no directives arrived to the fake SS.
    ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
}

/**
 * Test AudioInputProcessor's ability to handle a multiturn interation triggered by a hold to talk button that times
 * out.
 *
 * To do this, audio of "Wikipedia" is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with a SetMute, Speak,
 * and ExpectSpeech directive. The button does not trigger another recognize so no recognize event is sent
 * and no directives are given in response. ExpectSpeechTimedOut event is observed to be sent.
 */
TEST_F(AudioInputProcessorTest, holdToTalkMultiturnWithTimeOut) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Put audio onto the SDS saying "Wikipedia".
    bool error;
    std::string file = g_inputPath + WIKI_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Stop holding the button.
    ASSERT_TRUE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that prehandle and handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(LONG_TIMEOUT_DURATION);
    ASSERT_NE(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
    while (params.type != TestDirectiveHandler::DirectiveParams::Type::TIMEOUT) {
        if (params.isHandle() && params.directive->getName() == NAME_SPEAK) {
            std::this_thread::sleep_for(std::chrono::seconds(2));
            params.result->setCompleted();
        }
        params = m_directiveHandler->waitForNext(SHORT_TIMEOUT_DURATION);
    }

    // Do not signal to the AIP to start recognizing.

    // Check that AIP is now in EXPECTING_SPEECH state.
    ASSERT_TRUE(m_StateObserver->checkState(
        AudioInputProcessorObserverInterface::State::EXPECTING_SPEECH, LONG_TIMEOUT_DURATION));

    // The test channel client has stayed foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(SHORT_TIMEOUT_DURATION), FocusState::NONE);

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that ExpectSpeechTimeOut event has been sent.
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_EXPECT_SPEECH_TIMED_OUT));
}

/**
 * Test AudioInputProcessor's ability to handle no audio being written triggered by a hold to talk button.
 *
 * To do this, no audio is fed into a stream after button sends recognize to AudioInputProcessor. The
 * AudioInputProcessor is then observed to send a Recognize event to AVS which responds with no directive.
 */
TEST_F(AudioInputProcessorTest, holdToTalkNoAudio) {
    // Signal to the AIP to start recognizing.
    ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Write nothing to the SDS.

    // The test channel client has been notified the alarm channel has been backgrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::BACKGROUND);

    // Stop holding the button.
    ASSERT_TRUE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor));

    // Check that AIP is in BUSY state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::BUSY, LONG_TIMEOUT_DURATION));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(LONG_TIMEOUT_DURATION), FocusState::FOREGROUND);

    // Check that a recognize event was sent
    ASSERT_TRUE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that no prehandle or handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
}

/**
 * Test AudioInputProcessor's ability to handle a cancel partway through a hold to talk interaction.
 *
 * To do this, audio of "Tell me a joke" is fed into a stream after button sends recognize to AudioInputProcessor. The
 * button then sends a cancel command and no recognize event is sent.
 */
TEST_F(AudioInputProcessorTest, holdToTalkCancel) {
    // Signal to the AIP to start recognizing.
    ASSERT_NE(nullptr, m_HoldToTalkAudioProvider);
    ASSERT_TRUE(m_holdToTalkButton->startRecognizing(m_AudioInputProcessor, m_HoldToTalkAudioProvider));

    // Check that AIP is now in RECOGNIZING state.
    ASSERT_TRUE(
        m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::RECOGNIZING, LONG_TIMEOUT_DURATION));

    // Cancel the recognize.
    m_AudioInputProcessor->resetState();

    // Put audio onto the SDS saying "Tell me a joke".
    bool error;
    std::string file = g_inputPath + JOKE_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    ASSERT_FALSE(audioData.empty());
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // Stop holding the button.
    ASSERT_FALSE(m_holdToTalkButton->stopRecognizing(m_AudioInputProcessor));

    // Check that AIP is in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, LONG_TIMEOUT_DURATION));

    // Check that the test context provider was not asked to provide context for the event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // Check that no recognize event was sent.
    ASSERT_FALSE(checkSentEventName(m_avsConnectionManager, NAME_RECOGNIZE));

    // Check that no prehandle or handle for setMute and Speak has reached the test SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
}

/**
 * Test AudioInputProcessor's ability to not handle audio when no recognize occurs.
 *
 * To do this, audio of "Tell me a joke" is fed into a stream that is being read by a wake word engine. The
 * lack of the wakeword or button-initiated recognize results in no recognize event being sent.
 */
TEST_F(AudioInputProcessorTest, audioWithoutAnyTrigger) {
    // Put audio onto the SDS saying "Tell me a joke" without a trigger.
    bool error;
    std::string file = g_inputPath + JOKE_AUDIO_FILE;
    std::vector<int16_t> audioData = readAudioFromFile<int16_t>(file, RIFF_HEADER_SIZE, &error);
    ASSERT_FALSE(error);
    m_AudioBufferWriter->write(audioData.data(), audioData.size());

    // Check that AIP is still in an IDLE state.
    ASSERT_TRUE(m_StateObserver->checkState(AudioInputProcessorObserverInterface::State::IDLE, SHORT_TIMEOUT_DURATION));

    // Check that the test context provider was asked to provide context as the post-connect objects would have fetched
    // context to send StateSynchronizer event.
    ASSERT_TRUE(m_stateProvider->checkStateRequested());

    // The test channel client has been not notified the alarm channel has been foregrounded.
    ASSERT_EQ(m_testClient->waitForFocusChange(SHORT_TIMEOUT_DURATION), FocusState::NONE);

    // Check that no prehandle or handle has reached the test SS.
    TestDirectiveHandler::DirectiveParams params = m_directiveHandler->waitForNext(NO_TIMEOUT_DURATION);
    ASSERT_EQ(params.type, TestDirectiveHandler::DirectiveParams::Type::TIMEOUT);
}

}  // namespace test
}  // namespace integration
}  // namespace alexaClientSDK

int main(int argc, char** argv) {
    ::testing::InitGoogleTest(&argc, argv);
    if (argc < 3) {
        std::cerr << "USAGE: " << std::string(argv[0]) << " <path_to_AlexaClientSDKConfig.json> <path_to_inputs_folder>"
                  << std::endl;
        return 1;

    } else {
        alexaClientSDK::integration::test::g_configPath = std::string(argv[1]);
        alexaClientSDK::integration::test::g_inputPath = std::string(argv[2]);
        return RUN_ALL_TESTS();
    }
}