SmartAudio/package/avs/sampleapp/files/source/sample-wakeup/main.cpp

/*
* main.cpp
*
* Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/apache2.0/
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <pthread.h>
#include <time.h>
#include <sys/time.h>
#include <signal.h>
#include <cstdlib>
#include <string>
#include <algorithm>
#include <cctype>
#include <fstream>
#include <chrono>
#include <iostream>
#include <memory>
#include <set>
#include <unordered_set>
#include <vector>
#include "SampleApp/AlsaMicrophoneWrapper.h"
#include "SampleApp/ConsolePrinter.h"
#ifdef KWD_KITTAI
#include <KittAi/KittAiKeyWordDetector.h>
#endif
#ifdef KWD_SENSORY
#include <Sensory/SensoryKeywordDetector.h>
#endif
#ifdef KWD_AMAZONLITE
#include <AmazonLite/PryonLiteKeywordDetector.h>
#endif
#include <TutuClearKeywordDetector/TutuClearKeywordDetector.h>
#include <AVSCommon/SDKInterfaces/KeyWordObserverInterface.h>
#include <AIP/AudioInputProcessor.h>
#include <utils/WavUtils.h>
#include <platformadapter/PlatformAdapter.h>
/// The sample rate of microphone audio data.
static const unsigned int SAMPLE_RATE_HZ = 16000;
/// The number of audio channels.
static const unsigned int NUM_CHANNELS = 1;
/// The size of each word within the stream.
static const size_t WORD_SIZE = 2;
/// The maximum number of readers of the stream.
static const size_t MAX_READERS = 10;
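/// The number of bits per byte.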
static const size_t CHAR_BIT = 8;
/// The amount of audio data to keep in the ring buffer.
static const std::chrono::seconds AMOUNT_OF_AUDIO_DATA_IN_BUFFER = std::chrono::seconds(15);
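/// The timeout to use for read calls to the shared data stream.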
const std::chrono::milliseconds TIMEOUT_FOR_READ_CALLS = std::chrono::milliseconds(1000);
/// The size of the ring buffer.
static const size_t BUFFER_SIZE_IN_SAMPLES = (SAMPLE_RATE_HZ)*AMOUNT_OF_AUDIO_DATA_IN_BUFFER.count();
/// Key for the root node value containing configuration values for SampleApp.
static const std::string SAMPLE_APP_CONFIG_KEY("sampleApp");
/// Key for the endpoint value under the @c SAMPLE_APP_CONFIG_KEY configuration node.
static const std::string ENDPOINT_KEY("endpoint");
/// Default AVS endpoint to connect to.
static const std::string DEFAULT_ENDPOINT("https://avs-alexa-na.amazon.com");
#ifdef KWD_KITTAI
/// The sensitivity of the Kitt.ai engine.
static const double KITT_AI_SENSITIVITY = 0.6;
/// The audio amplifier level of the Kitt.ai engine.
static const float KITT_AI_AUDIO_GAIN = 2.0;
/// Whether Kitt.ai should apply front end audio processing.
static const bool KITT_AI_APPLY_FRONT_END_PROCESSING = true;
#endif
/// A set of all log levels.
static const std::set<alexaClientSDK::avsCommon::utils::logger::Level> allLevels = {
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG9,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG8,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG7,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG6,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG5,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG4,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG3,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG2,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG1,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG0,
alexaClientSDK::avsCommon::utils::logger::Level::INFO,
alexaClientSDK::avsCommon::utils::logger::Level::WARN,
alexaClientSDK::avsCommon::utils::logger::Level::ERROR,
alexaClientSDK::avsCommon::utils::logger::Level::CRITICAL,
alexaClientSDK::avsCommon::utils::logger::Level::NONE};
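/// Returns a timestamp string of the form YYYY-MM-DD-HH-MM-SS-usec, used for log output and wake-word capture file names.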
static std::string getTimeStamp() {
    struct timeval tv;
    struct timezone tz;
    struct tm* p;
    gettimeofday(&tv, &tz);
    p = localtime(&tv.tv_sec);
    char buf[200];
    snprintf(
        buf,
        sizeof(buf),
        "%02d-%02d-%02d-%02d-%02d-%02d-%06ld",
        p->tm_year + 1900,
        1 + p->tm_mon,
        p->tm_mday,
        p->tm_hour,
        p->tm_min,
        p->tm_sec,
        tv.tv_usec);
    return std::string(buf);
}
/**
 * Observes callbacks from keyword detections, drives the wake-up LED show, and optionally saves the
 * detected keyword audio to a WAV file for offline inspection.
 */
class KeywordObserver : public alexaClientSDK::avsCommon::sdkInterfaces::KeyWordObserverInterface {
public:
/**
* Constructor.
*
 * @param show The show manager used to display the wake-up LED profile.
 * @param doa The direction-of-arrival info provider for the microphone array.
 * @param format The audio format of the shared data stream.
 * @param dir The directory in which captured wake-word audio is saved.
 * @param is_save Whether to save the wake-word audio to a WAV file on each detection.
*/
KeywordObserver(std::shared_ptr<AW::IShowManager> show,
std::shared_ptr<AW::DOAInfo> doa,
alexaClientSDK::avsCommon::utils::AudioFormat format,
std::string dir,
bool is_save = false){
m_show = show;
m_format = format;
m_is_save_data = is_save;
m_wakeup_data = new int16_t[m_cache_samples];
m_doa = doa;
m_dir = dir;
}
    ~KeywordObserver() {
        // m_wakeup_data is allocated with new[], so release it with delete[].
        delete[] m_wakeup_data;
    }
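    /**
     * Called by the keyword detector when a wake word is observed in the audio stream. Triggers the wake-up
     * LED show and, when saving is enabled and the begin index is valid, copies the keyword samples out of
     * the shared stream and writes them to a timestamped WAV file.
     */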
void onKeyWordDetected(
std::shared_ptr<alexaClientSDK::avsCommon::avs::AudioInputStream> stream,
std::string keyword,
alexaClientSDK::avsCommon::avs::AudioInputStream::Index beginIndex,
alexaClientSDK::avsCommon::avs::AudioInputStream::Index endIndex,
std::shared_ptr<const std::vector<char>> KWDMetadata) {
std::cout<< "["
<< getTimeStamp()
<< "] key work \""
<< keyword
<<"\" detect! direction: "
<< m_doa->get()
<< ", count: "
<< m_count++
<< " begin: "
<< beginIndex
<< ", end: "
<< endIndex ;
m_show->enableShow(AW::Profile::WAKEUPTEST, AW::ProfileFlag::REPLACE);
if (alexaClientSDK::capabilityAgents::aip::AudioInputProcessor::INVALID_INDEX == beginIndex || !m_is_save_data){
std::cout << std::endl;
return;
}
        auto reader =
            stream->createReader(alexaClientSDK::avsCommon::utils::sds::InProcessSDS::Reader::Policy::NONBLOCKING);
        if (!reader->seek(beginIndex)) {
            std::cout << std::endl;
            return;
        }
        ssize_t nWords = endIndex - beginIndex;
        // Grow the cache buffer if the detected keyword spans more samples than we can currently hold.
        if (m_cache_samples < nWords) {
            delete[] m_wakeup_data;
            m_wakeup_data = new int16_t[nWords];
            m_cache_samples = nWords;
        }
        reader->read((void*)m_wakeup_data, nWords);
std::string file_path = m_dir + getTimeStamp() + ".wav";
AW::WavUtils wav;
wav.create(file_path, "wb", m_format.sampleSizeInBits, m_format.numChannels, m_format.sampleRateHz);
wav.write((char*)m_wakeup_data, nWords);
wav.release();
std::cout << "\t, wakup up pcm files: " << file_path << std::endl;
}
private:
int m_count = 0;
bool m_is_save_data = false;
int16_t *m_wakeup_data = nullptr;
ssize_t m_cache_samples = 16000*2;
std::string m_dir;
alexaClientSDK::avsCommon::utils::AudioFormat m_format;
std::shared_ptr<AW::IShowManager> m_show;
std::shared_ptr<AW::DOAInfo> m_doa;
};
/// Observes hardware button events from the platform adapter and handles privacy mute and audio-jack changes.
class ButtonInputManager : public AW::ButtonManager::Observer,
public std::enable_shared_from_this<ButtonInputManager>
{
public:
/**
* Create a ButtonInputManager.
*
 * @param platformadapter The platform adapter whose button manager this observer registers with.
* @return Returns a new @c ButtonInputManager, or @c nullptr if the operation failed.
*/
    static std::shared_ptr<ButtonInputManager> create(std::shared_ptr<AW::PlatformAdapter> platformadapter) {
        auto button_manager = std::shared_ptr<ButtonInputManager>(new ButtonInputManager(platformadapter));
        platformadapter->getButtonManager()->addButtonObserver(button_manager);
        return button_manager;
    }
    /**
     * Placeholder for a blocking input loop; button events arrive via observer callbacks, so this returns immediately.
     */
    int run() { return 0; }
void stop(){
m_platformadapter->getButtonManager()->removeButtonObserver(shared_from_this());
};
private:
/**
* Constructor.
*/
ButtonInputManager(std::shared_ptr<AW::PlatformAdapter> platformadapter){
m_platformadapter = platformadapter;
m_platformadapter->getMuteManager()->privacyMute(false);
m_status = Status::PRIVATE_UNMUTE;
};
void onVolumeUp(){};
void onVolumeDown(){};
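    /// Toggles the privacy-mute state and updates the LED show to match.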
void onMute(){
if(m_status == Status::PRIVATE_UNMUTE){
m_status = Status::PRIVATE_MUTE;
m_platformadapter->getShowManager()->enableShow(AW::Profile::MUTE, AW::ProfileFlag::REPLACE);
m_platformadapter->getMuteManager()->privacyMute(true);
}else{
m_status = Status::PRIVATE_UNMUTE;
m_platformadapter->getShowManager()->enableShow(AW::Profile::UNMUTE, AW::ProfileFlag::REPLACE);
m_platformadapter->getMuteManager()->privacyMute(false);
}
};
void onAudioJackPlugIn(){
m_platformadapter->getAudioJackManager()->doAudioJackPlugIn();
};
void onAudioJackPlugOut(){
m_platformadapter->getAudioJackManager()->doAudioJackPlugOut();
};
private:
    /// The platform adapter that provides access to the button, mute, show, and audio-jack managers.
std::shared_ptr<AW::PlatformAdapter> m_platformadapter;
enum class Status
{
PRIVATE_MUTE,
PRIVATE_UNMUTE
};
Status m_status{Status::PRIVATE_UNMUTE};
};
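/// Signal handler: logs capture-stopping signals (SIGINT/SIGQUIT/SIGHUP) and ignores SIGPIPE.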
static void when_signal(int sig)
{
    switch (sig) {
        case SIGINT:
        case SIGQUIT:
        case SIGHUP: {
            printf("signal received, stopping the capture\n");
            break;
        }
        case SIGPIPE: {
            // When the client is closed after it has started scanning and parsing,
            // this signal may arrive; ignore it.
            printf("do nothing for PIPE signal\n");
            break;
        }
    }
}
/**
 * This serves as the starting point for the application. It creates the shared audio stream, the microphone
 * wrapper, the button input manager, and the configured keyword detector, then streams microphone data until
 * the process is terminated by a signal.
 *
 * @param argc The number of elements in the @c argv array.
 * @param argv An array of @c argc elements, containing the program name and all command-line arguments.
 * @return @c EXIT_FAILURE if the program failed to initialize correctly, else @c EXIT_SUCCESS.
 */
int main(int argc, char** argv) {
signal(SIGHUP,when_signal);
signal(SIGQUIT,when_signal);
signal(SIGINT,when_signal);
signal(SIGPIPE,when_signal);
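    // Installing these handlers lets a caught signal (e.g. SIGINT) interrupt the long sleep() at the end of
    // main() so the app can stop streaming and exit cleanly.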
    std::string pathToInputFolder;
    std::string logLevel;
    if (argc < 2) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint(
            "USAGE: " + std::string(argv[0]) + " <path_to_inputs_folder> [log_level]");
        return EXIT_FAILURE;
    } else {
        pathToInputFolder = std::string(argv[1]);
        if (argc >= 3) {
            logLevel = std::string(argv[2]);
        }
    }
/*
* Creating the buffer (Shared Data Stream) that will hold user audio data. This is the main input into the SDK.
*/
size_t bufferSize = alexaClientSDK::avsCommon::avs::AudioInputStream::calculateBufferSize(
BUFFER_SIZE_IN_SAMPLES, WORD_SIZE, MAX_READERS);
auto buffer = std::make_shared<alexaClientSDK::avsCommon::avs::AudioInputStream::Buffer>(bufferSize);
std::shared_ptr<alexaClientSDK::avsCommon::avs::AudioInputStream> sharedDataStream =
alexaClientSDK::avsCommon::avs::AudioInputStream::create(buffer, WORD_SIZE, MAX_READERS);
if (!sharedDataStream) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create shared data stream!");
        return EXIT_FAILURE;
}
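    // Describe the data written into the shared stream: 16 kHz, 16-bit, mono, little-endian LPCM.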
alexaClientSDK::avsCommon::utils::AudioFormat compatibleAudioFormat;
compatibleAudioFormat.sampleRateHz = SAMPLE_RATE_HZ;
compatibleAudioFormat.sampleSizeInBits = WORD_SIZE * CHAR_BIT;
compatibleAudioFormat.numChannels = NUM_CHANNELS;
compatibleAudioFormat.endianness = alexaClientSDK::avsCommon::utils::AudioFormat::Endianness::LITTLE;
compatibleAudioFormat.encoding = alexaClientSDK::avsCommon::utils::AudioFormat::Encoding::LPCM;
auto platform = AW::PlatformAdapter::create(pathToInputFolder.data());
if(platform == nullptr) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to get " + pathToInputFolder + " for configure!");
return false;
}
auto buttonmanager = ButtonInputManager::create(platform);
if(buttonmanager == nullptr) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create buttonmanager!");
return false;
}
buttonmanager->run();
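    // Whether to dump the detected wake-word audio to WAV files, and the directory to write them into
    // (disabled by default; in that case the KeywordObserver only logs detections).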
bool is_save_wakeup = false;
std::string wakeup_data_dir = "";
std::shared_ptr<alexaClientSDK::sampleApp::MicrophoneWrapperInterface> micWrapper =
alexaClientSDK::sampleApp::AlsaMicrophoneWrapper::create(sharedDataStream, nullptr, platform);
if (!micWrapper) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create AlsaMicrophoneWrapper!");
        return EXIT_FAILURE;
}
    // This observer is notified any time a keyword is detected; it triggers the wake-up show and optionally saves the audio.
auto keywordObserver = std::make_shared<KeywordObserver>(platform->getShowManager(),
platform->getRecorder()->getFilter()->getDOAInfo(),
compatibleAudioFormat,
wakeup_data_dir,
is_save_wakeup);
std::unique_ptr<alexaClientSDK::kwd::AbstractKeywordDetector> m_keywordDetector{nullptr};
const char *detector = platform->getDetectorType();
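    // Select the keyword detection engine named in the platform configuration ("kitt.ai", "sensory",
    // "amazon-lite", or "tutudetect"); the first three are only available when compiled in via the
    // corresponding KWD_* define.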
#if defined(KWD_KITTAI)
if(strcmp(detector, "kitt.ai") == 0) {
m_keywordDetector = alexaClientSDK::kwd::KittAiKeyWordDetector::create(
sharedDataStream,
compatibleAudioFormat,
{keywordObserver},
std::unordered_set<
std::shared_ptr<alexaClientSDK::avsCommon::sdkInterfaces::KeyWordDetectorStateObserverInterface>>(),
pathToInputFolder + "/common.res",
{{pathToInputFolder + "/alexa.umdl", "ALEXA", KITT_AI_SENSITIVITY}},
KITT_AI_AUDIO_GAIN,
KITT_AI_APPLY_FRONT_END_PROCESSING);
if (!m_keywordDetector) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create KittAiKeyWordDetector!");
            return EXIT_FAILURE;
        }
    }
#endif
#if defined(KWD_SENSORY)
if(strcmp(detector, "sensory") == 0) {
m_keywordDetector = alexaClientSDK::kwd::SensoryKeywordDetector::create(
sharedDataStream,
compatibleAudioFormat,
{keywordObserver},
std::unordered_set<
std::shared_ptr<alexaClientSDK::avsCommon::sdkInterfaces::KeyWordDetectorStateObserverInterface>>(),
platform->getSensoryModel());
if (!m_keywordDetector) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create SensoryKeyWordDetector!");
            return EXIT_FAILURE;
}
}
#endif
#if defined(KWD_AMAZONLITE)
if(strcmp(detector, "amazon-lite") == 0) {
m_keywordDetector = alexaClientSDK::kwd::PryonLiteKeywordDetector::create(
sharedDataStream,
compatibleAudioFormat,
{keywordObserver},
std::unordered_set<
std::shared_ptr<alexaClientSDK::avsCommon::sdkInterfaces::KeyWordDetectorStateObserverInterface>>(),
platform->getAmazonliteModel(),
platform->getAmazonliteDetectThreshold());
if (!m_keywordDetector) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create PryonLiteKeywordDetector!");
            return EXIT_FAILURE;
}
}
#endif
if(strcmp(detector, "tutudetect") == 0) {
m_keywordDetector = alexaClientSDK::kwd::TutuClearKeywordDetector::create(
platform,
sharedDataStream,
compatibleAudioFormat,
{keywordObserver},
std::unordered_set<
std::shared_ptr<alexaClientSDK::avsCommon::sdkInterfaces::KeyWordDetectorStateObserverInterface>>());
if (!m_keywordDetector) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create TutuClearKeywordDetector!");
            return EXIT_FAILURE;
}
}
micWrapper->startStreamingMicrophoneData();
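    // Block here until a handled signal interrupts the sleep, then stop streaming and exit.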
sleep(100000000);
micWrapper->stopStreamingMicrophoneData();
buttonmanager->stop();
return EXIT_SUCCESS;
}