/*
 * main.cpp
 *
 * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *     http://aws.amazon.com/apache2.0/
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

#include <chrono>
#include <climits>
#include <csignal>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>

#include <sys/time.h>
#include <unistd.h>

#include <AIP/AudioInputProcessor.h>
#include <AVSCommon/AVS/AudioInputStream.h>
#include <AVSCommon/SDKInterfaces/KeyWordDetectorStateObserverInterface.h>
#include <AVSCommon/SDKInterfaces/KeyWordObserverInterface.h>
#include <AVSCommon/Utils/AudioFormat.h>
#include <AVSCommon/Utils/Logger/Level.h>
#include <AVSCommon/Utils/SDS/InProcessSDS.h>
#include <KWD/AbstractKeywordDetector.h>

#include "SampleApp/AlsaMicrophoneWrapper.h"
#include "SampleApp/ConsolePrinter.h"

#ifdef KWD_KITTAI
#include <KittAi/KittAiKeyWordDetector.h>
#endif
#ifdef KWD_SENSORY
#include <Sensory/SensoryKeywordDetector.h>
#endif
#ifdef KWD_AMAZONLITE
#include <AmazonLite/PryonLiteKeywordDetector.h>
#endif

// Platform-adapter (AW) and TutuClear keyword detector headers. The header paths follow the
// platform layer's layout and may need to be adjusted for a particular build.
#include <AW/PlatformAdapter.h>
#include <AW/WavUtils.h>
#include <TutuClear/TutuClearKeywordDetector.h>

/// The sample rate of microphone audio data.
static const unsigned int SAMPLE_RATE_HZ = 16000;

/// The number of audio channels.
static const unsigned int NUM_CHANNELS = 1;

/// The size of each word within the stream.
static const size_t WORD_SIZE = 2;

/// The maximum number of readers of the stream.
static const size_t MAX_READERS = 10;

/// The amount of audio data to keep in the ring buffer.
static const std::chrono::seconds AMOUNT_OF_AUDIO_DATA_IN_BUFFER = std::chrono::seconds(15);

/// The timeout to use for read calls to the SharedDataStream.
static const std::chrono::milliseconds TIMEOUT_FOR_READ_CALLS = std::chrono::milliseconds(1000);

/// The size of the ring buffer.
static const size_t BUFFER_SIZE_IN_SAMPLES = (SAMPLE_RATE_HZ) * AMOUNT_OF_AUDIO_DATA_IN_BUFFER.count();

/// Key for the root node value containing configuration values for SampleApp.
static const std::string SAMPLE_APP_CONFIG_KEY("sampleApp");

/// Key for the endpoint value under the @c SAMPLE_APP_CONFIG_KEY configuration node.
static const std::string ENDPOINT_KEY("endpoint");

/// Default AVS endpoint to connect to.
static const std::string DEFAULT_ENDPOINT("https://avs-alexa-na.amazon.com");

#ifdef KWD_KITTAI
/// The sensitivity of the Kitt.ai engine.
static const double KITT_AI_SENSITIVITY = 0.6;

/// The audio amplifier level of the Kitt.ai engine.
static const float KITT_AI_AUDIO_GAIN = 2.0;

/// Whether Kitt.ai should apply front end audio processing.
static const bool KITT_AI_APPLY_FRONT_END_PROCESSING = true;
#endif

/// A set of all log levels.
static const std::set<alexaClientSDK::avsCommon::utils::logger::Level> allLevels = {
    alexaClientSDK::avsCommon::utils::logger::Level::DEBUG9,
    alexaClientSDK::avsCommon::utils::logger::Level::DEBUG8,
    alexaClientSDK::avsCommon::utils::logger::Level::DEBUG7,
    alexaClientSDK::avsCommon::utils::logger::Level::DEBUG6,
    alexaClientSDK::avsCommon::utils::logger::Level::DEBUG5,
    alexaClientSDK::avsCommon::utils::logger::Level::DEBUG4,
    alexaClientSDK::avsCommon::utils::logger::Level::DEBUG3,
    alexaClientSDK::avsCommon::utils::logger::Level::DEBUG2,
    alexaClientSDK::avsCommon::utils::logger::Level::DEBUG1,
    alexaClientSDK::avsCommon::utils::logger::Level::DEBUG0,
    alexaClientSDK::avsCommon::utils::logger::Level::INFO,
    alexaClientSDK::avsCommon::utils::logger::Level::WARN,
    alexaClientSDK::avsCommon::utils::logger::Level::ERROR,
    alexaClientSDK::avsCommon::utils::logger::Level::CRITICAL,
    alexaClientSDK::avsCommon::utils::logger::Level::NONE};

/// Returns a timestamp string ("YYYY-MM-DD-HH-MM-SS-uuuuuu") used for log lines and capture file names.
static std::string getTimeStamp() {
    struct timeval tv;
    struct timezone tz;
    struct tm* p;
    gettimeofday(&tv, &tz);
    p = localtime(&tv.tv_sec);
    char buf[200];
    snprintf(
        buf,
        sizeof(buf),
        "%02d-%02d-%02d-%02d-%02d-%02d-%06ld",
        p->tm_year + 1900,
        1 + p->tm_mon,
        p->tm_mday,
        p->tm_hour,
        p->tm_min,
        p->tm_sec,
        (long)tv.tv_usec);
    return std::string(buf);
}

/**
 * Observes callbacks from keyword detections, shows wake-up feedback, and optionally saves the detected
 * wake word audio.
 */
class KeywordObserver : public alexaClientSDK::avsCommon::sdkInterfaces::KeyWordObserverInterface {
public:
    /**
     * Constructor.
     *
     * @param show The platform show manager used to display wake-up feedback.
     * @param doa The platform's direction-of-arrival (DOA) information provider.
     * @param format The format of the audio held in the shared stream.
     * @param dir The directory in which captured wake word audio is stored.
     * @param is_save Whether to save the wake word audio to a WAV file on each detection.
     */
    KeywordObserver(
        std::shared_ptr<AW::ShowManager> show,
        std::shared_ptr<AW::DOAInfo> doa,
        alexaClientSDK::avsCommon::utils::AudioFormat format,
        std::string dir,
        bool is_save = false) {
        m_show = show;
        m_format = format;
        m_is_save_data = is_save;
        m_wakeup_data = new int16_t[m_cache_samples];
        m_doa = doa;
        m_dir = dir;
    }

    ~KeywordObserver() {
        delete[] m_wakeup_data;
    }

    void onKeyWordDetected(
        std::shared_ptr<alexaClientSDK::avsCommon::avs::AudioInputStream> stream,
        std::string keyword,
        alexaClientSDK::avsCommon::avs::AudioInputStream::Index beginIndex,
        alexaClientSDK::avsCommon::avs::AudioInputStream::Index endIndex,
        std::shared_ptr<const std::vector<char>> KWDMetadata) {
        std::cout << "[" << getTimeStamp() << "] keyword \"" << keyword << "\" detected!"
direction: " << m_doa->get() << ", count: " << m_count++ << " begin: " << beginIndex << ", end: " << endIndex ; m_show->enableShow(AW::Profile::WAKEUPTEST, AW::ProfileFlag::REPLACE); if (alexaClientSDK::capabilityAgents::aip::AudioInputProcessor::INVALID_INDEX == beginIndex || !m_is_save_data){ std::cout << std::endl; return; } auto reader = stream->createReader(alexaClientSDK::avsCommon::utils::sds::InProcessSDS::Reader::Policy::NONBLOCKING); if(reader->seek(beginIndex) == false) return; ssize_t nWords = endIndex - beginIndex; if(m_cache_samples < nWords){ delete m_wakeup_data; m_wakeup_data = new int16_t[nWords]; m_cache_samples = nWords; } reader->read((void*)m_wakeup_data, nWords); std::string file_path = m_dir + getTimeStamp() + ".wav"; AW::WavUtils wav; wav.create(file_path, "wb", m_format.sampleSizeInBits, m_format.numChannels, m_format.sampleRateHz); wav.write((char*)m_wakeup_data, nWords); wav.release(); std::cout << "\t, wakup up pcm files: " << file_path << std::endl; } private: int m_count = 0; bool m_is_save_data = false; int16_t *m_wakeup_data = nullptr; ssize_t m_cache_samples = 16000*2; std::string m_dir; alexaClientSDK::avsCommon::utils::AudioFormat m_format; std::shared_ptr m_show; std::shared_ptr m_doa; }; /// Observes user input from the console and notifies the interaction manager of the user's intentions. class ButtonInputManager : public AW::ButtonManager::Observer, public std::enable_shared_from_this { public: /** * Create a ButtonInputManager. * * @param interactionManager An instance of the @c InteractionManager used to manage user input. * @return Returns a new @c ButtonInputManager, or @c nullptr if the operation failed. */ static std::shared_ptr create(std::shared_ptr platformadapter){ auto button_manger = std::shared_ptr(new ButtonInputManager(platformadapter)); platformadapter->getButtonManager()->addButtonObserver(button_manger); return button_manger; }; /** * Processes user input forever. Returns upon a quit command. */ int run(){ return 0; }; void stop(){ m_platformadapter->getButtonManager()->removeButtonObserver(shared_from_this()); }; private: /** * Constructor. */ ButtonInputManager(std::shared_ptr platformadapter){ m_platformadapter = platformadapter; m_platformadapter->getMuteManager()->privacyMute(false); m_status = Status::PRIVATE_UNMUTE; }; void onVolumeUp(){}; void onVolumeDown(){}; void onMute(){ if(m_status == Status::PRIVATE_UNMUTE){ m_status = Status::PRIVATE_MUTE; m_platformadapter->getShowManager()->enableShow(AW::Profile::MUTE, AW::ProfileFlag::REPLACE); m_platformadapter->getMuteManager()->privacyMute(true); }else{ m_status = Status::PRIVATE_UNMUTE; m_platformadapter->getShowManager()->enableShow(AW::Profile::UNMUTE, AW::ProfileFlag::REPLACE); m_platformadapter->getMuteManager()->privacyMute(false); } }; void onAudioJackPlugIn(){ m_platformadapter->getAudioJackManager()->doAudioJackPlugIn(); }; void onAudioJackPlugOut(){ m_platformadapter->getAudioJackManager()->doAudioJackPlugOut(); }; private: /// The main interaction manager that interfaces with the SDK. std::shared_ptr m_platformadapter; enum class Status { PRIVATE_MUTE, PRIVATE_UNMUTE }; Status m_status{Status::PRIVATE_UNMUTE}; }; static void when_signal(int sig) { switch(sig){ case SIGINT: case SIGQUIT: case SIGHUP: { printf("signal coming, stop the capture\n"); break; } case SIGPIPE: { //When the client is closed after start scaning and parsing, //this signal will come, ignore it! 
printf("do nothings for PIPE signal\n"); break; } } } /** * This serves as the starting point for the application. This code instantiates the @c UserInputManager and processes * user input until the @c run() function returns. * * @param argc The number of elements in the @c argv array. * @param argv An array of @argc elements, containing the program name and all command-line arguments. * @return @c EXIT_FAILURE if the program failed to initialize correctly, else @c EXIT_SUCCESS. */ int main(int argc, char** argv) { signal(SIGHUP,when_signal); signal(SIGQUIT,when_signal); signal(SIGINT,when_signal); signal(SIGPIPE,when_signal); std::string pathToConfig; std::string pathToInputFolder; std::string logLevel; if (argc < 2) { alexaClientSDK::sampleApp::ConsolePrinter::simplePrint( "USAGE: " + std::string(argv[0]) + " [log_level]"); return EXIT_FAILURE; } else { pathToInputFolder = std::string(argv[1]); if (3 == argc) { logLevel = std::string(argv[2]); } } /* * Creating the buffer (Shared Data Stream) that will hold user audio data. This is the main input into the SDK. */ size_t bufferSize = alexaClientSDK::avsCommon::avs::AudioInputStream::calculateBufferSize( BUFFER_SIZE_IN_SAMPLES, WORD_SIZE, MAX_READERS); auto buffer = std::make_shared(bufferSize); std::shared_ptr sharedDataStream = alexaClientSDK::avsCommon::avs::AudioInputStream::create(buffer, WORD_SIZE, MAX_READERS); if (!sharedDataStream) { alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create shared data stream!"); return false; } alexaClientSDK::avsCommon::utils::AudioFormat compatibleAudioFormat; compatibleAudioFormat.sampleRateHz = SAMPLE_RATE_HZ; compatibleAudioFormat.sampleSizeInBits = WORD_SIZE * CHAR_BIT; compatibleAudioFormat.numChannels = NUM_CHANNELS; compatibleAudioFormat.endianness = alexaClientSDK::avsCommon::utils::AudioFormat::Endianness::LITTLE; compatibleAudioFormat.encoding = alexaClientSDK::avsCommon::utils::AudioFormat::Encoding::LPCM; auto platform = AW::PlatformAdapter::create(pathToInputFolder.data()); if(platform == nullptr) { alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to get " + pathToInputFolder + " for configure!"); return false; } auto buttonmanager = ButtonInputManager::create(platform); if(buttonmanager == nullptr) { alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create buttonmanager!"); return false; } buttonmanager->run(); bool is_save_wakeup = false; std::string wakeup_data_dir = ""; std::shared_ptr micWrapper = alexaClientSDK::sampleApp::AlsaMicrophoneWrapper::create(sharedDataStream, nullptr, platform); if (!micWrapper) { alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create AlsaMicrophoneWrapper!"); return false; } // This observer is notified any time a keyword is detected and notifies the DefaultClient to start recognizing. 
    auto keywordObserver = std::make_shared<KeywordObserver>(
        platform->getShowManager(),
        platform->getRecorder()->getFilter()->getDOAInfo(),
        compatibleAudioFormat,
        wakeup_data_dir,
        is_save_wakeup);

    std::unique_ptr<alexaClientSDK::kwd::AbstractKeywordDetector> m_keywordDetector{nullptr};
    const char* detector = platform->getDetectorType();

#if defined(KWD_KITTAI)
    if (strcmp(detector, "kitt.ai") == 0) {
        m_keywordDetector = alexaClientSDK::kwd::KittAiKeyWordDetector::create(
            sharedDataStream,
            compatibleAudioFormat,
            {keywordObserver},
            std::unordered_set<
                std::shared_ptr<alexaClientSDK::avsCommon::sdkInterfaces::KeyWordDetectorStateObserverInterface>>(),
            pathToInputFolder + "/common.res",
            {{pathToInputFolder + "/alexa.umdl", "ALEXA", KITT_AI_SENSITIVITY}},
            KITT_AI_AUDIO_GAIN,
            KITT_AI_APPLY_FRONT_END_PROCESSING);
        if (!m_keywordDetector) {
            alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create KittAiKeyWordDetector!");
            return EXIT_FAILURE;
        }
    }
#endif

#if defined(KWD_SENSORY)
    if (strcmp(detector, "sensory") == 0) {
        m_keywordDetector = alexaClientSDK::kwd::SensoryKeywordDetector::create(
            sharedDataStream,
            compatibleAudioFormat,
            {keywordObserver},
            std::unordered_set<
                std::shared_ptr<alexaClientSDK::avsCommon::sdkInterfaces::KeyWordDetectorStateObserverInterface>>(),
            platform->getSensoryModel());
        if (!m_keywordDetector) {
            alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create SensoryKeywordDetector!");
            return EXIT_FAILURE;
        }
    }
#endif

#if defined(KWD_AMAZONLITE)
    if (strcmp(detector, "amazon-lite") == 0) {
        m_keywordDetector = alexaClientSDK::kwd::PryonLiteKeywordDetector::create(
            sharedDataStream,
            compatibleAudioFormat,
            {keywordObserver},
            std::unordered_set<
                std::shared_ptr<alexaClientSDK::avsCommon::sdkInterfaces::KeyWordDetectorStateObserverInterface>>(),
            platform->getAmazonliteModel(),
            platform->getAmazonliteDetectThreshold());
        if (!m_keywordDetector) {
            alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create PryonLiteKeywordDetector!");
            return EXIT_FAILURE;
        }
    }
#endif

    if (strcmp(detector, "tutudetect") == 0) {
        m_keywordDetector = alexaClientSDK::kwd::TutuClearKeywordDetector::create(
            platform,
            sharedDataStream,
            compatibleAudioFormat,
            {keywordObserver},
            std::unordered_set<
                std::shared_ptr<alexaClientSDK::avsCommon::sdkInterfaces::KeyWordDetectorStateObserverInterface>>());
        if (!m_keywordDetector) {
            alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create TutuClearKeywordDetector!");
            return EXIT_FAILURE;
        }
    }

    micWrapper->startStreamingMicrophoneData();

    // Keep the process alive while the microphone streams into the shared data stream and the keyword
    // detector consumes it; a caught signal (see when_signal above) interrupts the sleep on termination.
    sleep(100000000);

    micWrapper->stopStreamingMicrophoneData();
    buttonmanager->stop();

    return EXIT_SUCCESS;
}