SmartAudio/package/avs/sampleapp/files/source/sample-wakeup/main.cpp

/*
* main.cpp
*
* Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/apache2.0/
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <pthread.h>
#include <time.h>
#include <sys/time.h>
#include <signal.h>
#include <cstdlib>
#include <string>
#include <algorithm>
#include <cctype>
#include <fstream>
#include <chrono>
#include <iostream>
#include <memory>
#include <set>
#include <unordered_set>
#include <vector>
#include "SampleApp/AlsaMicrophoneWrapper.h"
#include "SampleApp/ConsolePrinter.h"
#ifdef KWD_KITTAI
#include <KittAi/KittAiKeyWordDetector.h>
#endif
#ifdef KWD_SENSORY
#include <Sensory/SensoryKeywordDetector.h>
#endif
#ifdef KWD_AMAZONLITE
#include <AmazonLite/PryonLiteKeywordDetector.h>
#endif
#include <TutuClearKeywordDetector/TutuClearKeywordDetector.h>
#include <AVSCommon/SDKInterfaces/KeyWordObserverInterface.h>
#include <AIP/AudioInputProcessor.h>
#include <utils/WavUtils.h>
#include <platformadapter/PlatformAdapter.h>
/// The sample rate of microphone audio data.
static const unsigned int SAMPLE_RATE_HZ = 16000;
/// The number of audio channels.
static const unsigned int NUM_CHANNELS = 1;
/// The size of each word within the stream.
static const size_t WORD_SIZE = 2;
/// The maximum number of readers of the stream.
static const size_t MAX_READERS = 10;
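/// The number of bits per byte.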
static const size_t CHAR_BIT = 8;
/// The amount of audio data to keep in the ring buffer.
static const std::chrono::seconds AMOUNT_OF_AUDIO_DATA_IN_BUFFER = std::chrono::seconds(15);
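/// The timeout to use for read calls to the shared data stream.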
const std::chrono::milliseconds TIMEOUT_FOR_READ_CALLS = std::chrono::milliseconds(1000);
/// The size of the ring buffer.
static const size_t BUFFER_SIZE_IN_SAMPLES = (SAMPLE_RATE_HZ)*AMOUNT_OF_AUDIO_DATA_IN_BUFFER.count();
/// Key for the root node value containing configuration values for SampleApp.
static const std::string SAMPLE_APP_CONFIG_KEY("sampleApp");
/// Key for the endpoint value under the @c SAMPLE_APP_CONFIG_KEY configuration node.
static const std::string ENDPOINT_KEY("endpoint");
/// Default AVS endpoint to connect to.
static const std::string DEFAULT_ENDPOINT("https://avs-alexa-na.amazon.com");
#ifdef KWD_KITTAI
/// The sensitivity of the Kitt.ai engine.
static const double KITT_AI_SENSITIVITY = 0.6;
/// The audio amplifier level of the Kitt.ai engine.
static const float KITT_AI_AUDIO_GAIN = 2.0;
/// Whether Kitt.ai should apply front end audio processing.
static const bool KITT_AI_APPLY_FRONT_END_PROCESSING = true;
#endif
/// A set of all log levels.
static const std::set<alexaClientSDK::avsCommon::utils::logger::Level> allLevels = {
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG9,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG8,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG7,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG6,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG5,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG4,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG3,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG2,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG1,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG0,
alexaClientSDK::avsCommon::utils::logger::Level::INFO,
alexaClientSDK::avsCommon::utils::logger::Level::WARN,
alexaClientSDK::avsCommon::utils::logger::Level::ERROR,
alexaClientSDK::avsCommon::utils::logger::Level::CRITICAL,
alexaClientSDK::avsCommon::utils::logger::Level::NONE};
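/// Returns a timestamp string of the form YYYY-MM-DD-HH-MM-SS-usec, used for log output and wake-word capture file names.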
static std::string getTimeStamp() {
    struct timeval tv;
    struct timezone tz;
    struct tm* p;
    gettimeofday(&tv, &tz);
    p = localtime(&tv.tv_sec);
    char buf[200];
    snprintf(
        buf,
        sizeof(buf),
        "%02d-%02d-%02d-%02d-%02d-%02d-%06ld",
        p->tm_year + 1900,
        1 + p->tm_mon,
        p->tm_mday,
        p->tm_hour,
        p->tm_min,
        p->tm_sec,
        tv.tv_usec);
    return std::string(buf);
}
/**
 * Observes callbacks from keyword detections, drives the wake-up LED show, and optionally saves the
 * detected keyword audio to a WAV file for offline inspection.
 */
class KeywordObserver : public alexaClientSDK::avsCommon::sdkInterfaces::KeyWordObserverInterface {
public:
/**
* Constructor.
*
 * @param show The show manager used to display the wake-up LED profile.
 * @param doa The direction-of-arrival info provider for the microphone array.
 * @param format The audio format of the shared data stream.
 * @param dir The directory in which captured wake-word audio is saved.
 * @param is_save Whether to save the wake-word audio to a WAV file on each detection.
*/
KeywordObserver(std::shared_ptr<AW::IShowManager> show,
std::shared_ptr<AW::DOAInfo> doa,
alexaClientSDK::avsCommon::utils::AudioFormat format,
std::string dir,
bool is_save = false){
m_show = show;
m_format = format;
m_is_save_data = is_save;
m_wakeup_data = new int16_t[m_cache_samples];
m_doa = doa;
m_dir = dir;
}
    ~KeywordObserver() {
        // m_wakeup_data is allocated with new[], so release it with delete[].
        delete[] m_wakeup_data;
    }
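    /**
     * Called by the keyword detector when a wake word is observed in the audio stream. Triggers the wake-up
     * LED show and, when saving is enabled and the begin index is valid, copies the keyword samples out of
     * the shared stream and writes them to a timestamped WAV file.
     */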
void onKeyWordDetected(
std::shared_ptr<alexaClientSDK::avsCommon::avs::AudioInputStream> stream,
std::string keyword,
alexaClientSDK::avsCommon::avs::AudioInputStream::Index beginIndex,
alexaClientSDK::avsCommon::avs::AudioInputStream::Index endIndex,
std::shared_ptr<const std::vector<char>> KWDMetadata) {
std::cout<< "["
<< getTimeStamp()
<< "] key work \""
<< keyword
<<"\" detect! direction: "
<< m_doa->get()
<< ", count: "
<< m_count++
<< " begin: "
<< beginIndex
<< ", end: "
<< endIndex ;
m_show->enableShow(AW::Profile::WAKEUPTEST, AW::ProfileFlag::REPLACE);
if (alexaClientSDK::capabilityAgents::aip::AudioInputProcessor::INVALID_INDEX == beginIndex || !m_is_save_data){
std::cout << std::endl;
return;
}
        auto reader =
            stream->createReader(alexaClientSDK::avsCommon::utils::sds::InProcessSDS::Reader::Policy::NONBLOCKING);
        if (!reader->seek(beginIndex)) {
            std::cout << std::endl;
            return;
        }
        ssize_t nWords = endIndex - beginIndex;
        // Grow the cache buffer if the detected keyword spans more samples than we can currently hold.
        if (m_cache_samples < nWords) {
            delete[] m_wakeup_data;
            m_wakeup_data = new int16_t[nWords];
            m_cache_samples = nWords;
        }
        reader->read((void*)m_wakeup_data, nWords);
std::string file_path = m_dir + getTimeStamp() + ".wav";
AW::WavUtils wav;
wav.create(file_path, "wb", m_format.sampleSizeInBits, m_format.numChannels, m_format.sampleRateHz);
wav.write((char*)m_wakeup_data, nWords);
wav.release();
std::cout << "\t, wakup up pcm files: " << file_path << std::endl;
}
private:
int m_count = 0;
bool m_is_save_data = false;
int16_t *m_wakeup_data = nullptr;
ssize_t m_cache_samples = 16000*2;
std::string m_dir;
alexaClientSDK::avsCommon::utils::AudioFormat m_format;
std::shared_ptr<AW::IShowManager> m_show;
std::shared_ptr<AW::DOAInfo> m_doa;
};
/// Observes hardware button events from the platform adapter and handles privacy mute and audio-jack changes.
class ButtonInputManager : public AW::ButtonManager::Observer,
public std::enable_shared_from_this<ButtonInputManager>
{
public:
/**
* Create a ButtonInputManager.
*
 * @param platformadapter The platform adapter whose button manager this observer registers with.
* @return Returns a new @c ButtonInputManager, or @c nullptr if the operation failed.
*/
    static std::shared_ptr<ButtonInputManager> create(std::shared_ptr<AW::PlatformAdapter> platformadapter) {
        auto button_manager = std::shared_ptr<ButtonInputManager>(new ButtonInputManager(platformadapter));
        platformadapter->getButtonManager()->addButtonObserver(button_manager);
        return button_manager;
    }
    /**
     * Placeholder for a blocking input loop; button events arrive via observer callbacks, so this returns immediately.
     */
    int run() { return 0; }
void stop(){
m_platformadapter->getButtonManager()->removeButtonObserver(shared_from_this());
};
private:
/**
* Constructor.
*/
ButtonInputManager(std::shared_ptr<AW::PlatformAdapter> platformadapter){
m_platformadapter = platformadapter;
m_platformadapter->getMuteManager()->privacyMute(false);
m_status = Status::PRIVATE_UNMUTE;
};
void onVolumeUp(){};
void onVolumeDown(){};
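    /// Toggles the privacy-mute state and updates the LED show to match.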
void onMute(){
if(m_status == Status::PRIVATE_UNMUTE){
m_status = Status::PRIVATE_MUTE;
m_platformadapter->getShowManager()->enableShow(AW::Profile::MUTE, AW::ProfileFlag::REPLACE);
m_platformadapter->getMuteManager()->privacyMute(true);
}else{
m_status = Status::PRIVATE_UNMUTE;
m_platformadapter->getShowManager()->enableShow(AW::Profile::UNMUTE, AW::ProfileFlag::REPLACE);
m_platformadapter->getMuteManager()->privacyMute(false);
}
};
void onAudioJackPlugIn(){
m_platformadapter->getAudioJackManager()->doAudioJackPlugIn();
};
void onAudioJackPlugOut(){
m_platformadapter->getAudioJackManager()->doAudioJackPlugOut();
};
private:
    /// The platform adapter that provides access to the button, mute, show, and audio-jack managers.
std::shared_ptr<AW::PlatformAdapter> m_platformadapter;
enum class Status
{
PRIVATE_MUTE,
PRIVATE_UNMUTE
};
Status m_status{Status::PRIVATE_UNMUTE};
};
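/// Signal handler: logs capture-stopping signals (SIGINT/SIGQUIT/SIGHUP) and ignores SIGPIPE.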
static void when_signal(int sig)
{
    switch (sig) {
        case SIGINT:
        case SIGQUIT:
        case SIGHUP: {
            printf("signal received, stopping the capture\n");
            break;
        }
        case SIGPIPE: {
            // When the client is closed after it has started scanning and parsing,
            // this signal may arrive; ignore it.
            printf("do nothing for PIPE signal\n");
            break;
        }
    }
}
/**
 * This serves as the starting point for the application. It creates the shared audio stream, the microphone
 * wrapper, the button input manager, and the configured keyword detector, then streams microphone data until
 * the process is terminated by a signal.
 *
 * @param argc The number of elements in the @c argv array.
 * @param argv An array of @c argc elements, containing the program name and all command-line arguments.
 * @return @c EXIT_FAILURE if the program failed to initialize correctly, else @c EXIT_SUCCESS.
 */
int main(int argc, char** argv) {
signal(SIGHUP,when_signal);
signal(SIGQUIT,when_signal);
signal(SIGINT,when_signal);
signal(SIGPIPE,when_signal);
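    // Installing these handlers lets a caught signal (e.g. SIGINT) interrupt the long sleep() at the end of
    // main() so the app can stop streaming and exit cleanly.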
    std::string pathToInputFolder;
    std::string logLevel;
    if (argc < 2) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint(
            "USAGE: " + std::string(argv[0]) + " <path_to_inputs_folder> [log_level]");
        return EXIT_FAILURE;
    } else {
        pathToInputFolder = std::string(argv[1]);
        if (argc >= 3) {
            logLevel = std::string(argv[2]);
        }
    }
/*
* Creating the buffer (Shared Data Stream) that will hold user audio data. This is the main input into the SDK.
*/
size_t bufferSize = alexaClientSDK::avsCommon::avs::AudioInputStream::calculateBufferSize(
BUFFER_SIZE_IN_SAMPLES, WORD_SIZE, MAX_READERS);
auto buffer = std::make_shared<alexaClientSDK::avsCommon::avs::AudioInputStream::Buffer>(bufferSize);
std::shared_ptr<alexaClientSDK::avsCommon::avs::AudioInputStream> sharedDataStream =
alexaClientSDK::avsCommon::avs::AudioInputStream::create(buffer, WORD_SIZE, MAX_READERS);
if (!sharedDataStream) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create shared data stream!");
        return EXIT_FAILURE;
}
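    // Describe the data written into the shared stream: 16 kHz, 16-bit, mono, little-endian LPCM.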
alexaClientSDK::avsCommon::utils::AudioFormat compatibleAudioFormat;
compatibleAudioFormat.sampleRateHz = SAMPLE_RATE_HZ;
compatibleAudioFormat.sampleSizeInBits = WORD_SIZE * CHAR_BIT;
compatibleAudioFormat.numChannels = NUM_CHANNELS;
compatibleAudioFormat.endianness = alexaClientSDK::avsCommon::utils::AudioFormat::Endianness::LITTLE;
compatibleAudioFormat.encoding = alexaClientSDK::avsCommon::utils::AudioFormat::Encoding::LPCM;
auto platform = AW::PlatformAdapter::create(pathToInputFolder.data());
if(platform == nullptr) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to get " + pathToInputFolder + " for configure!");
return false;
}
auto buttonmanager = ButtonInputManager::create(platform);
if(buttonmanager == nullptr) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create buttonmanager!");
return false;
}
buttonmanager->run();
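    // Whether to dump the detected wake-word audio to WAV files, and the directory to write them into
    // (disabled by default; in that case the KeywordObserver only logs detections).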
bool is_save_wakeup = false;
std::string wakeup_data_dir = "";
std::shared_ptr<alexaClientSDK::sampleApp::MicrophoneWrapperInterface> micWrapper =
alexaClientSDK::sampleApp::AlsaMicrophoneWrapper::create(sharedDataStream, nullptr, platform);
if (!micWrapper) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create AlsaMicrophoneWrapper!");
        return EXIT_FAILURE;
}
    // This observer is notified any time a keyword is detected; it triggers the wake-up show and optionally saves the audio.
auto keywordObserver = std::make_shared<KeywordObserver>(platform->getShowManager(),
platform->getRecorder()->getFilter()->getDOAInfo(),
compatibleAudioFormat,
wakeup_data_dir,
is_save_wakeup);
std::unique_ptr<alexaClientSDK::kwd::AbstractKeywordDetector> m_keywordDetector{nullptr};
const char *detector = platform->getDetectorType();
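    // Select the keyword detection engine named in the platform configuration ("kitt.ai", "sensory",
    // "amazon-lite", or "tutudetect"); the first three are only available when compiled in via the
    // corresponding KWD_* define.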
#if defined(KWD_KITTAI)
if(strcmp(detector, "kitt.ai") == 0) {
m_keywordDetector = alexaClientSDK::kwd::KittAiKeyWordDetector::create(
sharedDataStream,
compatibleAudioFormat,
{keywordObserver},
std::unordered_set<
std::shared_ptr<alexaClientSDK::avsCommon::sdkInterfaces::KeyWordDetectorStateObserverInterface>>(),
pathToInputFolder + "/common.res",
{{pathToInputFolder + "/alexa.umdl", "ALEXA", KITT_AI_SENSITIVITY}},
KITT_AI_AUDIO_GAIN,
KITT_AI_APPLY_FRONT_END_PROCESSING);
if (!m_keywordDetector) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create KittAiKeyWordDetector!");
            return EXIT_FAILURE;
        }
    }
#endif
#if defined(KWD_SENSORY)
if(strcmp(detector, "sensory") == 0) {
m_keywordDetector = alexaClientSDK::kwd::SensoryKeywordDetector::create(
sharedDataStream,
compatibleAudioFormat,
{keywordObserver},
std::unordered_set<
std::shared_ptr<alexaClientSDK::avsCommon::sdkInterfaces::KeyWordDetectorStateObserverInterface>>(),
platform->getSensoryModel());
if (!m_keywordDetector) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create SensoryKeyWordDetector!");
            return EXIT_FAILURE;
}
}
#endif
#if defined(KWD_AMAZONLITE)
if(strcmp(detector, "amazon-lite") == 0) {
m_keywordDetector = alexaClientSDK::kwd::PryonLiteKeywordDetector::create(
sharedDataStream,
compatibleAudioFormat,
{keywordObserver},
std::unordered_set<
std::shared_ptr<alexaClientSDK::avsCommon::sdkInterfaces::KeyWordDetectorStateObserverInterface>>(),
platform->getAmazonliteModel(),
platform->getAmazonliteDetectThreshold());
if (!m_keywordDetector) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create PryonLiteKeywordDetector!");
            return EXIT_FAILURE;
}
}
#endif
if(strcmp(detector, "tutudetect") == 0) {
m_keywordDetector = alexaClientSDK::kwd::TutuClearKeywordDetector::create(
platform,
sharedDataStream,
compatibleAudioFormat,
{keywordObserver},
std::unordered_set<
std::shared_ptr<alexaClientSDK::avsCommon::sdkInterfaces::KeyWordDetectorStateObserverInterface>>());
if (!m_keywordDetector) {
alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create TutuClearKeywordDetector!");
            return EXIT_FAILURE;
}
}
micWrapper->startStreamingMicrophoneData();
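    // Block here until a handled signal interrupts the sleep, then stop streaming and exit.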
sleep(100000000);
micWrapper->stopStreamingMicrophoneData();
buttonmanager->stop();
return EXIT_SUCCESS;
}