/*
 * AudioInputProcessor.cpp
 *
 * Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *     http://aws.amazon.com/apache2.0/
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

#include <sstream>

#include <AVSCommon/AVS/FocusState.h>
#include <AVSCommon/Utils/JSON/JSONUtils.h>
#include <AVSCommon/Utils/Logger/Logger.h>
#include <AVSCommon/Utils/UUIDGeneration/UUIDGeneration.h>

#include "AIP/AudioInputProcessor.h"
#include "AIP/MessageRequest.h"

namespace alexaClientSDK {
namespace capabilityAgents {
namespace aip {

using namespace avsCommon::utils::logger;

/// String to identify log entries originating from this file.
static const std::string TAG("AudioInputProcessor");

/**
 * Create a LogEntry using this file's TAG and the specified event string.
 *
 * @param event The event string for this @c LogEntry.
 */
#define LX(event) alexaClientSDK::avsCommon::utils::logger::LogEntry(TAG, event)

/// The name of the @c FocusManager channel used by @c AudioInputProcessor.
static const std::string CHANNEL_NAME = avsCommon::sdkInterfaces::FocusManagerInterface::DIALOG_CHANNEL_NAME;

/// The activityId string used with @c FocusManager by @c AudioInputProcessor.
static const std::string ACTIVITY_ID = "SpeechRecognizer.Recognize";

/// The namespace for this capability agent.
static const std::string NAMESPACE = "SpeechRecognizer";

/// The StopCapture directive signature.
static const avsCommon::avs::NamespaceAndName STOP_CAPTURE{NAMESPACE, "StopCapture"};

/// The ExpectSpeech directive signature.
static const avsCommon::avs::NamespaceAndName EXPECT_SPEECH{NAMESPACE, "ExpectSpeech"};

/// The SpeechRecognizer context state signature.
static const avsCommon::avs::NamespaceAndName RECOGNIZER_STATE{NAMESPACE, "RecognizerState"};

std::shared_ptr<AudioInputProcessor> AudioInputProcessor::create(
    std::shared_ptr<avsCommon::sdkInterfaces::DirectiveSequencerInterface> directiveSequencer,
    std::shared_ptr<avsCommon::sdkInterfaces::MessageSenderInterface> messageSender,
    std::shared_ptr<avsCommon::sdkInterfaces::ContextManagerInterface> contextManager,
    std::shared_ptr<avsCommon::sdkInterfaces::FocusManagerInterface> focusManager,
    std::shared_ptr<avsCommon::avs::DialogUXStateAggregator> dialogUXStateAggregator,
    std::shared_ptr<avsCommon::sdkInterfaces::ExceptionEncounteredSenderInterface> exceptionEncounteredSender,
    std::shared_ptr<avsCommon::sdkInterfaces::UserActivityNotifierInterface> userActivityNotifier,
    AudioProvider defaultAudioProvider) {
    if (!directiveSequencer) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullDirectiveSequencer"));
        return nullptr;
    } else if (!messageSender) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullMessageSender"));
        return nullptr;
    } else if (!contextManager) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullContextManager"));
        return nullptr;
    } else if (!focusManager) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullFocusManager"));
        return nullptr;
    } else if (!dialogUXStateAggregator) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullDialogUXStateAggregator"));
        return nullptr;
    } else if (!exceptionEncounteredSender) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullExceptionEncounteredSender"));
        return nullptr;
    } else if (!userActivityNotifier) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullUserActivityNotifier"));
        return nullptr;
    }

    auto aip = std::shared_ptr<AudioInputProcessor>(new AudioInputProcessor(
        directiveSequencer,
        messageSender,
        contextManager,
        focusManager,
        exceptionEncounteredSender,
        userActivityNotifier,
        defaultAudioProvider));
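
    // Registration with the context manager and the dialog UX state aggregator is performed
    // here, after construction, because both calls hand out a shared_ptr to the new instance;
    // a shared_ptr to *this cannot be obtained from inside the constructor.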
    if (aip) {
        contextManager->setStateProvider(RECOGNIZER_STATE, aip);
        dialogUXStateAggregator->addObserver(aip);
    }

    return aip;
}

avsCommon::avs::DirectiveHandlerConfiguration AudioInputProcessor::getConfiguration() const {
    avsCommon::avs::DirectiveHandlerConfiguration configuration;
    configuration[STOP_CAPTURE] = avsCommon::avs::BlockingPolicy::NON_BLOCKING;
    configuration[EXPECT_SPEECH] = avsCommon::avs::BlockingPolicy::NON_BLOCKING;
    return configuration;
}

void AudioInputProcessor::addObserver(std::shared_ptr<ObserverInterface> observer) {
    if (!observer) {
        ACSDK_ERROR(LX("addObserverFailed").d("reason", "nullObserver"));
        return;
    }
    m_executor.submit([this, observer] () { m_observers.insert(observer); });
}

void AudioInputProcessor::removeObserver(std::shared_ptr<ObserverInterface> observer) {
    if (!observer) {
        ACSDK_ERROR(LX("removeObserverFailed").d("reason", "nullObserver"));
        return;
    }
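    // Wait for the executor to run the removal so that, once this method returns, the
    // observer is guaranteed to receive no further callbacks.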
    m_executor.submit([this, observer] () { m_observers.erase(observer); }).wait();
}

std::future<bool> AudioInputProcessor::recognize(
    AudioProvider audioProvider,
    Initiator initiator,
    avsCommon::avs::AudioInputStream::Index begin,
    avsCommon::avs::AudioInputStream::Index keywordEnd,
    std::string keyword) {
    // If no begin index was provided, grab the current index ASAP so that we can start streaming from the time this
    // call was made.
    if (audioProvider.stream && INVALID_INDEX == begin) {
        static const bool startWithNewData = true;
        auto reader = audioProvider.stream->createReader(
            avsCommon::avs::AudioInputStream::Reader::Policy::NONBLOCKING,
            startWithNewData);
        if (!reader) {
            ACSDK_ERROR(LX("recognizeFailed").d("reason", "createReaderFailed"));
            std::promise<bool> ret;
            ret.set_value(false);
            return ret.get_future();
        }
        begin = reader->tell();
    }
    return m_executor.submit(
        [this, audioProvider, initiator, begin, keywordEnd, keyword] () {
            return executeRecognize(audioProvider, initiator, begin, keywordEnd, keyword);
        }
    );
}
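
// Illustrative usage (not part of the original source): a tap-to-talk client would typically
// start a recognize cycle with something like
//
//     std::future<bool> started = aip->recognize(tapToTalkProvider, Initiator::TAP);
//
// where tapToTalkProvider is an assumed AudioProvider wrapping a 16 kHz, 16-bit, mono LPCM
// stream; the begin/keywordEnd/keyword arguments are only needed for wakeword initiators.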

std::future<bool> AudioInputProcessor::stopCapture() {
    return m_executor.submit(
        [this] () {
            return executeStopCapture();
        }
    );
}

std::future<void> AudioInputProcessor::resetState() {
    return m_executor.submit(
        [this] () {
            executeResetState();
        }
    );
}

void AudioInputProcessor::provideState(unsigned int stateRequestToken) {
    m_executor.submit(
        [this, stateRequestToken] () {
            executeProvideState(true, stateRequestToken);
        }
    );
}

void AudioInputProcessor::onContextAvailable(const std::string& jsonContext) {
    m_executor.submit(
        [this, jsonContext] () {
            executeOnContextAvailable(jsonContext);
        }
    );
}

void AudioInputProcessor::onContextFailure(const avsCommon::sdkInterfaces::ContextRequestError error) {
    m_executor.submit(
        [this, error] () {
            executeOnContextFailure(error);
        }
    );
}

void AudioInputProcessor::handleDirectiveImmediately(std::shared_ptr<avsCommon::avs::AVSDirective> directive) {
    handleDirective(std::make_shared<DirectiveInfo>(directive, nullptr));
}

void AudioInputProcessor::preHandleDirective(std::shared_ptr<DirectiveInfo> info) {
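    // Intentionally a no-op: this agent's directives require no pre-handling work.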
}

void AudioInputProcessor::handleDirective(std::shared_ptr<DirectiveInfo> info) {
    if (!info) {
        ACSDK_ERROR(LX("handleDirectiveFailed").d("reason", "nullDirectiveInfo"));
        return;
    }
    if (info->directive->getName() == STOP_CAPTURE.name) {
        handleStopCaptureDirective(info);
    } else if (info->directive->getName() == EXPECT_SPEECH.name) {
        handleExpectSpeechDirective(info);
    } else {
        std::string errorMessage =
            "unexpected directive " +
            info->directive->getNamespace() + ":" + info->directive->getName();
        m_exceptionEncounteredSender->sendExceptionEncountered(
            info->directive->getUnparsedDirective(),
            avsCommon::avs::ExceptionErrorType::UNEXPECTED_INFORMATION_RECEIVED,
            errorMessage);
        if (info->result) {
            info->result->setFailed(errorMessage);
        }
        ACSDK_ERROR(LX("handleDirectiveFailed")
                .d("reason", "unknownDirective")
                .d("namespace", info->directive->getNamespace())
                .d("name", info->directive->getName()));
    }
}

void AudioInputProcessor::cancelDirective(std::shared_ptr<DirectiveInfo> info) {
    resetState();
    removeDirective(info);
}

void AudioInputProcessor::onDeregistered() {
    resetState();
}

void AudioInputProcessor::onFocusChanged(avsCommon::avs::FocusState newFocus) {
    m_executor.submit(
        [this, newFocus] () {
            executeOnFocusChanged(newFocus);
        }
    );
}

void AudioInputProcessor::onDialogUXStateChanged(
    avsCommon::sdkInterfaces::DialogUXStateObserverInterface::DialogUXState newState) {
    m_executor.submit(
        [this, newState] () {
            executeOnDialogUXStateChanged(newState);
        }
    );
}

AudioInputProcessor::AudioInputProcessor(
    std::shared_ptr<avsCommon::sdkInterfaces::DirectiveSequencerInterface> directiveSequencer,
    std::shared_ptr<avsCommon::sdkInterfaces::MessageSenderInterface> messageSender,
    std::shared_ptr<avsCommon::sdkInterfaces::ContextManagerInterface> contextManager,
    std::shared_ptr<avsCommon::sdkInterfaces::FocusManagerInterface> focusManager,
    std::shared_ptr<avsCommon::sdkInterfaces::ExceptionEncounteredSenderInterface> exceptionEncounteredSender,
    std::shared_ptr<avsCommon::sdkInterfaces::UserActivityNotifierInterface> userActivityNotifier,
    AudioProvider defaultAudioProvider) :
        CapabilityAgent{NAMESPACE, exceptionEncounteredSender},
        RequiresShutdown{"AudioInputProcessor"},
        m_directiveSequencer{directiveSequencer},
        m_messageSender{messageSender},
        m_contextManager{contextManager},
        m_focusManager{focusManager},
        m_userActivityNotifier{userActivityNotifier},
        m_defaultAudioProvider{defaultAudioProvider},
        m_lastAudioProvider{AudioProvider::null()},
        m_state{ObserverInterface::State::IDLE},
        m_focusState{avsCommon::avs::FocusState::NONE},
        m_preparingToSend{false},
        m_initialDialogUXStateReceived{false} {
}

void AudioInputProcessor::doShutdown() {
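    // Shut down the executor before tearing state down so that no queued task can run
    // concurrently with, or after, the cleanup below.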
    m_executor.shutdown();
    executeResetState();
    m_directiveSequencer.reset();
    m_messageSender.reset();
    m_contextManager.reset();
    m_focusManager.reset();
    m_userActivityNotifier.reset();
    m_observers.clear();
}

std::future<bool> AudioInputProcessor::expectSpeechTimedOut() {
    return m_executor.submit(
        [this] () {
            return executeExpectSpeechTimedOut();
        }
    );
}

void AudioInputProcessor::handleStopCaptureDirective(std::shared_ptr<DirectiveInfo> info) {
    m_executor.submit(
        [this, info] () {
            bool stopImmediately = true;
            executeStopCapture(stopImmediately, info);
        }
    );
}

void AudioInputProcessor::handleExpectSpeechDirective(std::shared_ptr<DirectiveInfo> info) {
    int64_t timeout;
    bool found = avsCommon::utils::json::jsonUtils::lookupInt64Value(
        info->directive->getPayload(),
        "timeoutInMilliseconds",
        &timeout);

    if (!found) {
        static const char* errorMessage = "missing/invalid timeoutInMilliseconds";
        m_exceptionEncounteredSender->sendExceptionEncountered(
            info->directive->getUnparsedDirective(),
            avsCommon::avs::ExceptionErrorType::UNSUPPORTED_OPERATION,
            errorMessage);
        if (info->result) {
            info->result->setFailed(errorMessage);
        }
        ACSDK_ERROR(LX("handleExpectSpeechDirectiveFailed")
                .d("reason", "missingJsonField")
                .d("field", "timeoutInMilliseconds"));
        return;
    }

    std::string initiator;
    avsCommon::utils::json::jsonUtils::lookupStringValue(info->directive->getPayload(), "initiator", &initiator);

    m_executor.submit(
        [this, timeout, initiator, info] () {
            executeExpectSpeech(std::chrono::milliseconds{timeout}, initiator, info);
        }
    );
}

bool AudioInputProcessor::executeRecognize(
    AudioProvider provider,
    Initiator initiator,
    avsCommon::avs::AudioInputStream::Index begin,
    avsCommon::avs::AudioInputStream::Index end,
    const std::string& keyword) {
    // Make sure we have a keyword if this is a wakeword initiator.
    if (Initiator::WAKEWORD == initiator && keyword.empty()) {
        ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "emptyKeywordWithWakewordInitiator"));
        return false;
    }

    // 500ms preroll.
    avsCommon::avs::AudioInputStream::Index preroll = provider.format.sampleRateHz / 2;
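    // At the 16 kHz sample rate required below, this works out to 8000 samples of audio
    // ahead of the keyword.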

    // Check if we have everything we need to enable false wakeword detection.
    // TODO: Consider relaxing the hard requirement for a full 500ms preroll - ACSDK-276.
    bool falseWakewordDetection =
        Initiator::WAKEWORD == initiator &&
        begin != INVALID_INDEX &&
        begin >= preroll &&
        end != INVALID_INDEX;

    // If we will be enabling false wakeword detection, add preroll and build the initiator payload.
    std::ostringstream initiatorPayloadJson;
    // TODO: Consider reworking this code to use RapidJSON - ACSDK-279.
    if (falseWakewordDetection) {
        initiatorPayloadJson
            << R"("wakeWordIndices":{)"
               R"("startIndexInSamples":)" << preroll << R"(,)"
               R"("endIndexInSamples":)" << preroll + end - begin
            << R"(})";
        begin -= preroll;
    }

    // Build the initiator json.
    std::ostringstream initiatorJson;
    initiatorJson
        << R"("initiator":{)"
           R"("type":")" << initiatorToString(initiator) << R"(",)"
           R"("payload":{)"
        << initiatorPayloadJson.str()
        << R"(})"
           R"(})";

    return executeRecognize(provider, initiatorJson.str(), begin, keyword);
}

bool AudioInputProcessor::executeRecognize(
    AudioProvider provider,
    const std::string& initiatorJson,
    avsCommon::avs::AudioInputStream::Index begin,
    const std::string& keyword) {
    if (!provider.stream) {
        ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "nullAudioInputStream"));
        return false;
    }
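
    // Validate the audio format. The checks below mirror the only encoding AVS accepts for
    // Recognize events: 16 kHz, 16-bit, little-endian, mono LPCM (the
    // "AUDIO_L16_RATE_16000_CHANNELS_1" format declared in the payload).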
    if (provider.format.encoding != avsCommon::utils::AudioFormat::Encoding::LPCM) {
        ACSDK_ERROR(LX("executeRecognizeFailed")
                .d("reason", "unsupportedEncoding")
                .d("encoding", provider.format.encoding));
        return false;
    } else if (provider.format.endianness != avsCommon::utils::AudioFormat::Endianness::LITTLE) {
        ACSDK_ERROR(LX("executeRecognizeFailed")
                .d("reason", "unsupportedEndianness")
                .d("endianness", provider.format.endianness));
        return false;
    } else if (provider.format.sampleSizeInBits != 16) {
        ACSDK_ERROR(LX("executeRecognizeFailed")
                .d("reason", "unsupportedSampleSize")
                .d("sampleSize", provider.format.sampleSizeInBits));
        return false;
    } else if (provider.format.sampleRateHz != 16000) {
        ACSDK_ERROR(LX("executeRecognizeFailed")
                .d("reason", "unsupportedSampleRate")
                .d("sampleRate", provider.format.sampleRateHz));
        return false;
    } else if (provider.format.numChannels != 1) {
        ACSDK_ERROR(LX("executeRecognizeFailed")
                .d("reason", "unsupportedNumChannels")
                .d("channels", provider.format.numChannels));
        return false;
    }

    // If this is a barge-in, verify that it is permitted.
    switch (m_state) {
        case ObserverInterface::State::IDLE:
        case ObserverInterface::State::EXPECTING_SPEECH:
            break;
        case ObserverInterface::State::RECOGNIZING:
            // Barge-in is only permitted if the audio providers have compatible policies.
            if (!m_lastAudioProvider.canBeOverridden) {
                ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "Active audio provider cannot be overridden"));
                return false;
            }
            if (!provider.canOverride) {
                ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "New audio provider cannot override"));
                return false;
            }

            // Note: AVS only sends StopCapture for NEAR_FIELD/FAR_FIELD; it does not send StopCapture for CLOSE_TALK.
            if (m_lastAudioProvider && ASRProfile::CLOSE_TALK != m_lastAudioProvider.profile) {
                // TODO: Enable barge-in during recognize once AVS sends dialogRequestId with StopCapture (ACSDK-355).
                ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "Barge-in is not permitted while recognizing"));
                return false;
            }
            break;
        case ObserverInterface::State::BUSY:
            ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "Barge-in is not permitted while busy"));
            return false;
    }

    // Assemble the event payload.
    std::ostringstream payload;
    payload << R"({)"
               R"("profile":")" << provider.profile << R"(",)"
               R"("format":"AUDIO_L16_RATE_16000_CHANNELS_1")";
    if (!initiatorJson.empty()) {
        payload << "," << initiatorJson;
    }
    payload << R"(})";

    // Set up an attachment reader for the event.
    avsCommon::avs::attachment::InProcessAttachmentReader::SDSTypeIndex offset = 0;
    avsCommon::avs::attachment::InProcessAttachmentReader::SDSTypeReader::Reference reference =
        avsCommon::avs::attachment::InProcessAttachmentReader::SDSTypeReader::Reference::BEFORE_WRITER;
    if (INVALID_INDEX != begin) {
        offset = begin;
        reference = avsCommon::avs::attachment::InProcessAttachmentReader::SDSTypeReader::Reference::ABSOLUTE;
    }
    m_reader = avsCommon::avs::attachment::InProcessAttachmentReader::create(
        avsCommon::avs::attachment::AttachmentReader::Policy::NON_BLOCKING,
        provider.stream,
        offset,
        reference);
    if (!m_reader) {
        ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "Failed to create attachment reader"));
        return false;
    }

    // Code below this point changes the state of AIP. Formally update state now, and don't error out without calling
    // executeResetState() after this point.
    setState(ObserverInterface::State::RECOGNIZING);

    // Note that we're preparing to send a Recognize event.
    m_preparingToSend = true;

    // Start assembling the context; we'll service the callback after assembling our Recognize event.
    m_contextManager->getContext(shared_from_this());

    // Stop the ExpectSpeech timer so we don't get a timeout.
    m_expectingSpeechTimer.stop();

    // Update state if we're changing wakewords.
    if (!keyword.empty() && m_wakeword != keyword) {
        m_wakeword = keyword;
        executeProvideState();
    }

    // Record provider as the last-used AudioProvider so it can be used in the event of an ExpectSpeech directive.
    m_lastAudioProvider = provider;

    // Record the event payload for later use by executeOnContextAvailable().
    m_payload = payload.str();

    // We can't assemble the MessageRequest until we receive the context.
    m_request.reset();

    return true;
}

void AudioInputProcessor::executeOnContextAvailable(const std::string jsonContext) {
    ACSDK_DEBUG(LX("executeOnContextAvailable")
            .d("jsonContext", jsonContext));

    // Should already be RECOGNIZING if we get here.
    if (m_state != ObserverInterface::State::RECOGNIZING) {
        ACSDK_ERROR(LX("executeOnContextAvailableFailed")
                .d("reason", "Not permitted in current state")
                .d("state", m_state));
        return;
    }

    // Should already have a reader.
    if (!m_reader) {
        ACSDK_ERROR(LX("executeOnContextAvailableFailed").d("reason", "nullReader"));
        executeResetState();
        return;
    }
    // Payload should not be empty.
    if (m_payload.empty()) {
        ACSDK_ERROR(LX("executeOnContextAvailableFailed").d("reason", "payloadEmpty"));
        executeResetState();
        return;
    }

    // Start acquiring the channel right away; we'll service the callback after assembling our Recognize event.
    if (m_focusState != avsCommon::avs::FocusState::FOREGROUND) {
        if (!m_focusManager->acquireChannel(CHANNEL_NAME, shared_from_this(), ACTIVITY_ID)) {
            ACSDK_ERROR(LX("executeOnContextAvailableFailed").d("reason", "Unable to acquire channel"));
            executeResetState();
            return;
        }
    }

    // Assemble the MessageRequest. It will be sent by executeOnFocusChanged when we acquire the channel.
    auto dialogRequestId = avsCommon::utils::uuidGeneration::generateUUID();
    m_directiveSequencer->setDialogRequestId(dialogRequestId);
    auto msgIdAndJsonEvent = buildJsonEventString(
        "Recognize",
        dialogRequestId,
        m_payload,
        jsonContext);
    m_request = std::make_shared<MessageRequest>(shared_from_this(), msgIdAndJsonEvent.second, m_reader);

    // If we already have focus, there won't be a callback to send the message, so send it now.
    if (avsCommon::avs::FocusState::FOREGROUND == m_focusState) {
        sendRequestNow();
    }
}

void AudioInputProcessor::executeOnContextFailure(const avsCommon::sdkInterfaces::ContextRequestError error) {
    ACSDK_ERROR(LX("executeOnContextFailure").d("error", error));
    executeResetState();
}

void AudioInputProcessor::executeOnFocusChanged(avsCommon::avs::FocusState newFocus) {
    ACSDK_DEBUG(LX("executeOnFocusChanged").d("newFocus", newFocus));

    // Note new focus state.
    m_focusState = newFocus;

    // If we're losing focus, stop using the channel.
    if (newFocus != avsCommon::avs::FocusState::FOREGROUND) {
        ACSDK_DEBUG(LX("executeOnFocusChanged").d("reason", "Lost focus"));
        executeResetState();
        return;
    }

    // We're not losing the channel (m_focusState == avsCommon::avs::FocusState::FOREGROUND). For all
    // states except RECOGNIZING, there's nothing more to do here.
    if (m_state != ObserverInterface::State::RECOGNIZING) {
        return;
    }

    // For a focus change to FOREGROUND in the Recognizing state, we may have a message queued up to send. If we do,
    // we can safely send it now.
    if (m_request) {
        sendRequestNow();
    }
}

bool AudioInputProcessor::executeStopCapture(bool stopImmediately, std::shared_ptr<DirectiveInfo> info) {
    if (m_state != ObserverInterface::State::RECOGNIZING) {
        static const char* errorMessage = "StopCapture only allowed in RECOGNIZING state.";
        if (info) {
            if (info->result) {
                info->result->setFailed(errorMessage);
            }
            removeDirective(info);
        }
        ACSDK_ERROR(LX("executeStopCaptureFailed")
                .d("reason", "invalidState")
                .d("expectedState", "RECOGNIZING")
                .d("state", m_state));
        return false;
    }
    // Create a lambda to do the StopCapture.
    std::function<void()> stopCapture = [=] {
        ACSDK_DEBUG(LX("stopCapture").d("stopImmediately", stopImmediately));
        if (stopImmediately) {
            m_reader->close(avsCommon::avs::attachment::AttachmentReader::ClosePoint::IMMEDIATELY);
        } else {
            m_reader->close(avsCommon::avs::attachment::AttachmentReader::ClosePoint::AFTER_DRAINING_CURRENT_BUFFER);
        }

        m_reader.reset();
        setState(ObserverInterface::State::BUSY);

        if (info) {
            if (info->result) {
                info->result->setCompleted();
            }
            removeDirective(info);
        }
    };

    if (m_preparingToSend) {
        // If we're still preparing to send, we'll save the lambda and call it after we start sending.
        m_deferredStopCapture = stopCapture;
    } else {
        // Otherwise, we can call the lambda now.
        stopCapture();
    }

    return true;
}

void AudioInputProcessor::executeResetState() {
    // Irrespective of current state, clean up and go back to idle.
    m_expectingSpeechTimer.stop();
    if (m_reader) {
        m_reader->close();
    }
    m_reader.reset();
    m_request.reset();
    m_preparingToSend = false;
    m_deferredStopCapture = nullptr;
    if (m_focusState != avsCommon::avs::FocusState::NONE) {
        m_focusManager->releaseChannel(CHANNEL_NAME, shared_from_this());
    }
    m_focusState = avsCommon::avs::FocusState::NONE;
    setState(ObserverInterface::State::IDLE);
}

bool AudioInputProcessor::executeExpectSpeech(
    std::chrono::milliseconds timeout, std::string initiator, std::shared_ptr<DirectiveInfo> info) {

    if (m_state != ObserverInterface::State::IDLE &&
            m_state != ObserverInterface::State::BUSY) {
        static const char* errorMessage = "ExpectSpeech only allowed in IDLE or BUSY state.";
        if (info->result) {
            info->result->setFailed(errorMessage);
        }
        removeDirective(info);
        ACSDK_ERROR(LX("executeExpectSpeechFailed")
                .d("reason", "invalidState")
                .d("expectedState", "IDLE/BUSY")
                .d("state", m_state));
        return false;
    }

    // Start the ExpectSpeech timer.
    if (!m_expectingSpeechTimer.start(timeout, std::bind(&AudioInputProcessor::expectSpeechTimedOut, this)).valid()) {
        ACSDK_ERROR(LX("executeExpectSpeechFailed").d("reason", "startTimerFailed"));
    }
    setState(ObserverInterface::State::EXPECTING_SPEECH);
    if (info->result) {
        info->result->setCompleted();
    }
    removeDirective(info);

    // If possible, start recognizing immediately.
    if (m_lastAudioProvider && m_lastAudioProvider.alwaysReadable) {
        return executeRecognize(m_lastAudioProvider, initiator);
    } else if (m_defaultAudioProvider && m_defaultAudioProvider.alwaysReadable) {
        return executeRecognize(m_defaultAudioProvider, initiator);
    }

    return true;
}

bool AudioInputProcessor::executeExpectSpeechTimedOut() {
    if (m_state != ObserverInterface::State::EXPECTING_SPEECH) {
        ACSDK_ERROR(LX("executeExpectSpeechTimedOutFailure")
                .d("reason", "invalidState")
                .d("expectedState", "EXPECTING_SPEECH")
                .d("state", m_state));
        return false;
    }

    auto msgIdAndJsonEvent = buildJsonEventString("ExpectSpeechTimedOut");
    auto request = std::make_shared<MessageRequest>(shared_from_this(), msgIdAndJsonEvent.second, m_reader);
    m_messageSender->sendMessage(request);
    setState(ObserverInterface::State::IDLE);
    ACSDK_ERROR(LX("executeExpectSpeechFailed").d("reason", "Timed Out"));
    return true;
}

void AudioInputProcessor::executeProvideState(bool sendToken, unsigned int stateRequestToken) {
    std::ostringstream context;
    context << R"({"wakeword" : ")" << m_wakeword << R"("})";
    if (sendToken) {
        m_contextManager->setState(
            RECOGNIZER_STATE,
            context.str(),
            avsCommon::avs::StateRefreshPolicy::NEVER,
            stateRequestToken);
    } else {
        m_contextManager->setState(
            RECOGNIZER_STATE,
            context.str(),
            avsCommon::avs::StateRefreshPolicy::NEVER);
    }
}

void AudioInputProcessor::executeOnDialogUXStateChanged(
    avsCommon::sdkInterfaces::DialogUXStateObserverInterface::DialogUXState newState) {
    if (!m_initialDialogUXStateReceived) {
        // The initial dialog UX state change call comes from simply registering as an observer; it is not a
        // deliberate change to the dialog state which should interrupt a recognize event.
        m_initialDialogUXStateReceived = true;
        return;
    }
    if (newState != avsCommon::sdkInterfaces::DialogUXStateObserverInterface::DialogUXState::IDLE) {
        return;
    }
    if (m_focusState != avsCommon::avs::FocusState::NONE) {
        m_focusManager->releaseChannel(CHANNEL_NAME, shared_from_this());
        m_focusState = avsCommon::avs::FocusState::NONE;
    }
    setState(ObserverInterface::State::IDLE);
}

void AudioInputProcessor::setState(ObserverInterface::State state) {
    if (m_state == state) {
        return;
    }

    // Reset the user inactivity timeout if transitioning to or from the RECOGNIZING state.
    if (ObserverInterface::State::RECOGNIZING == m_state || ObserverInterface::State::RECOGNIZING == state) {
        m_userActivityNotifier->onUserActive();
    }

    ACSDK_DEBUG(LX("setState").d("from", m_state).d("to", state));
    m_state = state;
    for (auto observer : m_observers) {
        observer->onStateChanged(m_state);
    }
}

void AudioInputProcessor::removeDirective(std::shared_ptr<DirectiveInfo> info) {
    // Check result too, to catch cases where DirectiveInfo was created locally with a nullptr result.
    // In those cases there is no messageId to remove because no result was expected.
    if (info->directive && info->result) {
        CapabilityAgent::removeDirective(info->directive->getMessageId());
    }
}

void AudioInputProcessor::sendRequestNow() {
    m_messageSender->sendMessage(m_request);
    m_request.reset();
    m_preparingToSend = false;
    if (m_deferredStopCapture) {
        m_deferredStopCapture();
        m_deferredStopCapture = nullptr;
    }
}

} // namespace aip
} // namespace capabilityAgents
} // namespace alexaClientSDK