avs-device-sdk/CapabilityAgents/AIP/src/AudioInputProcessor.cpp


/*
* AudioInputProcessor.cpp
*
* Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/apache2.0/
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#include <sstream>
#include <AVSCommon/Utils/JSON/JSONUtils.h>
#include <AVSCommon/AVS/FocusState.h>
#include <AVSCommon/Utils/Logger/Logger.h>
#include <AVSCommon/Utils/UUIDGeneration/UUIDGeneration.h>
#include "AIP/AudioInputProcessor.h"
#include "AIP/MessageRequest.h"
namespace alexaClientSDK {
namespace capabilityAgents {
namespace aip {
using namespace avsCommon::utils::logger;
/// String to identify log entries originating from this file.
static const std::string TAG("AudioInputProcessor");
/**
* Create a LogEntry using this file's TAG and the specified event string.
*
* @param event The event string for this @c LogEntry.
*/
#define LX(event) alexaClientSDK::avsCommon::utils::logger::LogEntry(TAG, event)
/// The name of the @c FocusManager channel used by @c AudioInputProcessor.
static const std::string CHANNEL_NAME = avsCommon::sdkInterfaces::FocusManagerInterface::DIALOG_CHANNEL_NAME;
/// The activityId string used with @c FocusManager by @c AudioInputProcessor.
static const std::string ACTIVITY_ID = "SpeechRecognizer.Recognize";
/// The namespace for this capability agent.
static const std::string NAMESPACE = "SpeechRecognizer";
/// The StopCapture directive signature.
static const avsCommon::avs::NamespaceAndName STOP_CAPTURE{NAMESPACE, "StopCapture"};
/// The ExpectSpeech directive signature.
static const avsCommon::avs::NamespaceAndName EXPECT_SPEECH{NAMESPACE, "ExpectSpeech"};
/// The SpeechRecognizer context state signature.
static const avsCommon::avs::NamespaceAndName RECOGNIZER_STATE{NAMESPACE, "RecognizerState"};
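/*
 * A minimal wiring sketch for this factory (illustrative only; the collaborating objects and names
 * below are assumed to come from the application's usual SDK bootstrap and are not defined here):
 *
 *   auto aip = AudioInputProcessor::create(
 *       directiveSequencer, messageSender, contextManager, focusManager,
 *       dialogUXStateAggregator, exceptionSender, userActivityNotifier,
 *       defaultAudioProvider);
 *   if (aip) {
 *       directiveSequencer->addDirectiveHandler(aip);
 *   }
 */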
std::shared_ptr<AudioInputProcessor> AudioInputProcessor::create(
std::shared_ptr<avsCommon::sdkInterfaces::DirectiveSequencerInterface> directiveSequencer,
std::shared_ptr<avsCommon::sdkInterfaces::MessageSenderInterface> messageSender,
std::shared_ptr<avsCommon::sdkInterfaces::ContextManagerInterface> contextManager,
std::shared_ptr<avsCommon::sdkInterfaces::FocusManagerInterface> focusManager,
std::shared_ptr<avsCommon::avs::DialogUXStateAggregator> dialogUXStateAggregator,
std::shared_ptr<avsCommon::sdkInterfaces::ExceptionEncounteredSenderInterface> exceptionEncounteredSender,
std::shared_ptr<avsCommon::sdkInterfaces::UserActivityNotifierInterface> userActivityNotifier,
AudioProvider defaultAudioProvider) {
if (!directiveSequencer) {
ACSDK_ERROR(LX("createFailed").d("reason", "nullDirectiveSequencer"));
return nullptr;
} else if (!messageSender) {
ACSDK_ERROR(LX("createFailed").d("reason", "nullMessageSender"));
return nullptr;
} else if (!contextManager) {
ACSDK_ERROR(LX("createFailed").d("reason", "nullContextManager"));
return nullptr;
} else if (!focusManager) {
ACSDK_ERROR(LX("createFailed").d("reason", "nullFocusManager"));
return nullptr;
} else if (!dialogUXStateAggregator) {
ACSDK_ERROR(LX("createFailed").d("reason", "nullDialogUXStateAggregator"));
return nullptr;
} else if (!exceptionEncounteredSender) {
ACSDK_ERROR(LX("createFailed").d("reason", "nullExceptionEncounteredSender"));
return nullptr;
} else if (!userActivityNotifier) {
ACSDK_ERROR(LX("createFailed").d("reason", "nullUserActivityNotifier"));
return nullptr;
}
auto aip = std::shared_ptr<AudioInputProcessor>(new AudioInputProcessor(
directiveSequencer,
messageSender,
contextManager,
focusManager,
exceptionEncounteredSender,
userActivityNotifier,
defaultAudioProvider));
if (aip) {
contextManager->setStateProvider(RECOGNIZER_STATE, aip);
dialogUXStateAggregator->addObserver(aip);
}
return aip;
}
avsCommon::avs::DirectiveHandlerConfiguration AudioInputProcessor::getConfiguration() const {
avsCommon::avs::DirectiveHandlerConfiguration configuration;
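    // Both directives are registered as NON_BLOCKING so their handling does not hold up
    // subsequent directives in the sequencer.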
configuration[STOP_CAPTURE] = avsCommon::avs::BlockingPolicy::NON_BLOCKING;
configuration[EXPECT_SPEECH] = avsCommon::avs::BlockingPolicy::NON_BLOCKING;
return configuration;
}
void AudioInputProcessor::addObserver(std::shared_ptr<ObserverInterface> observer) {
if (!observer) {
ACSDK_ERROR(LX("addObserverFailed").d("reason", "nullObserver"));
return;
}
m_executor.submit([this, observer] () { m_observers.insert(observer); });
}
void AudioInputProcessor::removeObserver(std::shared_ptr<ObserverInterface> observer) {
if (!observer) {
ACSDK_ERROR(LX("removeObserverFailed").d("reason", "nullObserver"));
return;
}
m_executor.submit([this, observer] () { m_observers.erase(observer); }).wait();
}
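/*
 * Typical invocations (illustrative; `aip`, `provider`, and the index values are assumed to exist
 * in application code):
 *
 *   // Tap-to-talk: no begin index, so streaming starts from the time of the call.
 *   aip->recognize(provider, Initiator::TAP);
 *
 *   // Wakeword: pass the keyword plus the begin/end indices reported by the keyword detector.
 *   aip->recognize(provider, Initiator::WAKEWORD, keywordBegin, keywordEnd, "ALEXA");
 */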
std::future<bool> AudioInputProcessor::recognize(
AudioProvider audioProvider,
Initiator initiator,
avsCommon::avs::AudioInputStream::Index begin,
avsCommon::avs::AudioInputStream::Index keywordEnd,
std::string keyword) {
// If no begin index was provided, grab the current index ASAP so that we can start streaming from the time this
// call was made.
if (audioProvider.stream && INVALID_INDEX == begin) {
static const bool startWithNewData = true;
auto reader = audioProvider.stream->createReader(
avsCommon::avs::AudioInputStream::Reader::Policy::NONBLOCKING,
startWithNewData);
if (!reader) {
ACSDK_ERROR(LX("recognizeFailed").d("reason", "createReaderFailed"));
std::promise<bool> ret;
ret.set_value(false);
return ret.get_future();
}
begin = reader->tell();
}
return m_executor.submit(
[this, audioProvider, initiator, begin, keywordEnd, keyword] () {
return executeRecognize(audioProvider, initiator, begin, keywordEnd, keyword);
}
);
}
std::future<bool> AudioInputProcessor::stopCapture() {
return m_executor.submit(
[this] () {
return executeStopCapture();
}
);
}
std::future<void> AudioInputProcessor::resetState() {
return m_executor.submit(
[this] () {
executeResetState();
}
);
}
void AudioInputProcessor::provideState(unsigned int stateRequestToken) {
m_executor.submit(
[this, stateRequestToken] () {
executeProvideState(true, stateRequestToken);
}
);
}
void AudioInputProcessor::onContextAvailable(const std::string& jsonContext) {
m_executor.submit(
[this, jsonContext] () {
executeOnContextAvailable(jsonContext);
}
);
}
void AudioInputProcessor::onContextFailure(const avsCommon::sdkInterfaces::ContextRequestError error) {
m_executor.submit(
[this, error] () {
executeOnContextFailure(error);
}
);
}
void AudioInputProcessor::handleDirectiveImmediately(std::shared_ptr<avsCommon::avs::AVSDirective> directive) {
handleDirective(std::make_shared<DirectiveInfo>(directive, nullptr));
}
void AudioInputProcessor::preHandleDirective(std::shared_ptr<DirectiveInfo> info) {
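    // Intentionally a no-op: SpeechRecognizer directives require no pre-handling; all work
    // happens in handleDirective().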
}
void AudioInputProcessor::handleDirective(std::shared_ptr<DirectiveInfo> info) {
if (!info) {
ACSDK_ERROR(LX("handleDirectiveFailed").d("reason", "nullDirectiveInfo"));
return;
}
if (info->directive->getName() == STOP_CAPTURE.name) {
handleStopCaptureDirective(info);
} else if (info->directive->getName() == EXPECT_SPEECH.name) {
handleExpectSpeechDirective(info);
} else {
std::string errorMessage =
"unexpected directive " +
info->directive->getNamespace() + ":" + info->directive->getName();
m_exceptionEncounteredSender->sendExceptionEncountered(
info->directive->getUnparsedDirective(),
avsCommon::avs::ExceptionErrorType::UNEXPECTED_INFORMATION_RECEIVED,
errorMessage);
if (info->result) {
info->result->setFailed(errorMessage);
}
ACSDK_ERROR(LX("handleDirectiveFailed")
.d("reason", "unknownDirective")
.d("namespace", info->directive->getNamespace())
.d("name", info->directive->getName()));
}
}
void AudioInputProcessor::cancelDirective(std::shared_ptr<DirectiveInfo> info) {
resetState();
removeDirective(info);
}
void AudioInputProcessor::onDeregistered() {
resetState();
}
void AudioInputProcessor::onFocusChanged(avsCommon::avs::FocusState newFocus) {
m_executor.submit(
[this, newFocus] () {
executeOnFocusChanged(newFocus);
}
);
}
void AudioInputProcessor::onDialogUXStateChanged(
avsCommon::sdkInterfaces::DialogUXStateObserverInterface::DialogUXState newState) {
m_executor.submit(
[this, newState] () {
executeOnDialogUXStateChanged(newState);
}
);
}
AudioInputProcessor::AudioInputProcessor(
std::shared_ptr<avsCommon::sdkInterfaces::DirectiveSequencerInterface> directiveSequencer,
std::shared_ptr<avsCommon::sdkInterfaces::MessageSenderInterface> messageSender,
std::shared_ptr<avsCommon::sdkInterfaces::ContextManagerInterface> contextManager,
std::shared_ptr<avsCommon::sdkInterfaces::FocusManagerInterface> focusManager,
std::shared_ptr<avsCommon::sdkInterfaces::ExceptionEncounteredSenderInterface> exceptionEncounteredSender,
std::shared_ptr<avsCommon::sdkInterfaces::UserActivityNotifierInterface> userActivityNotifier,
AudioProvider defaultAudioProvider) :
CapabilityAgent{NAMESPACE, exceptionEncounteredSender},
RequiresShutdown{"AudioInputProcessor"},
m_directiveSequencer{directiveSequencer},
m_messageSender{messageSender},
m_contextManager{contextManager},
m_focusManager{focusManager},
m_userActivityNotifier{userActivityNotifier},
m_defaultAudioProvider{defaultAudioProvider},
m_lastAudioProvider{AudioProvider::null()},
m_state{ObserverInterface::State::IDLE},
m_focusState{avsCommon::avs::FocusState::NONE},
m_preparingToSend{false},
m_initialDialogUXStateReceived{false} {
}
void AudioInputProcessor::doShutdown() {
m_executor.shutdown();
executeResetState();
m_directiveSequencer.reset();
m_messageSender.reset();
m_contextManager.reset();
m_focusManager.reset();
m_userActivityNotifier.reset();
m_observers.clear();
}
std::future<bool> AudioInputProcessor::expectSpeechTimedOut() {
return m_executor.submit(
[this] () {
return executeExpectSpeechTimedOut();
}
);
}
void AudioInputProcessor::handleStopCaptureDirective(std::shared_ptr<DirectiveInfo> info) {
m_executor.submit(
[this, info] () {
bool stopImmediately = true;
executeStopCapture(stopImmediately, info);
}
);
}
void AudioInputProcessor::handleExpectSpeechDirective(std::shared_ptr<DirectiveInfo> info) {
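    // An ExpectSpeech payload is expected to look roughly like (illustrative):
    //   {"timeoutInMilliseconds": 8000, "initiator": "..."}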
int64_t timeout;
bool found = avsCommon::utils::json::jsonUtils::lookupInt64Value(
info->directive->getPayload(),
"timeoutInMilliseconds",
&timeout);
if (!found) {
static const char * errorMessage = "missing/invalid timeoutInMilliseconds";
m_exceptionEncounteredSender->sendExceptionEncountered(
info->directive->getUnparsedDirective(),
avsCommon::avs::ExceptionErrorType::UNSUPPORTED_OPERATION,
errorMessage);
if (info->result) {
info->result->setFailed(errorMessage);
}
ACSDK_ERROR(LX("handleExpectSpeechDirectiveFailed")
.d("reason", "missingJsonField")
.d("field", "timeoutInMilliseconds"));
return;
}
std::string initiator;
avsCommon::utils::json::jsonUtils::lookupStringValue(info->directive->getPayload(), "initiator", &initiator);
m_executor.submit(
[this, timeout, initiator, info] () {
executeExpectSpeech(std::chrono::milliseconds{timeout}, initiator, info);
}
);
}
bool AudioInputProcessor::executeRecognize(
AudioProvider provider,
Initiator initiator,
avsCommon::avs::AudioInputStream::Index begin,
avsCommon::avs::AudioInputStream::Index end,
const std::string& keyword) {
// Make sure we have a keyword if this is a wakeword initiator.
if (Initiator::WAKEWORD == initiator && keyword.empty()) {
ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "emptyKeywordWithWakewordInitiator"));
return false;
}
// Preroll duration of 500ms, expressed in samples.
avsCommon::avs::AudioInputStream::Index preroll = provider.format.sampleRateHz / 2;
// Check if we have everything we need to enable false wakeword detection.
// TODO: Consider relaxing the hard requirement for a full 500ms preroll - ACSDK-276.
bool falseWakewordDetection =
Initiator::WAKEWORD == initiator &&
begin != INVALID_INDEX &&
begin >= preroll &&
end != INVALID_INDEX;
// If we will be enabling false wakeword detection, add preroll and build the initiator payload.
std::ostringstream initiatorPayloadJson;
// TODO: Consider reworking this code to use RapidJSON - ACSDK-279.
if (falseWakewordDetection) {
initiatorPayloadJson
<< R"("wakeWordIndices":{)"
R"("startIndexInSamples":)" << preroll << R"(,)"
R"("endIndexInSamples":)" << preroll + end - begin
<< R"(})";
begin -= preroll;
}
// Build the initiator json.
std::ostringstream initiatorJson;
initiatorJson
<< R"("initiator":{)"
R"("type":")" << initiatorToString(initiator) << R"(",)"
R"("payload":{)"
<< initiatorPayloadJson.str()
<< R"(})"
R"(})";
return executeRecognize(provider, initiatorJson.str(), begin, keyword);
}
bool AudioInputProcessor::executeRecognize(
AudioProvider provider,
const std::string& initiatorJson,
avsCommon::avs::AudioInputStream::Index begin,
const std::string& keyword) {
if (!provider.stream) {
ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "nullAudioInputStream"));
return false;
}
if (provider.format.encoding != avsCommon::utils::AudioFormat::Encoding::LPCM) {
ACSDK_ERROR(LX("executeRecognizeFailed")
.d("reason", "unsupportedEncoding")
.d("encoding", provider.format.encoding));
return false;
} else if (provider.format.endianness != avsCommon::utils::AudioFormat::Endianness::LITTLE) {
ACSDK_ERROR(LX("executeRecognizeFailed")
.d("reason", "unsupportedEndianness")
.d("endianness", provider.format.endianness));
return false;
} else if (provider.format.sampleSizeInBits != 16) {
ACSDK_ERROR(LX("executeRecognizeFailed")
.d("reason", "unsupportedSampleSize")
.d("sampleSize", provider.format.sampleSizeInBits));
return false;
} else if (provider.format.sampleRateHz != 16000) {
ACSDK_ERROR(LX("executeRecognizeFailed")
.d("reason", "unsupportedSampleRate")
.d("sampleRate", provider.format.sampleRateHz));
return false;
} else if (provider.format.numChannels != 1) {
ACSDK_ERROR(LX("executeRecognizeFailed")
.d("reason", "unsupportedNumChannels")
.d("channels", provider.format.numChannels));
return false;
}
// If this is a barge-in, verify that it is permitted.
switch (m_state) {
case ObserverInterface::State::IDLE:
case ObserverInterface::State::EXPECTING_SPEECH:
break;
case ObserverInterface::State::RECOGNIZING:
// Barge-in is only permitted if the audio providers have compatible policies.
if (!m_lastAudioProvider.canBeOverridden) {
ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "Active audio provider can not be overridden"));
return false;
}
if (!provider.canOverride) {
ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "New audio provider can not override"));
return false;
}
// Note: AVS only sends StopCapture for NEAR_FIELD/FAR_FIELD; it does not send StopCapture for CLOSE_TALK.
if (m_lastAudioProvider && ASRProfile::CLOSE_TALK != m_lastAudioProvider.profile) {
// TODO: Enable barge-in during recognize once AVS sends dialogRequestId with StopCapture(ACSDK-355).
ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "Barge-in is not permitted while recognizing"));
return false;
}
break;
case ObserverInterface::State::BUSY:
ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "Barge-in is not permitted while busy"));
return false;
}
// Assemble the event payload.
std::ostringstream payload;
payload << R"({)"
R"("profile":")" << provider.profile << R"(",)"
R"("format":"AUDIO_L16_RATE_16000_CHANNELS_1")";
if (!initiatorJson.empty()) {
payload << "," << initiatorJson;
}
payload << R"(})";
// Set up an attachment reader for the event.
avsCommon::avs::attachment::InProcessAttachmentReader::SDSTypeIndex offset = 0;
avsCommon::avs::attachment::InProcessAttachmentReader::SDSTypeReader::Reference reference =
avsCommon::avs::attachment::InProcessAttachmentReader::SDSTypeReader::Reference::BEFORE_WRITER;
if (INVALID_INDEX != begin) {
offset = begin;
reference = avsCommon::avs::attachment::InProcessAttachmentReader::SDSTypeReader::Reference::ABSOLUTE;
}
m_reader = avsCommon::avs::attachment::InProcessAttachmentReader::create(
avsCommon::avs::attachment::AttachmentReader::Policy::NON_BLOCKING,
provider.stream,
offset,
reference);
if (!m_reader) {
ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "Failed to create attachment reader"));
return false;
}
// Code below this point changes the state of AIP. Formally update state now, and don't error out without calling
// executeResetState() after this point.
setState(ObserverInterface::State::RECOGNIZING);
// Note that we're preparing to send a Recognize event.
m_preparingToSend = true;
// Start assembling the context; we'll service the callback after assembling our Recognize event.
m_contextManager->getContext(shared_from_this());
// Stop the ExpectSpeech timer so we don't get a timeout.
m_expectingSpeechTimer.stop();
// Update state if we're changing wakewords.
if (!keyword.empty() && m_wakeword != keyword) {
m_wakeword = keyword;
executeProvideState();
}
// Record provider as the last-used AudioProvider so it can be used in the event of an ExpectSpeech directive.
m_lastAudioProvider = provider;
// Record the event payload for later use by executeContextAvailable().
m_payload = payload.str();
// We can't assemble the MessageRequest until we receive the context.
m_request.reset();
return true;
}
void AudioInputProcessor::executeOnContextAvailable(const std::string jsonContext) {
ACSDK_DEBUG(LX("executeOnContextAvailable")
.d("jsonContext", jsonContext));
// Should already be RECOGNIZING if we get here.
if (m_state != ObserverInterface::State::RECOGNIZING) {
ACSDK_ERROR(LX("executeOnContextAvailableFailed")
.d("reason", "Not permitted in current state")
.d("state", m_state));
return;
}
// Should already have a reader.
if (!m_reader) {
ACSDK_ERROR(LX("executeOnContextAvailableFailed").d("reason", "nullReader"));
executeResetState();
return;
}
// Payload should not be empty.
if (m_payload.empty()) {
ACSDK_ERROR(LX("executeOnContextAvailableFailed").d("reason", "payloadEmpty"));
executeResetState();
return;
}
// Start acquiring the channel right away; we'll service the callback after assembling our Recognize event.
if (m_focusState != avsCommon::avs::FocusState::FOREGROUND) {
if (!m_focusManager->acquireChannel(CHANNEL_NAME, shared_from_this(), ACTIVITY_ID)) {
ACSDK_ERROR(LX("executeOnContextAvailableFailed").d("reason", "Unable to acquire channel"));
executeResetState();
return;
}
}
// Assemble the MessageRequest. It will be sent by executeOnFocusChanged when we acquire the channel.
auto dialogRequestId = avsCommon::utils::uuidGeneration::generateUUID();
m_directiveSequencer->setDialogRequestId(dialogRequestId);
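    // buildJsonEventString() returns a (messageId, json) pair; only the json string is used here.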
auto msgIdAndJsonEvent = buildJsonEventString(
"Recognize",
dialogRequestId,
m_payload,
jsonContext);
m_request = std::make_shared<MessageRequest>(shared_from_this(), msgIdAndJsonEvent.second, m_reader);
// If we already have focus, there won't be a callback to send the message, so send it now.
if (avsCommon::avs::FocusState::FOREGROUND == m_focusState) {
sendRequestNow();
}
}
void AudioInputProcessor::executeOnContextFailure(const avsCommon::sdkInterfaces::ContextRequestError error) {
ACSDK_ERROR(LX("executeOnContextFailure").d("error", error));
executeResetState();
}
void AudioInputProcessor::executeOnFocusChanged(avsCommon::avs::FocusState newFocus) {
ACSDK_DEBUG(LX("executeOnFocusChanged").d("newFocus", newFocus));
// Note new focus state.
m_focusState = newFocus;
// If we're losing focus, stop using the channel.
if (newFocus != avsCommon::avs::FocusState::FOREGROUND) {
ACSDK_DEBUG(LX("executeOnFocusChanged").d("reason", "Lost focus"));
executeResetState();
return;
}
// We're not losing the channel (m_focusState == avsCommon::avs::FocusState::FOREGROUND). For all
// states except RECOGNIZING, there's nothing more to do here.
if (m_state != ObserverInterface::State::RECOGNIZING) {
return;
}
// For a focus change to FOREGROUND in the Recognizing state, we may have a message queued up to send. If we do,
// we can safely send it now.
if (m_request) {
sendRequestNow();
}
}
bool AudioInputProcessor::executeStopCapture(bool stopImmediately, std::shared_ptr<DirectiveInfo> info) {
if (m_state != ObserverInterface::State::RECOGNIZING) {
static const char * errorMessage = "StopCapture only allowed in RECOGNIZING state.";
if (info) {
if (info->result) {
info->result->setFailed(errorMessage);
}
removeDirective(info);
}
ACSDK_ERROR(LX("executeStopCaptureFailed")
.d("reason", "invalidState")
.d("expectedState", "RECOGNIZING")
.d("state", m_state));
return false;
}
// Create a lambda to do the StopCapture.
std::function<void()> stopCapture = [=] {
ACSDK_DEBUG(LX("stopCapture").d("stopImmediately", stopImmediately));
if (stopImmediately) {
m_reader->close(avsCommon::avs::attachment::AttachmentReader::ClosePoint::IMMEDIATELY);
} else {
m_reader->close(avsCommon::avs::attachment::AttachmentReader::ClosePoint::AFTER_DRAINING_CURRENT_BUFFER);
}
m_reader.reset();
setState(ObserverInterface::State::BUSY);
if (info) {
if (info->result) {
info->result->setCompleted();
}
removeDirective(info);
}
};
if (m_preparingToSend) {
// If we're still preparing to send, we'll save the lambda and call it after we start sending.
m_deferredStopCapture = stopCapture;
} else {
// Otherwise, we can call the lambda now.
stopCapture();
}
return true;
}
void AudioInputProcessor::executeResetState() {
// Irrespective of current state, clean up and go back to idle.
m_expectingSpeechTimer.stop();
if (m_reader) {
m_reader->close();
}
m_reader.reset();
m_request.reset();
m_preparingToSend = false;
m_deferredStopCapture = nullptr;
if (m_focusState != avsCommon::avs::FocusState::NONE) {
m_focusManager->releaseChannel(CHANNEL_NAME, shared_from_this());
}
m_focusState = avsCommon::avs::FocusState::NONE;
setState(ObserverInterface::State::IDLE);
}
bool AudioInputProcessor::executeExpectSpeech(
std::chrono::milliseconds timeout, std::string initiator, std::shared_ptr<DirectiveInfo> info) {
if (m_state != ObserverInterface::State::IDLE &&
m_state != ObserverInterface::State::BUSY) {
static const char * errorMessage = "ExpectSpeech only allowed in IDLE or BUSY state.";
if (info->result) {
info->result->setFailed(errorMessage);
}
removeDirective(info);
ACSDK_ERROR(LX("executeExpectSpeechFailed")
.d("reason", "invalidState")
.d("expectedState", "IDLE/BUSY")
.d("state", m_state));
return false;
}
// Start the ExpectSpeech timer.
if (!m_expectingSpeechTimer.start(timeout, std::bind(&AudioInputProcessor::expectSpeechTimedOut, this)).valid()) {
ACSDK_ERROR(LX("executeExpectSpeechFailed").d("reason", "startTimerFailed"));
}
setState(ObserverInterface::State::EXPECTING_SPEECH);
if (info->result) {
info->result->setCompleted();
}
removeDirective(info);
// If possible, start recognizing immediately.
if (m_lastAudioProvider && m_lastAudioProvider.alwaysReadable) {
return executeRecognize(m_lastAudioProvider, initiator);
} else if (m_defaultAudioProvider && m_defaultAudioProvider.alwaysReadable) {
return executeRecognize(m_defaultAudioProvider, initiator);
}
return true;
}
bool AudioInputProcessor::executeExpectSpeechTimedOut() {
if (m_state != ObserverInterface::State::EXPECTING_SPEECH) {
ACSDK_ERROR(LX("executeExpectSpeechTimedOutFailure")
.d("reason", "invalidState")
.d("expectedState", "EXPECTING_SPEECH")
.d("state", m_state));
return false;
}
auto msgIdAndJsonEvent = buildJsonEventString("ExpectSpeechTimedOut");
auto request = std::make_shared<MessageRequest>(shared_from_this(), msgIdAndJsonEvent.second, m_reader);
m_messageSender->sendMessage(request);
setState(ObserverInterface::State::IDLE);
ACSDK_ERROR(LX("executeExpectSpeechFailed").d("reason", "Timed Out"));
return true;
}
void AudioInputProcessor::executeProvideState(bool sendToken, unsigned int stateRequestToken) {
std::ostringstream context;
context << R"({"wakeword" : ")" << m_wakeword << R"("})";
if (sendToken) {
m_contextManager->setState(
RECOGNIZER_STATE,
context.str(),
avsCommon::avs::StateRefreshPolicy::NEVER,
stateRequestToken);
} else {
m_contextManager->setState(
RECOGNIZER_STATE,
context.str(),
avsCommon::avs::StateRefreshPolicy::NEVER);
}
}
void AudioInputProcessor::executeOnDialogUXStateChanged(
avsCommon::sdkInterfaces::DialogUXStateObserverInterface::DialogUXState newState) {
if (!m_initialDialogUXStateReceived) {
// The initial dialog UX state change call comes from simply registering as an observer; it is not a deliberate
// change to the dialog state which should interrupt a recognize event.
m_initialDialogUXStateReceived = true;
return;
}
if (newState != avsCommon::sdkInterfaces::DialogUXStateObserverInterface::DialogUXState::IDLE) {
return;
}
if (m_focusState != avsCommon::avs::FocusState::NONE) {
m_focusManager->releaseChannel(CHANNEL_NAME, shared_from_this());
m_focusState = avsCommon::avs::FocusState::NONE;
}
setState(ObserverInterface::State::IDLE);
}
void AudioInputProcessor::setState(ObserverInterface::State state) {
if (m_state == state) {
return;
}
// Reset the user inactivity timeout when transitioning to or from the `RECOGNIZING` state.
if (ObserverInterface::State::RECOGNIZING == m_state || ObserverInterface::State::RECOGNIZING == state) {
m_userActivityNotifier->onUserActive();
}
ACSDK_DEBUG(LX("setState").d("from", m_state).d("to", state));
m_state = state;
for (auto observer: m_observers) {
observer->onStateChanged(m_state);
}
}
void AudioInputProcessor::removeDirective(std::shared_ptr<DirectiveInfo> info) {
// Check result too, to catch cases where the DirectiveInfo was created locally with a nullptr result.
// In those cases there is no messageId to remove because no result was expected.
if (info->directive && info->result) {
CapabilityAgent::removeDirective(info->directive->getMessageId());
}
}
void AudioInputProcessor::sendRequestNow() {
m_messageSender->sendMessage(m_request);
m_request.reset();
m_preparingToSend = false;
if (m_deferredStopCapture) {
m_deferredStopCapture();
m_deferredStopCapture = nullptr;
}
}
} // namespace aip
} // namespace capabilityAgents
} // namespace alexaClientSDK