/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *     http://aws.amazon.com/apache2.0/
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

#include <algorithm>
#include <cctype>
#include <functional>
#include <list>
#include <sstream>

// rapidjson is used directly by resolveMessageRequest() below.
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>

#include <AVSCommon/AVS/CapabilityConfiguration.h>
#include <AVSCommon/AVS/FocusState.h>
#include <AVSCommon/AVS/MessageRequest.h>
#include <AVSCommon/Utils/JSON/JSONGenerator.h>
#include <AVSCommon/Utils/JSON/JSONUtils.h>
#include <AVSCommon/Utils/Logger/Logger.h>
#include <AVSCommon/Utils/Memory/Memory.h>
#include <AVSCommon/Utils/Metrics.h>
#include <AVSCommon/Utils/Metrics/DataPointDurationBuilder.h>
#include <AVSCommon/Utils/Metrics/DataPointCounterBuilder.h>
#include <AVSCommon/Utils/Metrics/DataPointStringBuilder.h>
#include <AVSCommon/Utils/Metrics/MetricEventBuilder.h>
#include <AVSCommon/Utils/String/StringUtils.h>
#include <AVSCommon/Utils/UUIDGeneration/UUIDGeneration.h>
#include <AVSCommon/AVS/Attachment/AttachmentUtils.h>
#include <Settings/SettingEventMetadata.h>
#include <Settings/SharedAVSSettingProtocol.h>
#include <SpeechEncoder/SpeechEncoder.h>
#include <AVSCommon/AVS/Attachment/DefaultAttachmentReader.h>

#include "AIP/AudioInputProcessor.h"

namespace alexaClientSDK {
namespace capabilityAgents {
namespace aip {

using namespace avsCommon::avs;
using namespace avsCommon::utils;
using namespace avsCommon::utils::logger;
using namespace avsCommon::utils::metrics;
using namespace avsCommon::utils::json;
using namespace avsCommon::sdkInterfaces;
using namespace std::chrono;

/// SpeechRecognizer capability constants
/// SpeechRecognizer interface type
static const std::string SPEECHRECOGNIZER_CAPABILITY_INTERFACE_TYPE = "AlexaInterface";
/// SpeechRecognizer interface name
static const std::string SPEECHRECOGNIZER_CAPABILITY_INTERFACE_NAME = "SpeechRecognizer";

/// SpeechRecognizer interface version
#ifdef OVERRIDE_SPEECHRECOGNIZER_VERSION
static const std::string SPEECHRECOGNIZER_CAPABILITY_INTERFACE_VERSION = OVERRIDE_SPEECHRECOGNIZER_VERSION;
#else
static const std::string SPEECHRECOGNIZER_CAPABILITY_INTERFACE_VERSION = "2.3";
#endif

/// Configuration key used to give more details about the device configuration.
static const std::string CAPABILITY_INTERFACE_CONFIGURATIONS_KEY = "configurations";

/// Supported wake words key.
static const std::string CAPABILITY_INTERFACE_WAKE_WORDS_KEY = "wakeWords";

/// Supported maximum number of concurrent wakewords key.
static const std::string CAPABILITY_MAXIMUM_CONCURRENT_WAKEWORDS_KEY = "maximumConcurrentWakeWords";

/// The scope key for each wake words set.
static const std::string CAPABILITY_INTERFACE_SCOPES_KEY = "scopes";

/// The wake word values for a given scope.
static const std::string CAPABILITY_INTERFACE_VALUES_KEY = "values";

/// The scope configuration used as default locale wake words support.
static const std::string CAPABILITY_INTERFACE_DEFAULT_LOCALE = "DEFAULT";

/// String to identify log entries originating from this file.
static const std::string TAG("AudioInputProcessor");

/**
 * Create a LogEntry using this file's TAG and the specified event string.
 *
 * @param event The event string for this @c LogEntry.
 */
#define LX(event) alexaClientSDK::avsCommon::utils::logger::LogEntry(TAG, event)

/// The name of the @c FocusManager channel used by @c AudioInputProcessor.
static const std::string CHANNEL_NAME = FocusManagerInterface::DIALOG_CHANNEL_NAME;

/// The namespace for this capability agent.
static const std::string NAMESPACE = "SpeechRecognizer";

/// The StopCapture directive signature.
static const avsCommon::avs::NamespaceAndName STOP_CAPTURE{NAMESPACE, "StopCapture"};

/// The ExpectSpeech directive signature.
static const avsCommon::avs::NamespaceAndName EXPECT_SPEECH{NAMESPACE, "ExpectSpeech"};

/// The SetEndOfSpeechOffset directive signature.
static const avsCommon::avs::NamespaceAndName SET_END_OF_SPEECH_OFFSET{NAMESPACE, "SetEndOfSpeechOffset"};

/// The SetWakeWordConfirmation directive signature.
static const avsCommon::avs::NamespaceAndName SET_WAKE_WORD_CONFIRMATION{NAMESPACE, "SetWakeWordConfirmation"};

/// The SetSpeechConfirmation directive signature.
static const avsCommon::avs::NamespaceAndName SET_SPEECH_CONFIRMATION{NAMESPACE, "SetSpeechConfirmation"};

/// The SetWakeWords directive signature.
static const avsCommon::avs::NamespaceAndName SET_WAKE_WORDS{NAMESPACE, "SetWakeWords"};

/// The field identifying the initiator.
static const std::string INITIATOR_KEY = "initiator";

/// The field identifying the initiator's profile.
static const std::string PROFILE_KEY = "profile";

/// The field identifying the initiator's format.
static const std::string FORMAT_KEY = "format";

/// The field identifying the initiator's type.
static const std::string TYPE_KEY = "type";

/// The field identifying the initiator's token.
static const std::string TOKEN_KEY = "token";

/// The field identifying the initiator's payload.
static const std::string PAYLOAD_KEY = "payload";

/// The field identifying the initiator's wakeword indices.
static const std::string WAKEWORD_INDICES_KEY = "wakeWordIndices";

/// The field identifying the initiator's wakeword start index.
static const std::string START_INDEX_KEY = "startIndexInSamples";

/// The field identifying the initiator's wakeword end index.
static const std::string END_INDEX_KEY = "endIndexInSamples";

/// The field identifying the initiator's wake word.
static const std::string WAKE_WORD_KEY = "wakeWord";

/// The field name for the user voice attachment.
static const std::string AUDIO_ATTACHMENT_FIELD_NAME = "audio";

/// The field name for the wake word engine metadata.
static const std::string KWD_METADATA_FIELD_NAME = "wakewordEngineMetadata";

/// The field name for the start of speech timestamp, reported in milliseconds since epoch. This field is provided to
/// the Recognize event and it is sent back as part of the SetEndOfSpeechOffset payload.
static const std::string START_OF_SPEECH_TIMESTAMP_FIELD_NAME = "startOfSpeechTimestamp";

/// The field name for the end of speech offset, reported in milliseconds, as part of the SetEndOfSpeechOffset payload.
static const std::string END_OF_SPEECH_OFFSET_FIELD_NAME = "endOfSpeechOffsetInMilliseconds";

/// The value of the WakeWordConfirmationChanged Event name.
static const std::string WAKE_WORD_CONFIRMATION_CHANGED_EVENT_NAME = "WakeWordConfirmationChanged";
/// The value of the WakeWordConfirmationReport Event name.
static const std::string WAKE_WORD_CONFIRMATION_REPORT_EVENT_NAME = "WakeWordConfirmationReport";
/// The value of the payload key for wakeWordConfirmation.
static const std::string WAKE_WORD_CONFIRMATION_PAYLOAD_KEY = "wakeWordConfirmation";

/// The value of the SpeechConfirmationChanged Event name.
static const std::string SPEECH_CONFIRMATION_CHANGED_EVENT_NAME = "SpeechConfirmationChanged";
/// The value of the SpeechConfirmationReport Event name.
static const std::string SPEECH_CONFIRMATION_REPORT_EVENT_NAME = "SpeechConfirmationReport";
/// The value of the payload key for speechConfirmation.
static const std::string SPEECH_CONFIRMATION_PAYLOAD_KEY = "speechConfirmation";

/// The value of the WakeWordsChanged Event name.
static const std::string WAKE_WORDS_CHANGED_EVENT_NAME = "WakeWordsChanged";
/// The value of the WakeWordsReport Event name.
static const std::string WAKE_WORDS_REPORT_EVENT_NAME = "WakeWordsReport";
/// The value of the payload key for wake words.
static const std::string WAKE_WORDS_PAYLOAD_KEY = "wakeWords";

/// The component name for power management.
static const std::string POWER_RESOURCE_COMPONENT_NAME = "AudioInputProcessor";

/// Metric Activity Name Prefix for AIP metric source
static const std::string METRIC_ACTIVITY_NAME_PREFIX_AIP = "AIP-";

/// Start of Utterance Activity Name for AIP metric source
static const std::string START_OF_UTTERANCE = "START_OF_UTTERANCE";
static const std::string START_OF_UTTERANCE_ACTIVITY_NAME = METRIC_ACTIVITY_NAME_PREFIX_AIP + START_OF_UTTERANCE;

/// Name of the audio bytes stream metric for OPUS
static const std::string WAKEWORD_DETECTION_SEGMENT_UPLOADED_OPUS = "AIP_WAKEWORD_DETECTION_SEGMENT_UPLOADED_OPUS";

/// Name of the audio bytes stream metric for PCM
static const std::string WAKEWORD_DETECTION_SEGMENT_UPLOADED_PCM = "AIP_WAKEWORD_DETECTION_SEGMENT_UPLOADED_PCM";

/// Metric emitted when the Recognize event is built, for AIP metric source
static const std::string RECOGNIZE_START_SEND_MESSAGE = "RECOGNIZE_EVENT_IS_BUILT";

/// Wakeword Activity Name for AIP metric source
static const std::string START_OF_STREAM_TIMESTAMP = "START_OF_STREAM_TIMESTAMP";
static const std::string WW_DURATION = "WW_DURATION";
static const std::string WW_DURATION_ACTIVITY_NAME = METRIC_ACTIVITY_NAME_PREFIX_AIP + WW_DURATION;

/// Stop Capture Received Activity Name for AIP metric source
static const std::string STOP_CAPTURE_RECEIVED = "STOP_CAPTURE";
static const std::string STOP_CAPTURE_RECEIVED_ACTIVITY_NAME = METRIC_ACTIVITY_NAME_PREFIX_AIP + STOP_CAPTURE_RECEIVED;

/// The duration metric for acquiring the power resource
static const std::string ACQUIRE_POWER_RESOURCE = "ACQUIRE_POWER_RESOURCE";
static const std::string ACQUIRE_POWER_RESOURCE_ACTIVITY = METRIC_ACTIVITY_NAME_PREFIX_AIP + ACQUIRE_POWER_RESOURCE;

/// The duration metric for releasing the power resource
static const std::string RELEASE_POWER_RESOURCE = "RELEASE_POWER_RESOURCE";
static const std::string RELEASE_POWER_RESOURCE_ACTIVITY = METRIC_ACTIVITY_NAME_PREFIX_AIP + RELEASE_POWER_RESOURCE;

/// End of Speech Offset Received Activity Name for AIP metric source
static const std::string END_OF_SPEECH_OFFSET_RECEIVED = "END_OF_SPEECH_OFFSET";
static const std::string END_OF_SPEECH_OFFSET_RECEIVED_ACTIVITY_NAME =
    METRIC_ACTIVITY_NAME_PREFIX_AIP + END_OF_SPEECH_OFFSET_RECEIVED;

/// The duration metric for the short timeout
static const std::string STOP_CAPTURE_TO_END_OF_SPEECH_METRIC_NAME = "STOP_CAPTURE_TO_END_OF_SPEECH";
static const std::string STOP_CAPTURE_TO_END_OF_SPEECH_ACTIVITY_NAME =
    METRIC_ACTIVITY_NAME_PREFIX_AIP + STOP_CAPTURE_TO_END_OF_SPEECH_METRIC_NAME;

/// The recognize request initiator metric.
static const std::string INITIATOR_PREFIX = "INITIATOR_";
static const std::string INITIATOR_ACTIVITY_NAME_PREFIX = METRIC_ACTIVITY_NAME_PREFIX_AIP + INITIATOR_PREFIX;

/// AudioEncodingFormat Activity Name for AIP metric source
static const std::string AUDIO_ENCODING_FORMAT_METRIC_NAME = "AUDIO_ENCODING_FORMAT";
static const std::string AUDIO_ENCODING_FORMAT_ACTIVITY_NAME =
    METRIC_ACTIVITY_NAME_PREFIX_AIP + AUDIO_ENCODING_FORMAT_METRIC_NAME;
static const std::string AUDIO_ENCODING_FORMAT_OPUS = "OPUSAudioEncoding";
static const std::string AUDIO_ENCODING_FORMAT_LPCM = "LPCMAudioEncoding";

/// The default resolveKey used as a placeholder when only one encoding format is configured for @c AudioInputProcessor
static const std::string DEFAULT_RESOLVE_KEY = "DEFAULT_RESOLVE_KEY";

/// Keys for instance entry metric specific fields
static const std::string ENTRY_METRIC_ACTOR_NAME = "AudioInputProcessor";
static const std::string ENTRY_METRIC_ACTIVITY_NAME = METRIC_ACTIVITY_NAME_PREFIX_AIP + ENTRY_METRIC_ACTOR_NAME;
static const std::string ENTRY_METRIC_KEY_SEGMENT_ID = "segment_id";
static const std::string ENTRY_METRIC_KEY_ACTOR = "actor";
static const std::string ENTRY_METRIC_KEY_ENTRY_TYPE = "entry_type";
static const std::string ENTRY_METRIC_KEY_ENTRY_NAME = "entry_name";
static const std::string ENTRY_METRIC_NAME_STATE_CHANGE = "StateChange";

/// Preroll duration is a fixed 500ms.
static const std::chrono::milliseconds PREROLL_DURATION = std::chrono::milliseconds(500);

static const int MILLISECONDS_PER_SECOND = 1000;

/// Threshold number of bytes for OPUS-encoded wakeword detection
static const int WAKEWORD_DETECTION_SEGMENT_SIZE_BYTES_OPUS = 5209;
/// Threshold number of bytes for PCM-encoded wakeword detection
static const int WAKEWORD_DETECTION_SEGMENT_SIZE_BYTES_PCM = 40480;

/**
 * Helper function to get string values of the encoding audio format, which are used in the Recognize event.
 *
 * @param encoding Target encoding format.
 * @return String name of the encoding format.
 */
static std::string encodingFormatToString(avsCommon::utils::AudioFormat::Encoding encoding) {
    switch (encoding) {
        case avsCommon::utils::AudioFormat::Encoding::OPUS:
            return "OPUS";
        case avsCommon::utils::AudioFormat::Encoding::LPCM:
            return "AUDIO_L16_RATE_16000_CHANNELS_1";
    }
    return "UNKNOWN";
}

/**
 * Handles a Metric event by creating and recording it. Failure to create or record the event results
 * in an early return.
 *
 * @param metricRecorder The @c MetricRecorderInterface which records Metric events.
 * @param metricEventBuilder The @c MetricEventBuilder.
 * @param dialogRequestId The dialogRequestId associated with this Metric event; default is empty string.
 */
static void submitMetric(
    const std::shared_ptr<MetricRecorderInterface>& metricRecorder,
    MetricEventBuilder& metricEventBuilder,
    const std::string& dialogRequestId = "") {
    metricEventBuilder.addDataPoint(
        DataPointStringBuilder{}.setName("DIALOG_REQUEST_ID").setValue(dialogRequestId).build());

    auto metricEvent = metricEventBuilder.build();

    if (metricEvent == nullptr) {
        ACSDK_ERROR(LX("Error creating metric with explicit dialogRequestId"));
        return;
    }
    recordMetric(metricRecorder, metricEvent);
}

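// Illustrative call site for the overload above (a sketch, not verbatim SDK code):
// the caller assembles a MetricEventBuilder and passes it in together with the
// dialogRequestId to associate with the event, e.g.:
//
//     auto builder = MetricEventBuilder{}
//                        .setActivityName(START_OF_UTTERANCE_ACTIVITY_NAME)
//                        .addDataPoint(DataPointCounterBuilder{}.setName(START_OF_UTTERANCE).increment(1).build());
//     submitMetric(m_metricRecorder, builder, dialogRequestId);
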
/**
 * Handles a Metric event by creating and recording it. Failure to create or record the event results
 * in an early return.
 *
 * @param metricRecorder The @c MetricRecorderInterface which records Metric events.
 * @param activityName The activityName of the Metric event.
 * @param dataPoint The @c DataPoint of this Metric event (e.g. duration).
 * @param directive The @c AVSDirective associated with this Metric event; default is nullptr.
 */
static void submitMetric(
    const std::shared_ptr<MetricRecorderInterface> metricRecorder,
    const std::string& activityName,
    const DataPoint& dataPoint,
    const std::shared_ptr<AVSDirective>& directive = nullptr) {
    auto metricEventBuilder = MetricEventBuilder{}.setActivityName(activityName).addDataPoint(dataPoint);

    if (directive != nullptr) {
        metricEventBuilder.addDataPoint(
            DataPointStringBuilder{}.setName("HTTP2_STREAM").setValue(directive->getAttachmentContextId()).build());
        metricEventBuilder.addDataPoint(
            DataPointStringBuilder{}.setName("DIRECTIVE_MESSAGE_ID").setValue(directive->getMessageId()).build());
        metricEventBuilder.addDataPoint(
            DataPointStringBuilder{}.setName("DIALOG_REQUEST_ID").setValue(directive->getDialogRequestId()).build());
    }

    auto metricEvent = metricEventBuilder.build();

    if (metricEvent == nullptr) {
        ACSDK_ERROR(LX("Error creating metric from directive"));
        return;
    }
    recordMetric(metricRecorder, metricEvent);
}

/**
 * Creates and records an instance entry metric with the given identifiers and metadata.
 *
 * @param metricRecorder The @c MetricRecorderInterface which records Metric events.
 * @param segmentId The segmentId corresponding to this metric event.
 * @param name The name of this metric.
 * @param metadata Any metadata to be associated with this metric; default is empty.
 */
static void submitInstanceEntryMetric(
    const std::shared_ptr<MetricRecorderInterface>& metricRecorder,
    const std::string& segmentId,
    const std::string& name,
    const std::map<std::string, std::string>& metadata = {}) {
    if (segmentId.empty() || name.empty()) {
        ACSDK_ERROR(LX(__FUNCTION__).m("Unable to create instance metric").d("segmentId", segmentId).d("name", name));
        return;
    }

    auto metricBuilder = MetricEventBuilder{}.setActivityName(ENTRY_METRIC_ACTIVITY_NAME);
    metricBuilder.addDataPoint(
        DataPointStringBuilder{}.setName(ENTRY_METRIC_KEY_SEGMENT_ID).setValue(segmentId).build());
    metricBuilder.addDataPoint(
        DataPointStringBuilder{}.setName(ENTRY_METRIC_KEY_ACTOR).setValue(ENTRY_METRIC_ACTOR_NAME).build());
    metricBuilder.addDataPoint(DataPointStringBuilder{}.setName(ENTRY_METRIC_KEY_ENTRY_NAME).setValue(name).build());
    metricBuilder.addDataPoint(
        DataPointStringBuilder{}.setName(ENTRY_METRIC_KEY_ENTRY_TYPE).setValue("INSTANCE").build());
    for (auto const& pair : metadata) {
        metricBuilder.addDataPoint(DataPointStringBuilder{}.setName(pair.first).setValue(pair.second).build());
    }
    auto metric = metricBuilder.build();
    if (metric == nullptr) {
        ACSDK_ERROR(LX(__FUNCTION__).m("Error creating instance entry metric."));
        return;
    }
    recordMetric(metricRecorder, metric);
}

/**
 * Creates the SpeechRecognizer capability configuration.
 *
 * @param assetsManager Responsible for retrieving and changing the wake words and locale.
 * @return The SpeechRecognizer capability configuration.
 */
static std::shared_ptr<avsCommon::avs::CapabilityConfiguration> getSpeechRecognizerCapabilityConfiguration(
    const LocaleAssetsManagerInterface& assetsManager);

/**
 * Resolving method to be called in a lambda function that will be passed to @c MessageRequest as the @c
 * MessageRequestResolveFunction.
 *
 * @param[in,out] request @c EditableMessageRequest to resolve
 * @param resolveKey Provided resolve key
 * @param encodingFormats Mappings from resolveKey to encoding format
 * @param attachmentReaders Mappings from resolveKey to attachment readers
 * @param metricRecorder @c MetricRecorderInterface object to emit metrics
 * @return @c true if the message is resolved successfully, else @c false
 */
static bool resolveMessageRequest(
    const std::shared_ptr<avsCommon::avs::EditableMessageRequest>& request,
    const std::string& resolveKey,
    AudioInputProcessor::EncodingFormatResponse encodingFormats,
    const std::unordered_map<std::string, std::vector<std::shared_ptr<avsCommon::avs::MessageRequest::NamedReader>>>&
        attachmentReaders,
    const std::shared_ptr<avsCommon::utils::metrics::MetricRecorderInterface>& metricRecorder);

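// For orientation, a sketch of how the resolver above can be bound (illustrative
// wiring only; in this file the actual binding happens via
// getMessageRequestResolverLocked() when a multi-encoding request is assembled):
//
//     auto resolver = [encodingFormats, attachmentReaders, metricRecorder](
//                         const std::shared_ptr<avsCommon::avs::EditableMessageRequest>& req,
//                         const std::string& resolveKey) {
//         return resolveMessageRequest(req, resolveKey, encodingFormats, attachmentReaders, metricRecorder);
//     };
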
std::shared_ptr<AudioInputProcessor> AudioInputProcessor::create(
    std::shared_ptr<DirectiveSequencerInterface> directiveSequencer,
    std::shared_ptr<MessageSenderInterface> messageSender,
    std::shared_ptr<ContextManagerInterface> contextManager,
    std::shared_ptr<FocusManagerInterface> focusManager,
    std::shared_ptr<avsCommon::avs::DialogUXStateAggregator> dialogUXStateAggregator,
    std::shared_ptr<ExceptionEncounteredSenderInterface> exceptionEncounteredSender,
    std::shared_ptr<UserInactivityMonitorInterface> userInactivityMonitor,
    std::shared_ptr<SystemSoundPlayerInterface> systemSoundPlayer,
    const std::shared_ptr<LocaleAssetsManagerInterface>& assetsManager,
    std::shared_ptr<settings::WakeWordConfirmationSetting> wakeWordConfirmation,
    std::shared_ptr<settings::SpeechConfirmationSetting> speechConfirmation,
    const std::shared_ptr<avsCommon::avs::CapabilityChangeNotifierInterface>& capabilityChangeNotifier,
    std::shared_ptr<settings::WakeWordsSetting> wakeWordsSetting,
    std::shared_ptr<speechencoder::SpeechEncoder> speechEncoder,
    AudioProvider defaultAudioProvider,
    std::shared_ptr<PowerResourceManagerInterface> powerResourceManager,
    std::shared_ptr<avsCommon::utils::metrics::MetricRecorderInterface> metricRecorder,
    const std::shared_ptr<ExpectSpeechTimeoutHandler>& expectSpeechTimeoutHandler) {
    if (!directiveSequencer) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullDirectiveSequencer"));
        return nullptr;
    } else if (!messageSender) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullMessageSender"));
        return nullptr;
    } else if (!contextManager) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullContextManager"));
        return nullptr;
    } else if (!focusManager) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullFocusManager"));
        return nullptr;
    } else if (!dialogUXStateAggregator) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullDialogUXStateAggregator"));
        return nullptr;
    } else if (!exceptionEncounteredSender) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullExceptionEncounteredSender"));
        return nullptr;
    } else if (!userInactivityMonitor) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullUserInactivityMonitor"));
        return nullptr;
    } else if (!systemSoundPlayer) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullSystemSoundPlayer"));
        return nullptr;
    } else if (!wakeWordConfirmation) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullWakeWordsConfirmation"));
        return nullptr;
    } else if (!speechConfirmation) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullSpeechConfirmation"));
        return nullptr;
    } else if (!assetsManager) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullAssetsManager"));
        return nullptr;
    } else if (!assetsManager->getDefaultSupportedWakeWords().empty() && !wakeWordsSetting) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullWakeWordsSetting"));
        return nullptr;
    } else if (!capabilityChangeNotifier) {
        ACSDK_ERROR(LX("createFailed").d("reason", "nullCapabilityChangeNotifier"));
        return nullptr;
    }

    auto capabilitiesConfiguration = getSpeechRecognizerCapabilityConfiguration(*assetsManager);
    if (!capabilitiesConfiguration) {
        ACSDK_ERROR(LX("createFailed").d("reason", "unableToCreateCapabilitiesConfiguration"));
        return nullptr;
    }
    auto aip = std::shared_ptr<AudioInputProcessor>(new AudioInputProcessor(
        directiveSequencer,
        messageSender,
        contextManager,
        focusManager,
        exceptionEncounteredSender,
        userInactivityMonitor,
        systemSoundPlayer,
        assetsManager,
        speechEncoder,
        defaultAudioProvider,
        wakeWordConfirmation,
        speechConfirmation,
        capabilityChangeNotifier,
        wakeWordsSetting,
        capabilitiesConfiguration,
        powerResourceManager,
        std::move(metricRecorder),
        expectSpeechTimeoutHandler));

    if (!aip->initialize()) {
        ACSDK_ERROR(LX("createFailed").d("reason", "unableToInitialize"));
        return nullptr;
    }

    // aip is guaranteed non-null here; it was just constructed and dereferenced above.
    dialogUXStateAggregator->addObserver(aip);

    return aip;
}

avsCommon::avs::DirectiveHandlerConfiguration AudioInputProcessor::getConfiguration() const {
    avsCommon::avs::DirectiveHandlerConfiguration configuration;
    configuration[STOP_CAPTURE] = BlockingPolicy(BlockingPolicy::MEDIUMS_NONE, false);
    configuration[EXPECT_SPEECH] = BlockingPolicy(BlockingPolicy::MEDIUM_AUDIO, true);
    configuration[SET_END_OF_SPEECH_OFFSET] = BlockingPolicy(BlockingPolicy::MEDIUMS_NONE, false);
    configuration[SET_WAKE_WORD_CONFIRMATION] = BlockingPolicy(BlockingPolicy::MEDIUMS_NONE, false);
    configuration[SET_SPEECH_CONFIRMATION] = BlockingPolicy(BlockingPolicy::MEDIUMS_NONE, false);
    configuration[SET_WAKE_WORDS] = BlockingPolicy(BlockingPolicy::MEDIUMS_NONE, false);
    return configuration;
}

void AudioInputProcessor::addObserver(std::shared_ptr<ObserverInterface> observer) {
    if (!observer) {
        ACSDK_ERROR(LX("addObserverFailed").d("reason", "nullObserver"));
        return;
    }
    m_executor.submit([this, observer]() { m_observers.insert(observer); });
}

void AudioInputProcessor::removeObserver(std::shared_ptr<ObserverInterface> observer) {
    if (!observer) {
        ACSDK_ERROR(LX("removeObserverFailed").d("reason", "nullObserver"));
        return;
    }
    m_executor.submit([this, observer]() { m_observers.erase(observer); }).wait();
}

std::future<bool> AudioInputProcessor::recognize(
    AudioProvider audioProvider,
    Initiator initiator,
    steady_clock::time_point startOfSpeechTimestamp,
    avsCommon::avs::AudioInputStream::Index begin,
    avsCommon::avs::AudioInputStream::Index keywordEnd,
    std::string keyword,
    std::shared_ptr<const std::vector<char>> KWDMetadata,
    const std::string& initiatorToken) {
    ACSDK_METRIC_IDS(TAG, "Recognize", "", "", Metrics::Location::AIP_RECEIVE);
    ACSDK_DEBUG5(LX(__func__));

    std::string upperCaseKeyword = keyword;
    std::transform(upperCaseKeyword.begin(), upperCaseKeyword.end(), upperCaseKeyword.begin(), ::toupper);
    if (KEYWORD_TEXT_STOP == upperCaseKeyword) {
        ACSDK_DEBUG(LX("skippingRecognizeEvent").d("reason", "invalidKeyword").d("keyword", keyword));
        std::promise<bool> ret;
        ret.set_value(false);
        return ret.get_future();
    }

    // If no begin index was provided, grab the current index ASAP so that we can start streaming from the time this
    // call was made.
    if (audioProvider.stream && INVALID_INDEX == begin) {
        static const bool startWithNewData = true;
        auto reader = audioProvider.stream->createReader(
            avsCommon::avs::AudioInputStream::Reader::Policy::NONBLOCKING, startWithNewData);
        if (!reader) {
            ACSDK_ERROR(LX("recognizeFailed").d("reason", "createReaderFailed"));
            std::promise<bool> ret;
            ret.set_value(false);
            return ret.get_future();
        }
        begin = reader->tell();
    }

    return m_executor.submit([this,
                              audioProvider,
                              initiator,
                              startOfSpeechTimestamp,
                              begin,
                              keywordEnd,
                              keyword,
                              KWDMetadata,
                              initiatorToken]() {
        return executeRecognize(
            audioProvider, initiator, startOfSpeechTimestamp, begin, keywordEnd, keyword, KWDMetadata, initiatorToken);
    });
}

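// Illustrative tap-to-talk invocation (a sketch, assuming the header's default
// arguments for the remaining parameters): with no begin index, streaming starts
// from the current write position of the provider's stream.
//
//     std::future<bool> started = audioInputProcessor->recognize(tapToTalkAudioProvider, Initiator::TAP);
//     if (!started.get()) {
//         // The Recognize event was not initiated.
//     }
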
std::future<bool> AudioInputProcessor::stopCapture() {
    return m_executor.submit([this]() { return executeStopCapture(); });
}

std::future<void> AudioInputProcessor::resetState() {
    ACSDK_DEBUG0(LX(__func__));
    return m_executor.submit([this]() { executeResetState(); });
}

void AudioInputProcessor::onContextAvailable(const std::string& jsonContext) {
    m_executor.submit([this, jsonContext]() { executeOnContextAvailable(jsonContext); });
}

void AudioInputProcessor::onContextFailure(const ContextRequestError error) {
    m_executor.submit([this, error]() { executeOnContextFailure(error); });
}

void AudioInputProcessor::handleDirectiveImmediately(std::shared_ptr<avsCommon::avs::AVSDirective> directive) {
    handleDirective(std::make_shared<DirectiveInfo>(directive, nullptr));
}

void AudioInputProcessor::preHandleDirective(std::shared_ptr<DirectiveInfo> info) {
    // No-op: SpeechRecognizer directives require no pre-handling.
}

void AudioInputProcessor::handleDirective(std::shared_ptr<DirectiveInfo> info) {
    if (!info) {
        ACSDK_ERROR(LX("handleDirectiveFailed").d("reason", "nullDirectiveInfo"));
        return;
    }

    if (!info->directive) {
        ACSDK_ERROR(LX("handleDirectiveFailed").d("reason", "nullDirective"));
        return;
    }

    if (info->directive->getName() == STOP_CAPTURE.name) {
        ACSDK_METRIC_MSG(TAG, info->directive, Metrics::Location::AIP_RECEIVE);
        submitMetric(
            m_metricRecorder,
            STOP_CAPTURE_RECEIVED_ACTIVITY_NAME,
            DataPointCounterBuilder{}.setName(STOP_CAPTURE_RECEIVED).increment(1).build(),
            info->directive);
        handleStopCaptureDirective(info);
    } else if (info->directive->getName() == EXPECT_SPEECH.name) {
        handleExpectSpeechDirective(info);
    } else if (info->directive->getName() == SET_END_OF_SPEECH_OFFSET.name) {
        handleSetEndOfSpeechOffsetDirective(info);
    } else if (info->directive->getName() == SET_WAKE_WORD_CONFIRMATION.name) {
        handleSetWakeWordConfirmation(info);
    } else if (info->directive->getName() == SET_SPEECH_CONFIRMATION.name) {
        handleSetSpeechConfirmation(info);
    } else if (info->directive->getName() == SET_WAKE_WORDS.name) {
        handleSetWakeWords(info);
    } else {
        std::string errorMessage =
            "unexpected directive " + info->directive->getNamespace() + ":" + info->directive->getName();
        m_exceptionEncounteredSender->sendExceptionEncountered(
            info->directive->getUnparsedDirective(),
            avsCommon::avs::ExceptionErrorType::UNEXPECTED_INFORMATION_RECEIVED,
            errorMessage);
        if (info->result) {
            info->result->setFailed(errorMessage);
        }
        ACSDK_ERROR(LX("handleDirectiveFailed")
                        .d("reason", "unknownDirective")
                        .d("namespace", info->directive->getNamespace())
                        .d("name", info->directive->getName()));
        removeDirective(info);
    }
}

void AudioInputProcessor::cancelDirective(std::shared_ptr<DirectiveInfo> info) {
    removeDirective(info);
}

void AudioInputProcessor::onDeregistered() {
    resetState();
}

void AudioInputProcessor::onFocusChanged(avsCommon::avs::FocusState newFocus, avsCommon::avs::MixingBehavior behavior) {
    ACSDK_DEBUG9(LX("onFocusChanged").d("newFocus", newFocus).d("MixingBehavior", behavior));
    m_executor.submit([this, newFocus]() { executeOnFocusChanged(newFocus); });
}

void AudioInputProcessor::onDialogUXStateChanged(DialogUXStateObserverInterface::DialogUXState newState) {
    m_executor.submit([this, newState]() { executeOnDialogUXStateChanged(newState); });
}

AudioInputProcessor::AudioInputProcessor(
    std::shared_ptr<DirectiveSequencerInterface> directiveSequencer,
    std::shared_ptr<MessageSenderInterface> messageSender,
    std::shared_ptr<ContextManagerInterface> contextManager,
    std::shared_ptr<FocusManagerInterface> focusManager,
    std::shared_ptr<ExceptionEncounteredSenderInterface> exceptionEncounteredSender,
    std::shared_ptr<UserInactivityMonitorInterface> userInactivityMonitor,
    std::shared_ptr<SystemSoundPlayerInterface> systemSoundPlayer,
    const std::shared_ptr<LocaleAssetsManagerInterface>& assetsManager,
    std::shared_ptr<speechencoder::SpeechEncoder> speechEncoder,
    AudioProvider defaultAudioProvider,
    std::shared_ptr<settings::WakeWordConfirmationSetting> wakeWordConfirmation,
    std::shared_ptr<settings::SpeechConfirmationSetting> speechConfirmation,
    const std::shared_ptr<avsCommon::avs::CapabilityChangeNotifierInterface>& capabilityChangeNotifier,
    std::shared_ptr<settings::WakeWordsSetting> wakeWordsSetting,
    std::shared_ptr<avsCommon::avs::CapabilityConfiguration> capabilitiesConfiguration,
    std::shared_ptr<PowerResourceManagerInterface> powerResourceManager,
    std::shared_ptr<avsCommon::utils::metrics::MetricRecorderInterface> metricRecorder,
    const std::shared_ptr<ExpectSpeechTimeoutHandler>& expectSpeechTimeoutHandler) :
        CapabilityAgent{NAMESPACE, exceptionEncounteredSender},
        RequiresShutdown{"AudioInputProcessor"},
        m_metricRecorder{metricRecorder},
        m_directiveSequencer{directiveSequencer},
        m_messageSender{messageSender},
        m_contextManager{contextManager},
        m_focusManager{focusManager},
        m_userInactivityMonitor{userInactivityMonitor},
        m_encoder{speechEncoder},
        m_defaultAudioProvider{defaultAudioProvider},
        m_lastAudioProvider{AudioProvider::null()},
        m_state{ObserverInterface::State::IDLE},
        m_focusState{avsCommon::avs::FocusState::NONE},
        m_preparingToSend{false},
        m_initialDialogUXStateReceived{false},
        m_localStopCapturePerformed{false},
        m_streamIsClosedInRecognizingState{false},
        m_systemSoundPlayer{systemSoundPlayer},
        m_assetsManager{assetsManager},
        m_precedingExpectSpeechInitiator{nullptr},
        m_wakeWordConfirmation{wakeWordConfirmation},
        m_speechConfirmation{speechConfirmation},
        m_capabilityChangeNotifier{capabilityChangeNotifier},
        m_wakeWordsSetting{wakeWordsSetting},
        m_powerResourceManager{powerResourceManager},
        m_expectSpeechTimeoutHandler{expectSpeechTimeoutHandler},
        m_timeSinceLastResumeMS{std::chrono::milliseconds(0)},
        m_timeSinceLastPartialMS{std::chrono::milliseconds(0)},
        m_resourceFlags{0},
        m_usingEncoder{false},
        m_messageRequestResolver{nullptr},
        m_encodingAudioFormats{{DEFAULT_RESOLVE_KEY, AudioFormat::Encoding::LPCM}} {
    m_capabilityConfigurations.insert(capabilitiesConfiguration);

    if (m_powerResourceManager) {
        m_powerResourceId = m_powerResourceManager->create(
            POWER_RESOURCE_COMPONENT_NAME, false, PowerResourceManagerInterface::PowerResourceLevel::ACTIVE_HIGH);
        if (!m_powerResourceId) {
            ACSDK_ERROR(
                LX(__func__).d("reason", "createRawPowerResourceFailed").d("name", POWER_RESOURCE_COMPONENT_NAME));
        }
    }
}

/**
 * Generate supported wake words json capability configuration for a given scope (default, language or locale).
 *
 * @param scope The scope being reported.
 * @param wakeWordsCombination The set of a combination of wake words supported in the given scope.
 * @return A json representation of the scope configuration.
 */
std::string generateSupportedWakeWordsJson(
    const std::string& scope,
    const LocaleAssetsManagerInterface::WakeWordsSets& wakeWordsCombination) {
    json::JsonGenerator generator;
    generator.addStringArray(CAPABILITY_INTERFACE_SCOPES_KEY, std::list<std::string>({scope}));
    generator.addCollectionOfStringArray(CAPABILITY_INTERFACE_VALUES_KEY, wakeWordsCombination);
    return generator.toString();
}

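// For example (illustrative values), generateSupportedWakeWordsJson("en-US", {{"ALEXA"}})
// produces JSON shaped like:
//
//     {"scopes":["en-US"],"values":[["ALEXA"]]}
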
std::shared_ptr<avsCommon::avs::CapabilityConfiguration> getSpeechRecognizerCapabilityConfiguration(
    const LocaleAssetsManagerInterface& assetsManager) {
    std::unordered_map<std::string, std::string> configMap;
    configMap.insert({CAPABILITY_INTERFACE_TYPE_KEY, SPEECHRECOGNIZER_CAPABILITY_INTERFACE_TYPE});
    configMap.insert({CAPABILITY_INTERFACE_NAME_KEY, SPEECHRECOGNIZER_CAPABILITY_INTERFACE_NAME});
    configMap.insert({CAPABILITY_INTERFACE_VERSION_KEY, SPEECHRECOGNIZER_CAPABILITY_INTERFACE_VERSION});

    // Generate wake words capability configuration if supportedWakeWords is not empty.
    auto defaultWakeWords = assetsManager.getDefaultSupportedWakeWords();
    if (!defaultWakeWords.empty()) {
        std::set<std::string> wakeWords;
        wakeWords.insert(generateSupportedWakeWordsJson(CAPABILITY_INTERFACE_DEFAULT_LOCALE, defaultWakeWords));
        for (const auto& entry : assetsManager.getLanguageSpecificWakeWords()) {
            wakeWords.insert(generateSupportedWakeWordsJson(entry.first, entry.second));
        }

        for (const auto& entry : assetsManager.getLocaleSpecificWakeWords()) {
            wakeWords.insert(generateSupportedWakeWordsJson(entry.first, entry.second));
        }

        json::JsonGenerator generator;
        generator.addMembersArray(CAPABILITY_INTERFACE_WAKE_WORDS_KEY, wakeWords);
        configMap.insert({CAPABILITY_INTERFACE_CONFIGURATIONS_KEY, generator.toString()});
        ACSDK_DEBUG5(LX(__func__).d("wakeWords", generator.toString()));
    }

    return std::make_shared<avsCommon::avs::CapabilityConfiguration>(configMap);
}

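// The resulting configuration is reported during capability discovery; with wake
// words supported it is shaped roughly as follows (values illustrative):
//
//     {
//         "type": "AlexaInterface",
//         "interface": "SpeechRecognizer",
//         "version": "2.3",
//         "configurations": {
//             "wakeWords": [{"scopes":["DEFAULT"],"values":[["ALEXA"]]}]
//         }
//     }
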
settings::SettingEventMetadata AudioInputProcessor::getWakeWordsEventsMetadata() {
    return settings::SettingEventMetadata{
        NAMESPACE, WAKE_WORDS_CHANGED_EVENT_NAME, WAKE_WORDS_REPORT_EVENT_NAME, WAKE_WORDS_PAYLOAD_KEY};
}

void AudioInputProcessor::doShutdown() {
    m_executor.shutdown();
    executeResetState();
    m_directiveSequencer.reset();
    m_messageSender.reset();
    m_contextManager.reset();
    m_focusManager.reset();
    m_userInactivityMonitor.reset();
    m_observers.clear();
    m_expectSpeechTimeoutHandler.reset();
    if (m_powerResourceManager && m_powerResourceId) {
        m_powerResourceManager->close(m_powerResourceId);
    }
}

bool resolveMessageRequest(
    const std::shared_ptr<EditableMessageRequest>& request,
    const std::string& resolveKey,
    AudioInputProcessor::EncodingFormatResponse encodingFormats,
    const std::unordered_map<std::string, std::vector<std::shared_ptr<MessageRequest::NamedReader>>>& attachmentReaders,
    const std::shared_ptr<avsCommon::utils::metrics::MetricRecorderInterface>& metricRecorder) {
    if (!request) {
        ACSDK_ERROR(LX("Failed to resolve MessageRequest")
                        .d("reason", "Null pointer to MessageRequest")
                        .d("value", resolveKey));
        return false;
    }

    if (encodingFormats.find(resolveKey) == encodingFormats.end()) {
        ACSDK_ERROR(LX("Failed to resolve MessageRequest").d("reason", "Invalid resolveKey").d("value", resolveKey));
        return false;
    }

    rapidjson::Document eventWithContext;
    // Guard against malformed JSON content before navigating the document.
    if (!jsonUtils::parseJSON(request->getJsonContent(), &eventWithContext)) {
        ACSDK_ERROR(LX("Failed to resolve MessageRequest").d("reason", "Failed to parse json content"));
        return false;
    }
    auto event = eventWithContext.FindMember("event");
    if (event == eventWithContext.MemberEnd()) {
        ACSDK_ERROR(LX("Failed to resolve MessageRequest").d("reason", "No event found in json"));
        return false;
    }
    auto payload = event->value.FindMember("payload");
    if (payload == event->value.MemberEnd()) {
        ACSDK_ERROR(LX("Failed to resolve MessageRequest").d("reason", "No payload in json"));
        return false;
    }

    rapidjson::Value formatValue;
    auto encodingFormat = encodingFormats.at(resolveKey);
    auto formatString = encodingFormatToString(encodingFormat);
    formatValue.SetString(formatString.c_str(), formatString.length());

    if (payload->value.FindMember(FORMAT_KEY) != payload->value.MemberEnd()) {
        ACSDK_WARN(LX("Format already exists in Json payload. Replace it with").d("format", formatString));
        payload->value.FindMember(FORMAT_KEY)->value.SetString(formatString.c_str(), eventWithContext.GetAllocator());
    } else {
        payload->value.AddMember(
            rapidjson::StringRef(FORMAT_KEY.c_str()), formatValue, eventWithContext.GetAllocator());
    }

    // Emit the audio encoding format metric.
    auto metricEvent = MetricEventBuilder{}
                           .setActivityName(AUDIO_ENCODING_FORMAT_ACTIVITY_NAME)
                           .addDataPoint(DataPointStringBuilder{}.setName("resolveKey").setValue(resolveKey).build());
    switch (encodingFormat) {
        case AudioFormat::Encoding::LPCM:
            submitMetric(
                metricRecorder,
                metricEvent.addDataPoint(
                    DataPointCounterBuilder{}.setName(AUDIO_ENCODING_FORMAT_LPCM).increment(1).build()));
            break;
        case AudioFormat::Encoding::OPUS:
            submitMetric(
                metricRecorder,
                metricEvent.addDataPoint(
                    DataPointCounterBuilder{}.setName(AUDIO_ENCODING_FORMAT_OPUS).increment(1).build()));
            break;
        default:
            break;
    }

    rapidjson::StringBuffer buffer;
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
    eventWithContext.Accept(writer);

    std::string jsonContent(buffer.GetString());
    request->setJsonContent(jsonContent);
    if (attachmentReaders.find(resolveKey) != attachmentReaders.end()) {
        request->setAttachmentReaders(attachmentReaders.at(resolveKey));
    }
    return true;
}

std::future<bool> AudioInputProcessor::expectSpeechTimedOut() {
    return m_executor.submit([this]() { return executeExpectSpeechTimedOut(); });
}

void AudioInputProcessor::handleStopCaptureDirective(std::shared_ptr<DirectiveInfo> info) {
    m_stopCaptureReceivedTime = steady_clock::now();
    m_executor.submit([this, info]() {
        bool stopImmediately = true;
        executeStopCapture(stopImmediately, info);
    });
}

void AudioInputProcessor::handleExpectSpeechDirective(std::shared_ptr<DirectiveInfo> info) {
    int64_t timeout;
    bool found = avsCommon::utils::json::jsonUtils::retrieveValue(
        info->directive->getPayload(), "timeoutInMilliseconds", &timeout);

    if (!found) {
        static const char* errorMessage = "missing/invalid timeoutInMilliseconds";
        m_exceptionEncounteredSender->sendExceptionEncountered(
            info->directive->getUnparsedDirective(),
            avsCommon::avs::ExceptionErrorType::UNSUPPORTED_OPERATION,
            errorMessage);
        if (info->result) {
            info->result->setFailed(errorMessage);
        }
        ACSDK_ERROR(LX("handleExpectSpeechDirectiveFailed")
                        .d("reason", "missingJsonField")
                        .d("field", "timeoutInMilliseconds"));
        removeDirective(info);
        return;
    }

    m_executor.submit([this, timeout, info]() { executeExpectSpeech(milliseconds{timeout}, info); });
}

void AudioInputProcessor::handleSetEndOfSpeechOffsetDirective(std::shared_ptr<DirectiveInfo> info) {
    // The duration from StopCapture to SetEndOfSpeechOffset. END_OF_SPEECH_OFFSET_RECEIVED starts from Recognize,
    // so it is not helpful for figuring out the best SHORT timeout.
    submitMetric(
        m_metricRecorder,
        STOP_CAPTURE_TO_END_OF_SPEECH_ACTIVITY_NAME,
        DataPointDurationBuilder{duration_cast<milliseconds>(steady_clock::now() - m_stopCaptureReceivedTime)}
            .setName(STOP_CAPTURE_TO_END_OF_SPEECH_METRIC_NAME)
            .build(),
        info->directive);

    auto payload = info->directive->getPayload();
    int64_t endOfSpeechOffset = 0;

    std::string startOfSpeechTimeStampInString;
    bool foundEnd = json::jsonUtils::retrieveValue(payload, END_OF_SPEECH_OFFSET_FIELD_NAME, &endOfSpeechOffset);
    bool foundStart =
        json::jsonUtils::retrieveValue(payload, START_OF_SPEECH_TIMESTAMP_FIELD_NAME, &startOfSpeechTimeStampInString);

    if (foundEnd && foundStart) {
        int64_t startOfSpeechTimestamp = 0;
        auto offset = milliseconds(endOfSpeechOffset);
        submitMetric(
            m_metricRecorder,
            END_OF_SPEECH_OFFSET_RECEIVED_ACTIVITY_NAME,
            DataPointDurationBuilder{offset}.setName(END_OF_SPEECH_OFFSET_RECEIVED).build(),
            info->directive);
        std::istringstream iss{startOfSpeechTimeStampInString};
        iss >> startOfSpeechTimestamp;

        ACSDK_DEBUG0(LX("handleSetEndOfSpeechOffsetDirective")
                         .d("startTimeSpeech(ms)", startOfSpeechTimestamp)
                         .d("endTimeSpeech(ms)", startOfSpeechTimestamp + endOfSpeechOffset));
        // Guard against a null result, as other directive handlers in this file do.
        if (info->result) {
            info->result->setCompleted();
        }
    } else {
        std::string missing;
        if (!foundEnd && !foundStart) {
            missing = END_OF_SPEECH_OFFSET_FIELD_NAME + " and " + START_OF_SPEECH_TIMESTAMP_FIELD_NAME;
        } else if (!foundEnd) {
            missing = END_OF_SPEECH_OFFSET_FIELD_NAME;
        } else {
            missing = START_OF_SPEECH_TIMESTAMP_FIELD_NAME;
        }

        ACSDK_ERROR(LX("handleSetEndOfSpeechOffsetDirective").d("missing", missing));
        if (info->result) {
            info->result->setFailed("Missing parameter(s): " + missing);
        }
    }
    removeDirective(info);
}

bool AudioInputProcessor::executeRecognize(
    AudioProvider provider,
    Initiator initiator,
    steady_clock::time_point startOfSpeechTimestamp,
    avsCommon::avs::AudioInputStream::Index begin,
    avsCommon::avs::AudioInputStream::Index end,
    const std::string& keyword,
    std::shared_ptr<const std::vector<char>> KWDMetadata,
    const std::string& initiatorToken) {
    // Make sure we have a keyword if this is a wakeword initiator.
    if (Initiator::WAKEWORD == initiator && keyword.empty()) {
        ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "emptyKeywordWithWakewordInitiator"));
        return false;
    }

    // 500ms preroll.
    avsCommon::avs::AudioInputStream::Index preroll = provider.format.sampleRateHz / 2;

    // Check if we have everything we need to enable false wakeword detection.
    // TODO: Consider relaxing the hard requirement for a full 500ms preroll - ACSDK-276.
    bool falseWakewordDetection =
        Initiator::WAKEWORD == initiator && begin != INVALID_INDEX && begin >= preroll && end != INVALID_INDEX;

    json::JsonGenerator generator;
    std::string initiatorString = initiatorToString(initiator);
    generator.addMember(TYPE_KEY, initiatorString);
    generator.startObject(PAYLOAD_KEY);
    // If we will be enabling false wakeword detection, add preroll and build the initiator payload.
    if (falseWakewordDetection) {
        generator.startObject(WAKEWORD_INDICES_KEY);
        generator.addMember(START_INDEX_KEY, preroll);
        generator.addMember(END_INDEX_KEY, preroll + (end - begin));
        generator.finishObject();

        begin -= preroll;
    }

    if (!keyword.empty()) {
        generator.addMember(WAKE_WORD_KEY, string::stringToUpperCase(keyword));
    }

    if (!initiatorToken.empty()) {
        generator.addMember(TOKEN_KEY, initiatorToken);
    }

    bool initiatedByWakeword = (Initiator::WAKEWORD == initiator);

    return executeRecognize(
        provider,
        generator.toString(),
        startOfSpeechTimestamp,
        begin,
        end,
        keyword,
        KWDMetadata,
        initiatedByWakeword,
        falseWakewordDetection,
        initiatorString);
}

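// For reference, the initiator JSON assembled above for a wakeword-initiated request
// with false wakeword detection enabled looks roughly like this (sample index values
// assume 16 kHz audio, i.e. an 8000-sample / 500 ms preroll):
//
//     {
//         "type": "WAKEWORD",
//         "payload": {
//             "wakeWordIndices": {"startIndexInSamples": 8000, "endIndexInSamples": 16000},
//             "wakeWord": "ALEXA"
//         }
//     }
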
bool AudioInputProcessor::executeRecognize(
|
|
AudioProvider provider,
|
|
const std::string& initiatorJson,
|
|
steady_clock::time_point startOfSpeechTimestamp,
|
|
avsCommon::avs::AudioInputStream::Index begin,
|
|
avsCommon::avs::AudioInputStream::Index end,
|
|
const std::string& keyword,
|
|
std::shared_ptr<const std::vector<char>> KWDMetadata,
|
|
bool initiatedByWakeword,
|
|
bool falseWakewordDetection,
|
|
const std::string& initiatorString) {
|
|
if (!provider.stream) {
|
|
ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "nullAudioInputStream"));
|
|
return false;
|
|
}
|
|
|
|
std::unordered_map<int, std::string> mapSampleRatesAVSEncoding = {{32000, "OPUS"}};
|
|
std::string avsEncodingFormat;
|
|
std::unordered_map<int, std::string>::iterator itSampleRateAVSEncoding;
|
|
|
|
switch (provider.format.encoding) {
|
|
case avsCommon::utils::AudioFormat::Encoding::LPCM:
|
|
if (provider.format.sampleRateHz != 16000) {
|
|
ACSDK_ERROR(LX("executeRecognizeFailed")
|
|
.d("reason", "unsupportedSampleRateForPCM")
|
|
.d("sampleRate", provider.format.sampleRateHz));
|
|
return false;
|
|
} else if (provider.format.sampleSizeInBits != 16) {
|
|
ACSDK_ERROR(LX("executeRecognizeFailed")
|
|
.d("reason", "unsupportedSampleSize")
|
|
.d("sampleSize", provider.format.sampleSizeInBits));
|
|
return false;
|
|
}
|
|
avsEncodingFormat = encodingFormatToString(provider.format.encoding);
|
|
m_audioBytesForMetricThreshold = WAKEWORD_DETECTION_SEGMENT_SIZE_BYTES_PCM;
|
|
m_uploadMetricName = WAKEWORD_DETECTION_SEGMENT_UPLOADED_PCM;
|
|
break;
|
|
case avsCommon::utils::AudioFormat::Encoding::OPUS:
|
|
itSampleRateAVSEncoding = mapSampleRatesAVSEncoding.find(provider.format.sampleRateHz);
|
|
if (itSampleRateAVSEncoding == mapSampleRatesAVSEncoding.end()) {
|
|
ACSDK_ERROR(LX("executeRecognizeFailed")
|
|
.d("reason", "unsupportedSampleRateForOPUS")
|
|
.d("sampleRate", provider.format.sampleRateHz));
|
|
return false;
|
|
}
|
|
|
|
avsEncodingFormat = encodingFormatToString(provider.format.encoding);
|
|
m_audioBytesForMetricThreshold = WAKEWORD_DETECTION_SEGMENT_SIZE_BYTES_OPUS;
|
|
m_uploadMetricName = WAKEWORD_DETECTION_SEGMENT_UPLOADED_OPUS;
|
|
break;
|
|
default:
|
|
ACSDK_ERROR(LX("executeRecognizeFailed")
|
|
.d("reason", "unsupportedEncoding")
|
|
.d("encoding", provider.format.encoding));
|
|
return false;
|
|
}
|
|
|
|
if (provider.format.endianness != avsCommon::utils::AudioFormat::Endianness::LITTLE) {
|
|
ACSDK_ERROR(LX("executeRecognizeFailed")
|
|
.d("reason", "unsupportedEndianness")
|
|
.d("endianness", provider.format.endianness));
|
|
return false;
|
|
} else if (provider.format.numChannels != 1) {
|
|
ACSDK_ERROR(LX("executeRecognizeFailed")
|
|
.d("reason", "unsupportedNumChannels")
|
|
.d("channels", provider.format.numChannels));
|
|
return false;
|
|
}
|
|
|
|
// If this is a barge-in, verify that it is permitted.
|
|
switch (m_state) {
|
|
case ObserverInterface::State::IDLE:
|
|
case ObserverInterface::State::EXPECTING_SPEECH:
|
|
break;
|
|
case ObserverInterface::State::RECOGNIZING:
|
|
// Barge-in is only permitted if the audio providers have compatible policies.
|
|
if (!m_lastAudioProvider.canBeOverridden) {
|
|
ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "Active audio provider can not be overridden"));
|
|
return false;
|
|
}
|
|
if (!provider.canOverride) {
|
|
ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "New audio provider can not override"));
|
|
return false;
|
|
}
|
|
// For barge-in, we should close the previous reader before creating another one.
|
|
if (m_usingEncoder) {
|
|
m_encoder->stopEncoding(true);
|
|
}
|
|
closeAttachmentReaders();
|
|
break;
|
|
case ObserverInterface::State::BUSY:
|
|
ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "Barge-in is not permitted while busy"));
|
|
return false;
|
|
}
|
|
|
|
if (settings::WakeWordConfirmationSettingType::TONE == m_wakeWordConfirmation->get()) {
|
|
m_executor.submit(
|
|
[this]() { m_systemSoundPlayer->playTone(SystemSoundPlayerInterface::Tone::WAKEWORD_NOTIFICATION); });
|
|
}
|
|
|
|
// Mutex required since following operations depend on value of @c m_encodingAudioFormats
|
|
std::lock_guard<std::mutex> lock(m_encodingFormatMutex);
|
|
|
|
// Assemble the event payload.
|
|
json::JsonGenerator payloadGenerator;
|
|
payloadGenerator.addMember(PROFILE_KEY, asrProfileToString(provider.profile));
|
|
m_usingEncoder = isUsingEncoderLocked();
|
|
|
|
// The initiator (or lack thereof) from a previous ExpectSpeech has precedence.
|
|
if (m_precedingExpectSpeechInitiator) {
|
|
if (!m_precedingExpectSpeechInitiator->empty()) {
|
|
payloadGenerator.addRawJsonMember(INITIATOR_KEY, *m_precedingExpectSpeechInitiator);
|
|
}
|
|
m_precedingExpectSpeechInitiator.reset();
|
|
} else if (!initiatorJson.empty()) {
|
|
payloadGenerator.addRawJsonMember(INITIATOR_KEY, initiatorJson);
|
|
}
|
|
|
|
payloadGenerator.addMember(
|
|
START_OF_SPEECH_TIMESTAMP_FIELD_NAME, std::to_string(startOfSpeechTimestamp.time_since_epoch().count()));
|
|
|
|
// Set up an attachment reader for the event.
|
|
avsCommon::avs::attachment::InProcessAttachmentReader::SDSTypeIndex offset = 0;
|
|
auto reference = AudioInputStream::Reader::Reference::BEFORE_WRITER;
|
|
if (INVALID_INDEX != begin) {
|
|
offset = begin;
|
|
reference = AudioInputStream::Reader::Reference::ABSOLUTE;
|
|
}
|
|
|
|
avsCommon::avs::AudioInputStream::Index encodingOffset = 0;
|
|
AudioInputStream::Reader::Reference encodingReference = AudioInputStream::Reader::Reference::ABSOLUTE;
|
|
|
|
// Set up the speech encoder
|
|
if (m_usingEncoder) {
|
|
ACSDK_DEBUG(LX("encodingAudio").d("format", avsEncodingFormat));
|
|
if (!m_encoder->startEncoding(provider.stream, provider.format, offset, reference)) {
|
|
ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "Failed to start encoder"));
|
|
return false;
|
|
}
|
|
} else {
|
|
ACSDK_DEBUG(LX("notEncodingAudio"));
|
|
}
|
|
|
|
if (KWDMetadata) {
|
|
for (auto& it : m_encodingAudioFormats) {
|
|
std::shared_ptr<attachment::AttachmentReader> metadataReader =
|
|
attachment::AttachmentUtils::createAttachmentReader(*KWDMetadata);
|
|
if (!metadataReader) {
|
|
ACSDK_ERROR(LX("sendingKWDMetadataFailed").d("reason", "Failed to create attachment reader"));
|
|
// If fail to create any metadata reader, clear all existing metadata for consistency
|
|
closeAttachmentReaders(attachment::AttachmentReader::ClosePoint::IMMEDIATELY);
|
|
break;
|
|
}
|
|
auto resolveKey = it.first;
|
|
m_attachmentReaders[resolveKey] = {
|
|
std::make_shared<MessageRequest::NamedReader>(KWD_METADATA_FIELD_NAME, metadataReader)};
|
|
}
|
|
}
|
|
|
|
// N audio streams of different encodings
|
|
for (auto& it : m_encodingAudioFormats) {
|
|
ACSDK_DEBUG(
|
|
LX("Create audio reader").d("format", it.second).d("offset", offset).d("encodingOffset", encodingOffset));
|
|
|
|
const auto audioFormat = it.second;
|
|
auto isLPCMEncodingAudioFormat = (AudioFormat::Encoding::LPCM == audioFormat);
|
|
|
|
std::shared_ptr<attachment::AttachmentReader> audioReader =
|
|
attachment::DefaultAttachmentReader<AudioInputStream>::create(
|
|
sds::ReaderPolicy::NONBLOCKING,
|
|
isLPCMEncodingAudioFormat ? provider.stream : m_encoder->getEncodedStream(),
|
|
isLPCMEncodingAudioFormat ? offset : encodingOffset,
|
|
isLPCMEncodingAudioFormat ? reference : encodingReference);
|
|
if (!audioReader) {
|
|
ACSDK_ERROR(LX("executeRecognizeFailed").d("reason", "Failed to create attachment reader"));
|
|
closeAttachmentReaders();
|
|
return false;
|
|
}
|
|
auto resolveKey = it.first;
|
|
        m_attachmentReaders[resolveKey].push_back(
            std::make_shared<MessageRequest::NamedReader>(AUDIO_ATTACHMENT_FIELD_NAME, audioReader));
        ACSDK_INFO(LX("createAudioAttachmentReaderSuccess")
                       .d("resolveKey", resolveKey)
                       .d("format", encodingFormatToString(it.second)));
    }

    if (!multiStreamsRequestedLocked()) {
        m_messageRequestResolver = nullptr;

        // Set up the format for a single audio stream request.
        if (m_usingEncoder && m_encoder->getContext()) {
            avsEncodingFormat = m_encoder->getContext()->getAVSFormatName();
        }
        payloadGenerator.addMember(FORMAT_KEY, avsEncodingFormat);

        // Emit the audio encoding format metric.
        if (!m_encodingAudioFormats.empty()) {
            auto metricEvent = MetricEventBuilder{}.setActivityName(AUDIO_ENCODING_FORMAT_ACTIVITY_NAME);
            auto encodingAudioFormat = m_encodingAudioFormats.begin()->second;
            switch (encodingAudioFormat) {
                case avsCommon::utils::AudioFormat::Encoding::OPUS:
                    submitMetric(
                        m_metricRecorder,
                        metricEvent.addDataPoint(
                            DataPointCounterBuilder{}.setName(AUDIO_ENCODING_FORMAT_OPUS).increment(1).build()));
                    m_audioBytesForMetricThreshold = WAKEWORD_DETECTION_SEGMENT_SIZE_BYTES_OPUS;
                    m_uploadMetricName = WAKEWORD_DETECTION_SEGMENT_UPLOADED_OPUS;
                    break;
                case avsCommon::utils::AudioFormat::Encoding::LPCM:
                    submitMetric(
                        m_metricRecorder,
                        metricEvent.addDataPoint(
                            DataPointCounterBuilder{}.setName(AUDIO_ENCODING_FORMAT_LPCM).increment(1).build()));
                    m_audioBytesForMetricThreshold = WAKEWORD_DETECTION_SEGMENT_SIZE_BYTES_PCM;
                    m_uploadMetricName = WAKEWORD_DETECTION_SEGMENT_UPLOADED_PCM;
                    break;
                default:
                    ACSDK_WARN(LX("executeRecognize")
                                   .m("failed to emit audio encoding format metric")
                                   .d("reason", "unsupportedEncoding")
                                   .d("encoding", encodingFormatToString(encodingAudioFormat)));
                    break;
            }
        }
    } else {
        m_messageRequestResolver = getMessageRequestResolverLocked();
    }

    // Code below this point changes the state of AIP. Formally update state now, and don't error out without calling
    // executeResetState() after this point.
    m_preCachedDialogRequestId = uuidGeneration::generateUUID();

    setState(ObserverInterface::State::RECOGNIZING);

    // Note that we're preparing to send a Recognize event.
    m_preparingToSend = true;

    // Reset flags when we send a new Recognize event.
    m_localStopCapturePerformed = false;
    m_streamIsClosedInRecognizingState = false;

    // Start assembling the context; we'll service the callback after assembling our Recognize event.
    m_contextManager->getContextWithoutReportableStateProperties(shared_from_this());

    // Stop the ExpectSpeech timer so we don't get a timeout.
    m_expectingSpeechTimer.stop();

    // Record provider as the last-used AudioProvider so it can be used in the event of an ExpectSpeech directive.
    m_lastAudioProvider = provider;

    // Record the Recognize event payload for later use by executeOnContextAvailable().
    m_recognizePayload = payloadGenerator.toString();

    // We can't assemble the MessageRequest until we receive the context.
    m_recognizeRequest.reset();

    // Handle metrics for this event.
    submitMetric(
        m_metricRecorder,
        MetricEventBuilder{}
            .setActivityName(START_OF_UTTERANCE_ACTIVITY_NAME)
            .addDataPoint(DataPointStringBuilder{}
                              .setName("TIME_SINCE_RESUME_ID")
                              .setValue(std::to_string(m_timeSinceLastResumeMS.count()))
                              .build())
            .addDataPoint(DataPointStringBuilder{}
                              .setName("TIME_SINCE_PARTIAL_ID")
                              .setValue(std::to_string(m_timeSinceLastPartialMS.count()))
                              .build())
            .addDataPoint(DataPointStringBuilder{}
                              .setName("RESOURCE_TYPE_ID")
                              .setValue(std::to_string(m_resourceFlags.to_ulong()))
                              .build())
            .addDataPoint(DataPointCounterBuilder{}.setName(START_OF_UTTERANCE).increment(1).build()),
        m_preCachedDialogRequestId);

    // Submit the initiator metric if available.
    if (!initiatorString.empty()) {
        submitMetric(
            m_metricRecorder,
            MetricEventBuilder{}
                .setActivityName(INITIATOR_ACTIVITY_NAME_PREFIX + initiatorString)
                .addDataPoint(
                    DataPointCounterBuilder{}.setName(INITIATOR_PREFIX + initiatorString).increment(1).build()),
            m_preCachedDialogRequestId);
    }

    if (initiatedByWakeword) {
        auto duration = milliseconds((end - begin) * MILLISECONDS_PER_SECOND / provider.format.sampleRateHz);

        auto startOfStreamTimestamp = startOfSpeechTimestamp;
        if (falseWakewordDetection) {
            startOfStreamTimestamp -= PREROLL_DURATION;
        }

        submitMetric(
            m_metricRecorder,
            MetricEventBuilder{}
                .setActivityName(WW_DURATION_ACTIVITY_NAME)
                .addDataPoint(DataPointDurationBuilder{duration}.setName(WW_DURATION).build())
                .addDataPoint(
                    DataPointDurationBuilder{duration_cast<milliseconds>(startOfStreamTimestamp.time_since_epoch())}
                        .setName(START_OF_STREAM_TIMESTAMP)
                        .build())
                .addDataPoint(DataPointCounterBuilder{}.setName(RECOGNIZE_START_SEND_MESSAGE).increment(1).build()),
            m_preCachedDialogRequestId);
        ACSDK_DEBUG(LX(__func__).d("WW_DURATION(ms)", duration.count()));
    }

    submitInstanceEntryMetric(
        m_metricRecorder,
        m_preCachedDialogRequestId,
        START_OF_UTTERANCE,
        std::map<std::string, std::string>{{"initiator", !initiatorString.empty() ? initiatorString : "unknown"}});

    return true;
}
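
// Invoked (via the executor) once the ContextManager delivers the device context. This completes the Recognize
// event that executeRecognize() started: it validates that we are still RECOGNIZING, acquires the dialog channel
// if we don't already hold it, promotes the pre-cached dialog request ID to the directive sequencer, and
// assembles the MessageRequest (unresolved when multiple encoded streams were requested, resolved otherwise).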
void AudioInputProcessor::executeOnContextAvailable(const std::string& jsonContext) {
    ACSDK_DEBUG(LX("executeOnContextAvailable").sensitive("jsonContext", jsonContext));

    // Should already be RECOGNIZING if we get here.
    if (m_state != ObserverInterface::State::RECOGNIZING) {
        ACSDK_ERROR(
            LX("executeOnContextAvailableFailed").d("reason", "Not permitted in current state").d("state", m_state));
        return;
    }

    // Should already have a reader.
    if (m_attachmentReaders.empty()) {
        ACSDK_ERROR(LX("executeOnContextAvailableFailed").d("reason", "nullReader"));
        executeResetState();
        return;
    }
    // The Recognize payload should not be empty.
    if (m_recognizePayload.empty()) {
        ACSDK_ERROR(LX("executeOnContextAvailableFailed").d("reason", "payloadEmpty"));
        executeResetState();
        return;
    }

    // Start acquiring the channel right away; we'll service the callback after assembling our Recognize event.
    if (m_focusState != avsCommon::avs::FocusState::FOREGROUND) {
        auto activity = FocusManagerInterface::Activity::create(
            NAMESPACE, shared_from_this(), std::chrono::milliseconds::zero(), avsCommon::avs::ContentType::MIXABLE);
        if (!m_focusManager->acquireChannel(CHANNEL_NAME, activity)) {
            ACSDK_ERROR(LX("executeOnContextAvailableFailed").d("reason", "Unable to acquire channel"));
            executeResetState();
            return;
        }
    }

    if (!m_preCachedDialogRequestId.empty()) {
        m_directiveSequencer->setDialogRequestId(m_preCachedDialogRequestId);
        m_preCachedDialogRequestId.clear();
    }

    // Assemble the MessageRequest. It will be sent by executeOnFocusChanged when we acquire the channel.
    auto msgIdAndJsonEvent =
        buildJsonEventString("Recognize", m_directiveSequencer->getDialogRequestId(), m_recognizePayload, jsonContext);

    if (m_messageRequestResolver) {
        // Create an unresolved MessageRequest.
        m_recognizeRequest = std::make_shared<MessageRequest>(
            msgIdAndJsonEvent.second,
            true,
            "",
            std::vector<std::pair<std::string, std::string>>{},
            m_messageRequestResolver,
            m_audioBytesForMetricThreshold,
            m_uploadMetricName);
    } else {
        m_recognizeRequest = std::make_shared<MessageRequest>(
            msgIdAndJsonEvent.second, m_audioBytesForMetricThreshold, m_uploadMetricName);
        // Only add attachment readers for a resolved MessageRequest. For an unresolved one, the attachment
        // readers are passed to the resolver function instead.
        if (!m_attachmentReaders.empty() && !m_attachmentReaders.begin()->second.empty()) {
            for (auto& namedReader : m_attachmentReaders.begin()->second) {
                m_recognizeRequest->addAttachmentReader(namedReader->name, namedReader->reader);
            }
        }
    }
    // If we already have focus, there won't be a callback to send the message, so send it now.
    if (avsCommon::avs::FocusState::FOREGROUND == m_focusState) {
        sendRequestNow();
    }
}

void AudioInputProcessor::executeOnContextFailure(const ContextRequestError error) {
    ACSDK_ERROR(LX("executeOnContextFailure").d("error", error));
    executeResetState();
}
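
// Focus-change handling: losing FOREGROUND focus at any point tears the interaction down via executeResetState(),
// while gaining FOREGROUND focus in the RECOGNIZING state flushes any Recognize request that was assembled while
// we were still waiting on the channel.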
void AudioInputProcessor::executeOnFocusChanged(avsCommon::avs::FocusState newFocus) {
    ACSDK_DEBUG(LX("executeOnFocusChanged").d("newFocus", newFocus));

    // Note the new focus state.
    m_focusState = newFocus;

    // If we're losing focus, stop using the channel.
    if (newFocus != avsCommon::avs::FocusState::FOREGROUND) {
        ACSDK_DEBUG(LX("executeOnFocusChanged").d("reason", "Lost focus"));
        executeResetState();
        return;
    }

    // We're not losing the channel (m_focusState == avsCommon::avs::FocusState::FOREGROUND). For all
    // states except RECOGNIZING, there's nothing more to do here.
    if (m_state != ObserverInterface::State::RECOGNIZING) {
        return;
    }

    // For a focus change to FOREGROUND in the RECOGNIZING state, we may have a message queued up to send. If we do,
    // we can safely send it now.
    sendRequestNow();
}
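
// Stops capturing audio for the current interaction. The actual teardown is wrapped in a lambda because it must
// not run while the Recognize event is still being assembled: if we are still preparing to send, the lambda is
// deferred and invoked by sendRequestNow() once the request is handed to the message sender.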
bool AudioInputProcessor::executeStopCapture(bool stopImmediately, std::shared_ptr<DirectiveInfo> info) {
    if (info && info->isCancelled) {
        ACSDK_DEBUG(LX("stopCaptureIgnored").d("reason", "isCancelled"));
        return true;
    }
    if (m_state != ObserverInterface::State::RECOGNIZING) {
        auto returnValue = false;
        if (info) {
            if (info->result) {
                if (m_localStopCapturePerformed || m_streamIsClosedInRecognizingState) {
                    // Since a local StopCapture was performed or the event stream was closed while in the
                    // RECOGNIZING state, we can safely ignore the StopCapture from AVS.
                    ACSDK_INFO(LX("executeStopCapture")
                                   .d("localStopCapturePerformed", m_localStopCapturePerformed)
                                   .d("streamIsClosedInRecognizingState", m_streamIsClosedInRecognizingState)
                                   .m("StopCapture directive ignored because local StopCapture was performed or event"
                                      " stream is closed while in recognizing state."));
                    m_localStopCapturePerformed = false;
                    m_streamIsClosedInRecognizingState = false;
                    returnValue = true;
                    info->result->setCompleted();
                } else {
                    static const char* errorMessage = "StopCapture only allowed in RECOGNIZING state.";
                    info->result->setFailed(errorMessage);
                    ACSDK_ERROR(LX("executeStopCaptureFailed")
                                    .d("reason", "invalidState")
                                    .d("expectedState", "RECOGNIZING")
                                    .d("state", m_state));
                }
            }
            removeDirective(info);
        }
        return returnValue;
    }

    // If info is nullptr, this indicates that a local StopCapture was performed.
    if (nullptr == info) {
        m_localStopCapturePerformed = true;
    }

    // Create a lambda to do the StopCapture.
    std::function<void()> stopCapture = [=] {
        ACSDK_DEBUG(LX("stopCapture").d("stopImmediately", stopImmediately));
        if (m_usingEncoder) {
            // If the SpeechEncoder is enabled, let it finish so the stream will be closed automatically.
            m_encoder->stopEncoding(stopImmediately);
            m_usingEncoder = false;
        }
        auto closePoint = stopImmediately ? attachment::AttachmentReader::ClosePoint::IMMEDIATELY
                                          : attachment::AttachmentReader::ClosePoint::AFTER_DRAINING_CURRENT_BUFFER;
        closeAttachmentReaders(closePoint);

        setState(ObserverInterface::State::BUSY);

        if (info) {
            if (info->result) {
                info->result->setCompleted();
            }
            removeDirective(info);
        }

        if (m_speechConfirmation->get() == settings::SpeechConfirmationSettingType::TONE) {
            m_systemSoundPlayer->playTone(SystemSoundPlayerInterface::Tone::END_SPEECH);
        }
    };

    if (m_preparingToSend) {
        // If we're still preparing to send, save the lambda and call it after we start sending.
        m_deferredStopCapture = stopCapture;
    } else {
        // Otherwise, we can call the lambda now.
        stopCapture();
    }

    return true;
}
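
// Unconditionally returns AIP to IDLE: stops timers and encoding, closes attachment readers, drops any pending or
// previously sent Recognize request, and releases the dialog channel if it is held.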
void AudioInputProcessor::executeResetState() {
    // Irrespective of the current state, clean up and go back to idle.
    ACSDK_DEBUG(LX(__func__));
    m_expectingSpeechTimer.stop();
    m_precedingExpectSpeechInitiator.reset();
    closeAttachmentReaders();
    if (m_encoder) {
        m_encoder->stopEncoding(true);
    }
    if (m_recognizeRequestSent) {
        m_recognizeRequestSent->removeObserver(shared_from_this());
        m_recognizeRequestSent.reset();
    }
    m_recognizeRequest.reset();
    m_preparingToSend = false;
    m_deferredStopCapture = nullptr;
    if (m_focusState != avsCommon::avs::FocusState::NONE) {
        m_focusManager->releaseChannel(CHANNEL_NAME, shared_from_this());
    }
    m_focusState = avsCommon::avs::FocusState::NONE;
    setState(ObserverInterface::State::IDLE);
    m_audioBytesForMetricThreshold = 0;
}
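
// Handles an ExpectSpeech directive. Any initiator carried in the payload is cached so that the follow-up
// Recognize event can echo it back to AVS. The timeout may be serviced by an external handler if one is
// registered; otherwise an internal timer fires expectSpeechTimedOut(). If the last-used (or default) audio
// provider is always readable, recognition starts immediately rather than waiting for client audio.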
bool AudioInputProcessor::executeExpectSpeech(milliseconds timeout, std::shared_ptr<DirectiveInfo> info) {
    if (info && info->isCancelled) {
        ACSDK_DEBUG(LX("expectSpeechIgnored").d("reason", "isCancelled"));
        return true;
    }
    if (m_state != ObserverInterface::State::IDLE && m_state != ObserverInterface::State::BUSY) {
        if (info->result) {
            static const char* errorMessage = "ExpectSpeech only allowed in IDLE or BUSY state.";
            info->result->setFailed(errorMessage);
        }
        removeDirective(info);
        ACSDK_ERROR(LX("executeExpectSpeechFailed")
                        .d("reason", "invalidState")
                        .d("expectedState", "IDLE/BUSY")
                        .d("state", m_state));
        return false;
    }

    if (m_precedingExpectSpeechInitiator) {
        ACSDK_ERROR(LX(__func__)
                        .d("reason", "precedingExpectSpeechInitiatorUnconsumed")
                        .d(INITIATOR_KEY.c_str(), *m_precedingExpectSpeechInitiator));
    }

    m_precedingExpectSpeechInitiator = memory::make_unique<std::string>("");
    bool found = json::jsonUtils::retrieveValue(
        info->directive->getPayload(), INITIATOR_KEY, m_precedingExpectSpeechInitiator.get());
    if (found) {
        ACSDK_DEBUG(LX(__func__).d("initiatorFound", *m_precedingExpectSpeechInitiator));
    } else {
        *m_precedingExpectSpeechInitiator = "";
    }

    setState(ObserverInterface::State::EXPECTING_SPEECH);
    if (info->result) {
        info->result->setCompleted();
    }
    removeDirective(info);

    if (m_expectSpeechTimeoutHandler &&
        m_expectSpeechTimeoutHandler->handleExpectSpeechTimeout(timeout, [this]() { return expectSpeechTimedOut(); })) {
        // The ExpectSpeech timeout will be handled externally.
        ACSDK_DEBUG(LX(__func__).m("Expect speech timeout being handled externally"));
    } else {
        ACSDK_DEBUG(LX(__func__).m("Expect speech timeout being handled internally"));
        // Start the ExpectSpeech timer.
        if (!m_expectingSpeechTimer.start(timeout, std::bind(&AudioInputProcessor::expectSpeechTimedOut, this))
                 .valid()) {
            ACSDK_ERROR(LX("executeExpectSpeechFailed").d("reason", "startTimerFailed"));
        }
    }

    // If possible, start recognizing immediately.
    if (m_lastAudioProvider && m_lastAudioProvider.alwaysReadable) {
        return executeRecognize(m_lastAudioProvider, "");
    } else if (m_defaultAudioProvider && m_defaultAudioProvider.alwaysReadable) {
        return executeRecognize(m_defaultAudioProvider, "");
    }

    return true;
}

bool AudioInputProcessor::executeExpectSpeechTimedOut() {
    if (m_state != ObserverInterface::State::EXPECTING_SPEECH) {
        ACSDK_ERROR(LX("executeExpectSpeechTimedOutFailure")
                        .d("reason", "invalidState")
                        .d("expectedState", "EXPECTING_SPEECH")
                        .d("state", m_state));
        return false;
    }
    m_precedingExpectSpeechInitiator.reset();
    auto msgIdAndJsonEvent = buildJsonEventString("ExpectSpeechTimedOut");
    auto request = std::make_shared<MessageRequest>(msgIdAndJsonEvent.second);
    request->addObserver(shared_from_this());
    m_messageSender->sendMessage(request);
    setState(ObserverInterface::State::IDLE);
    ACSDK_ERROR(LX("executeExpectSpeechFailed").d("reason", "Timed Out"));
    return true;
}

void AudioInputProcessor::executeOnDialogUXStateChanged(DialogUXStateObserverInterface::DialogUXState newState) {
    ACSDK_DEBUG0(LX(__func__).d("newState", newState));

    if (!m_initialDialogUXStateReceived) {
        // The initial dialog UX state change call comes from simply registering as an observer; it is not a deliberate
        // change to the dialog state which should interrupt a recognize event.
        m_initialDialogUXStateReceived = true;
        return;
    }

    if (newState != DialogUXStateObserverInterface::DialogUXState::IDLE) {
        return;
    }

    executeResetState();
}
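
// Centralized state transition: pokes the user-inactivity monitor around RECOGNIZING transitions, records a
// state-change metric against the active dialog request ID, manages the power resource, and fans the new state
// out to all observers.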
void AudioInputProcessor::setState(ObserverInterface::State state) {
    if (m_state == state) {
        return;
    }

    // Reset the user inactivity if transitioning to or from the RECOGNIZING state.
    if (ObserverInterface::State::RECOGNIZING == m_state || ObserverInterface::State::RECOGNIZING == state) {
        m_executor.submit([this]() { m_userInactivityMonitor->onUserActive(); });
    }

    auto currentDialogRequestId =
        m_preCachedDialogRequestId.empty() ? m_directiveSequencer->getDialogRequestId() : m_preCachedDialogRequestId;
    ACSDK_DEBUG5(LX(__func__).d("currentDialogRequestId", currentDialogRequestId));
    if (!currentDialogRequestId.empty()) {
        submitInstanceEntryMetric(
            m_metricRecorder,
            currentDialogRequestId,
            ENTRY_METRIC_NAME_STATE_CHANGE,
            std::map<std::string, std::string>{{"from", ObserverInterface::stateToString(m_state)},
                                               {"to", ObserverInterface::stateToString(state)}});
    }

    ACSDK_DEBUG(LX("setState").d("from", m_state).d("to", state));
    m_state = state;
    managePowerResource(m_state);

    for (auto observer : m_observers) {
        observer->onStateChanged(m_state);
    }
}

void AudioInputProcessor::removeDirective(std::shared_ptr<DirectiveInfo> info) {
    // Check the result too, to catch cases where DirectiveInfo was created locally with a nullptr result.
    // In those cases there is no messageId to remove because no result was expected.
    if (info->directive && info->result) {
        CapabilityAgent::removeDirective(info->directive->getMessageId());
    }
}
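
// Sends the assembled Recognize MessageRequest, moving observer registration over from any previously sent
// request, and then runs a deferred StopCapture if one was queued while the event was still being prepared.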
void AudioInputProcessor::sendRequestNow() {
    ACSDK_DEBUG(LX(__func__));

    if (m_recognizeRequest) {
        ACSDK_METRIC_IDS(TAG, "Recognize", "", "", Metrics::Location::AIP_SEND);
        if (m_recognizeRequestSent && (m_recognizeRequestSent != m_recognizeRequest)) {
            m_recognizeRequestSent->removeObserver(shared_from_this());
        }
        m_recognizeRequestSent = m_recognizeRequest;
        m_recognizeRequestSent->addObserver(shared_from_this());
        m_messageSender->sendMessage(m_recognizeRequestSent);

        m_recognizeRequest.reset();
        m_preparingToSend = false;
        if (m_deferredStopCapture) {
            m_deferredStopCapture();
            m_deferredStopCapture = nullptr;
        }
    }
}

void AudioInputProcessor::onResponseStatusReceived(
    avsCommon::sdkInterfaces::MessageRequestObserverInterface::Status status) {
    ACSDK_DEBUG(LX("onResponseStatusReceived").d("status", status));
    if (status != MessageRequestObserverInterface::Status::SUCCESS) {
        resetState();
    }
}

void AudioInputProcessor::onExceptionReceived(const std::string& exceptionMessage) {
    ACSDK_ERROR(LX("onExceptionReceived").d("exception", exceptionMessage));
    resetState();
}

void AudioInputProcessor::onSendCompleted(MessageRequestObserverInterface::Status status) {
    ACSDK_DEBUG(LX("onSendCompleted").d("status", status));
    m_executor.submit([this, status]() {
        if (MessageRequestObserverInterface::Status::SUCCESS == status &&
            ObserverInterface::State::RECOGNIZING == m_state) {
            // This takes care of the edge case where the event stream is closed before a StopCapture is received.
            m_streamIsClosedInRecognizingState = true;
        }
        executeResetState();
    });
}

std::unordered_set<std::shared_ptr<avsCommon::avs::CapabilityConfiguration>> AudioInputProcessor::
    getCapabilityConfigurations() {
    std::lock_guard<std::mutex> lock(m_mutex);
    return m_capabilityConfigurations;
}

void AudioInputProcessor::onLocaleAssetsChanged() {
    m_executor.submit([this]() { executeOnLocaleAssetsChanged(); });
}

void AudioInputProcessor::executeOnLocaleAssetsChanged() {
    ACSDK_DEBUG(LX(__func__));
    std::unique_lock<std::mutex> lock(m_mutex);
    m_capabilityConfigurations.clear();
    auto newWakeWordsConfiguration = getSpeechRecognizerCapabilityConfiguration(*m_assetsManager);
    m_capabilityConfigurations.insert(newWakeWordsConfiguration);
    lock.unlock();
    m_capabilityChangeNotifier->notifyObservers(
        [newWakeWordsConfiguration](
            std::shared_ptr<avsCommon::sdkInterfaces::CapabilityConfigurationChangeObserverInterface> observer) {
            observer->onConfigurationChanged(*newWakeWordsConfiguration);
        });
}

void AudioInputProcessor::handleDirectiveFailure(
    const std::string& errorMessage,
    std::shared_ptr<DirectiveInfo> info,
    avsCommon::avs::ExceptionErrorType errorType) {
    m_exceptionEncounteredSender->sendExceptionEncountered(
        info->directive->getUnparsedDirective(), errorType, errorMessage);

    if (info->result) {
        info->result->setFailed(errorMessage);
    }

    removeDirective(info);

    ACSDK_ERROR(LX("handleDirectiveFailed").d("error", errorMessage));
}

settings::SettingEventMetadata AudioInputProcessor::getWakeWordConfirmationMetadata() {
    return settings::SettingEventMetadata{NAMESPACE,
                                          WAKE_WORD_CONFIRMATION_CHANGED_EVENT_NAME,
                                          WAKE_WORD_CONFIRMATION_REPORT_EVENT_NAME,
                                          WAKE_WORD_CONFIRMATION_PAYLOAD_KEY};
}

settings::SettingEventMetadata AudioInputProcessor::getSpeechConfirmationMetadata() {
    return settings::SettingEventMetadata{NAMESPACE,
                                          SPEECH_CONFIRMATION_CHANGED_EVENT_NAME,
                                          SPEECH_CONFIRMATION_REPORT_EVENT_NAME,
                                          SPEECH_CONFIRMATION_PAYLOAD_KEY};
}
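
// Handles the SetWakeWordConfirmation directive by extracting the confirmation setting from the payload and
// applying it on the executor thread. As an illustration only (the exact key string is defined by
// WAKE_WORD_CONFIRMATION_PAYLOAD_KEY earlier in this file, and the value set is defined by the settings
// library), a payload of the assumed shape
//
//     {"wakeWordConfirmation": "TONE"}
//
// would be parsed below via the stream extraction operator for WakeWordConfirmationSettingType.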
bool AudioInputProcessor::handleSetWakeWordConfirmation(std::shared_ptr<DirectiveInfo> info) {
    std::string jsonValue;
    bool found = avsCommon::utils::json::jsonUtils::retrieveValue(
        info->directive->getPayload(), WAKE_WORD_CONFIRMATION_PAYLOAD_KEY, &jsonValue);

    if (!found) {
        std::string errorMessage = "missing " + WAKE_WORD_CONFIRMATION_PAYLOAD_KEY;
        sendExceptionEncounteredAndReportFailed(
            info, errorMessage, avsCommon::avs::ExceptionErrorType::UNEXPECTED_INFORMATION_RECEIVED);
        return false;
    }

    settings::WakeWordConfirmationSettingType value = settings::getWakeWordConfirmationDefault();
    std::stringstream ss{jsonValue};
    ss >> value;

    if (ss.fail()) {
        std::string errorMessage = "invalid " + WAKE_WORD_CONFIRMATION_PAYLOAD_KEY;
        sendExceptionEncounteredAndReportFailed(
            info, errorMessage, avsCommon::avs::ExceptionErrorType::UNEXPECTED_INFORMATION_RECEIVED);
        return false;
    }

    auto executeChange = [this, value]() { m_wakeWordConfirmation->setAvsChange(value); };

    m_executor.submit(executeChange);

    if (info->result) {
        info->result->setCompleted();
    }

    removeDirective(info);

    return true;
}

bool AudioInputProcessor::handleSetSpeechConfirmation(std::shared_ptr<DirectiveInfo> info) {
    std::string jsonValue;
    bool found = avsCommon::utils::json::jsonUtils::retrieveValue(
        info->directive->getPayload(), SPEECH_CONFIRMATION_PAYLOAD_KEY, &jsonValue);

    if (!found) {
        std::string errorMessage = "missing/invalid " + SPEECH_CONFIRMATION_PAYLOAD_KEY;
        sendExceptionEncounteredAndReportFailed(
            info, errorMessage, avsCommon::avs::ExceptionErrorType::UNEXPECTED_INFORMATION_RECEIVED);
        return false;
    }

    settings::SpeechConfirmationSettingType value;
    std::stringstream ss{jsonValue};
    ss >> value;

    if (ss.fail()) {
        std::string errorMessage = "invalid " + SPEECH_CONFIRMATION_PAYLOAD_KEY;
        sendExceptionEncounteredAndReportFailed(
            info, errorMessage, avsCommon::avs::ExceptionErrorType::UNEXPECTED_INFORMATION_RECEIVED);
        return false;
    }

    auto executeChange = [this, value]() { m_speechConfirmation->setAvsChange(value); };

    m_executor.submit(executeChange);

    if (info->result) {
        info->result->setCompleted();
    }

    removeDirective(info);

    return true;
}

bool AudioInputProcessor::handleSetWakeWords(std::shared_ptr<DirectiveInfo> info) {
    auto wakeWords = json::jsonUtils::retrieveStringArray<settings::WakeWords>(
        info->directive->getPayload(), WAKE_WORDS_PAYLOAD_KEY);

    if (wakeWords.empty()) {
        std::string errorMessage = "missing/invalid " + WAKE_WORDS_PAYLOAD_KEY;
        sendExceptionEncounteredAndReportFailed(
            info, errorMessage, avsCommon::avs::ExceptionErrorType::UNEXPECTED_INFORMATION_RECEIVED);
        return false;
    }

    if (!m_wakeWordsSetting) {
        std::string errorMessage = "Wake words are not supported on this device";
        sendExceptionEncounteredAndReportFailed(
            info, errorMessage, avsCommon::avs::ExceptionErrorType::UNSUPPORTED_OPERATION);
        return false;
    }

    m_executor.submit([this, wakeWords, info]() { m_wakeWordsSetting->setAvsChange(wakeWords); });

    if (info->result) {
        info->result->setCompleted();
    }

    removeDirective(info);

    return true;
}
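
// Acquires the power resource while AIP is actively listening (RECOGNIZING / EXPECTING_SPEECH) and releases it
// when the interaction winds down (BUSY / IDLE), emitting a duration metric for each call so the cost of the
// acquire/release itself can be tracked.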
void AudioInputProcessor::managePowerResource(ObserverInterface::State newState) {
    if (!m_powerResourceId) {
        return;
    }
    auto startTime = steady_clock::now();
    ACSDK_DEBUG5(LX(__func__).d("state", newState));
    switch (newState) {
        case ObserverInterface::State::RECOGNIZING:
        case ObserverInterface::State::EXPECTING_SPEECH:
            m_powerResourceManager->acquire(m_powerResourceId);
            m_timeSinceLastResumeMS = m_powerResourceManager->getTimeSinceLastResumeMS();
            m_timeSinceLastPartialMS =
                m_powerResourceManager->getTimeSinceLastPartialMS(POWER_RESOURCE_COMPONENT_NAME, m_resourceFlags);
            submitMetric(
                m_metricRecorder,
                MetricEventBuilder{}
                    .setActivityName(ACQUIRE_POWER_RESOURCE_ACTIVITY)
                    .addDataPoint(
                        DataPointDurationBuilder{duration_cast<milliseconds>(steady_clock::now() - startTime)}
                            .setName(ACQUIRE_POWER_RESOURCE)
                            .build()),
                m_preCachedDialogRequestId);
            break;
        case ObserverInterface::State::BUSY:
        case ObserverInterface::State::IDLE:
            m_powerResourceManager->release(m_powerResourceId);
            submitMetric(
                m_metricRecorder,
                MetricEventBuilder{}
                    .setActivityName(RELEASE_POWER_RESOURCE_ACTIVITY)
                    .addDataPoint(
                        DataPointDurationBuilder{duration_cast<milliseconds>(steady_clock::now() - startTime)}
                            .setName(RELEASE_POWER_RESOURCE)
                            .build()),
                m_preCachedDialogRequestId);
            break;
    }
}

void AudioInputProcessor::onConnectionStatusChanged(bool connected) {
    if (!connected) {
        m_executor.submit([this]() { return executeDisconnected(); });
    }
}

void AudioInputProcessor::executeDisconnected() {
    if (ObserverInterface::State::IDLE != m_state) {
        ACSDK_WARN(LX(__func__).d("state", AudioInputProcessorObserverInterface::stateToString(m_state)));
        executeResetState();
    }
}

bool AudioInputProcessor::setEncodingAudioFormat(AudioFormat::Encoding encoding) {
    if (encoding == AudioFormat::Encoding::LPCM ||
        (m_encoder && m_encoder->getContext() && encoding == m_encoder->getContext()->getAudioFormat().encoding)) {
        std::lock_guard<std::mutex> lock(m_encodingFormatMutex);
        m_encodingAudioFormats.clear();
        // Only one format is configured and AIP will send a resolved MessageRequest; this resolve key is
        // simply a placeholder.
        m_encodingAudioFormats.emplace(DEFAULT_RESOLVE_KEY, encoding);
        return true;
    }
    return false;
}

bool AudioInputProcessor::initialize() {
    if (m_encoder && m_encoder->getContext()) {
        std::lock_guard<std::mutex> lock(m_encodingFormatMutex);
        m_encodingAudioFormats.clear();
        m_encodingAudioFormats.emplace(DEFAULT_RESOLVE_KEY, m_encoder->getContext()->getAudioFormat().encoding);
    }
    m_assetsManager->addLocaleAssetsObserver(shared_from_this());
    return true;
}
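
// Builds the resolver used for unresolved MessageRequests. The lambda deliberately captures copies of the
// current encoding formats, attachment readers, and metric recorder rather than `this`, so it stays safe to
// invoke after AIP has moved on to a new interaction. Per the SDK's "Locked" naming convention, the caller is
// expected to already hold the relevant lock (here, presumably m_encodingFormatMutex, which guards
// m_encodingAudioFormats).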
MessageRequest::MessageRequestResolveFunction AudioInputProcessor::getMessageRequestResolverLocked() const {
    auto encodingAudioFormats = m_encodingAudioFormats;
    auto attachmentReaders = m_attachmentReaders;
    auto metricRecorder = m_metricRecorder;
    MessageRequest::MessageRequestResolveFunction resolver = [encodingAudioFormats, attachmentReaders, metricRecorder](
                                                                 const std::shared_ptr<EditableMessageRequest>& req,
                                                                 std::string resolveKey) {
        return resolveMessageRequest(req, resolveKey, encodingAudioFormats, attachmentReaders, metricRecorder);
    };
    return resolver;
}
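
// Negotiates per-stream encodings: for each resolve key the requested encoding wins if supported, otherwise the
// fallback is tried; keys for which neither is supported are dropped from the response. A minimal usage sketch
// (illustrative only: "AUDIO_L1" is a made-up resolve key, and this assumes EncodingFormatRequest maps a resolve
// key to a {requested, fallback} pair, as consumed below):
//
//     AudioInputProcessor::EncodingFormatRequest request;
//     request["AUDIO_L1"] = {AudioFormat::Encoding::OPUS, AudioFormat::Encoding::LPCM};
//     auto granted = aip->requestEncodingAudioFormats(request);
//     // granted["AUDIO_L1"] is OPUS when an OPUS encoder is configured, LPCM otherwise.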
AudioInputProcessor::EncodingFormatResponse AudioInputProcessor::requestEncodingAudioFormats(
    const EncodingFormatRequest& encodings) {
    EncodingFormatResponse result;
    for (auto it = encodings.begin(); it != encodings.end(); it++) {
        auto requestedEncoding = it->second.first;
        auto fallbackEncoding = it->second.second;
        if (isEncodingFormatSupported(requestedEncoding)) {
            result[it->first] = requestedEncoding;
        } else if (isEncodingFormatSupported(fallbackEncoding)) {
            result[it->first] = fallbackEncoding;
        } else {
            ACSDK_WARN(LX("Neither the requested nor the fallback encoding format is supported.")
                           .d("resolveKey", it->first)
                           .d("requestedEncoding", encodingFormatToString(requestedEncoding))
                           .d("fallbackEncoding", encodingFormatToString(fallbackEncoding)));
        }
    }
    if (!result.empty()) {
        std::lock_guard<std::mutex> lock(m_encodingFormatMutex);
        m_encodingAudioFormats = result;
    } else {
        ACSDK_ERROR(LX("None of the requested encoding audio formats are supported."));
    }
    return result;
}

void AudioInputProcessor::closeAttachmentReaders(attachment::AttachmentReader::ClosePoint closePoint) {
    for (auto& it : m_attachmentReaders) {
        for (auto& namedReader : it.second) {
            if (namedReader && namedReader->reader) {
                namedReader->reader->close(closePoint);
            }
        }
    }
    m_attachmentReaders.clear();
}

bool AudioInputProcessor::isEncodingFormatSupported(avsCommon::utils::AudioFormat::Encoding encodingFormat) const {
    if (encodingFormat == AudioFormat::Encoding::LPCM ||
        (m_encoder && m_encoder->getContext() &&
         encodingFormat == m_encoder->getContext()->getAudioFormat().encoding)) {
        return true;
    }
    return false;
}

bool AudioInputProcessor::isUsingEncoderLocked() const {
    for (auto it = m_encodingAudioFormats.begin(); it != m_encodingAudioFormats.end(); it++) {
        if (it->second != AudioFormat::Encoding::LPCM) {
            return true;
        }
    }
    return false;
}

bool AudioInputProcessor::multiStreamsRequestedLocked() const {
    return m_encodingAudioFormats.size() > 1;
}

AudioInputProcessor::EncodingFormatResponse AudioInputProcessor::getEncodingAudioFormats() const {
    std::lock_guard<std::mutex> lock(m_encodingFormatMutex);
    return m_encodingAudioFormats;
}

}  // namespace aip
}  // namespace capabilityAgents
}  // namespace alexaClientSDK