avs-device-sdk/SpeechEncoder/OpusEncoderContext/src/OpusEncoderContext.cpp

208 lines
5.8 KiB
C++

/*
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/apache2.0/
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#include <iostream>
#include <climits>
#include <opus/opus.h>
#include <AVSCommon/Utils/Endian.h>
#include <AVSCommon/Utils/Logger/Logger.h>
#include "SpeechEncoder/OpusEncoderContext.h"
namespace alexaClientSDK {
namespace speechencoder {
using namespace avsCommon;
using namespace avsCommon::utils;
/// String to identify log entries originating from this file.
static const std::string TAG("OpusEncoderContext");
/**
* Create a LogEntry using this file's TAG and the specified event string.
*
* @param The event string for this @c LogEntry.
*/
#define LX(event) alexaClientSDK::avsCommon::utils::logger::LogEntry(TAG, event)
/// AVS OPUS format name
static constexpr char AVS_FORMAT[] = "OPUS";
/// Audio sample rate: 16kHz
static constexpr unsigned int SAMPLE_RATE = 16000;
/// OPUS bitrate: 32kbps CBR
static constexpr unsigned int BIT_RATE = 32000;
/// OPUS frame length: 20ms
static constexpr unsigned int FRAME_LENGTH = 20;
/// PCM frame size
static constexpr unsigned int FRAME_SIZE = (SAMPLE_RATE / 1000) * FRAME_LENGTH;
/// OPUS packet size (CBR)
static constexpr unsigned int PACKET_SIZE = ((BIT_RATE / CHAR_BIT) / 1000) * FRAME_LENGTH;
/// Maximum packet size
static constexpr unsigned int MAX_PACKET_SIZE = PACKET_SIZE * 2;
inline uint16_t Reverse16(uint16_t value) {
return (((value & 0x00FF) << 8) | ((value & 0xFF00) >> 8));
}
std::shared_ptr<EncoderContext> OpusEncoderContext::createEncoderContext() {
return std::make_shared<OpusEncoderContext>();
}
OpusEncoderContext::~OpusEncoderContext() {
close();
}
bool OpusEncoderContext::init(AudioFormat inputFormat) {
m_inputFormat = inputFormat;
if (inputFormat.sampleRateHz != SAMPLE_RATE) {
ACSDK_ERROR(LX("initFailed").d("reason", "Input sampling rate is invalid"));
return false;
}
if (inputFormat.encoding != AudioFormat::Encoding::LPCM) {
ACSDK_ERROR(LX("initFailed").d("reason", "Input audio format must be LPCM"));
return false;
}
if (inputFormat.numChannels == 2 && inputFormat.layout != AudioFormat::Layout::INTERLEAVED) {
// Error, only interleaved frames are supported for 2 channel
ACSDK_ERROR(LX("initFailed").d("reason", "Input audio format must be interleaved"));
return false;
}
m_outputFormat.numChannels = inputFormat.numChannels;
return true;
}
size_t OpusEncoderContext::getInputFrameSize() {
return FRAME_SIZE;
}
size_t OpusEncoderContext::getOutputFrameSize() {
return PACKET_SIZE;
}
bool OpusEncoderContext::requiresFullyRead() {
return true;
}
AudioFormat OpusEncoderContext::getAudioFormat() {
return m_outputFormat;
}
std::string OpusEncoderContext::getAVSFormatName() {
return AVS_FORMAT;
}
bool OpusEncoderContext::configureEncoder() {
int err;
// Use 32k bit rate
err = opus_encoder_ctl(m_encoder, OPUS_SET_BITRATE(BIT_RATE));
if (err != OPUS_OK) {
ACSDK_ERROR(LX("startFailed").d("reason", "Failed to set bitrate to 32kbps").d("err", err));
return false;
}
// CBR Only
err = opus_encoder_ctl(m_encoder, OPUS_SET_VBR(0));
if (err != OPUS_OK) {
ACSDK_ERROR(LX("startFailed").d("reason", "Failed to set hard-CBR").d("err", err));
return false;
}
// 20ms framesize
err = opus_encoder_ctl(m_encoder, OPUS_SET_EXPERT_FRAME_DURATION(OPUS_FRAMESIZE_20_MS));
if (err != OPUS_OK) {
ACSDK_ERROR(LX("startFailed").d("reason", "Failed to set frame size to 20ms").d("err", err));
return false;
}
return true;
}
bool OpusEncoderContext::start() {
int err;
if (m_encoder) {
ACSDK_ERROR(LX("startFailed").d("reason", "OpusEncoder is not null"));
return false;
}
m_encoder = opus_encoder_create(m_inputFormat.sampleRateHz, m_inputFormat.numChannels, OPUS_APPLICATION_VOIP, &err);
if (err != OPUS_OK) {
ACSDK_ERROR(LX("startFailed").d("reason", "Failed to create OpusEncoder").d("err", err));
return false;
}
if (!m_encoder) {
ACSDK_ERROR(LX("startFailed").d("reason", "OpusEncoder is null"));
return false;
}
if (!configureEncoder()) {
// Destroy previously created encoder
opus_encoder_destroy(m_encoder);
return false;
}
return true;
}
ssize_t OpusEncoderContext::processSamples(void* samples, size_t numberOfWords, uint8_t* buffer) {
uint16_t* in = static_cast<uint16_t*>(samples);
opus_int16 pcm[FRAME_SIZE];
for (unsigned int i = 0; i < numberOfWords; i++) {
if (m_inputFormat.endianness == AudioFormat::Endianness::LITTLE) {
pcm[i] = littleEndianMachine() ? in[i] : Reverse16(in[i]);
} else {
pcm[i] = littleEndianMachine() ? Reverse16(in[i]) : in[i];
}
}
return opus_encode(m_encoder, pcm, numberOfWords, buffer, MAX_PACKET_SIZE);
}
void OpusEncoderContext::close() {
if (m_encoder) {
opus_encoder_destroy(m_encoder);
m_encoder = NULL;
}
}
OpusEncoderContext::OpusEncoderContext() :
m_outputFormat{
AudioFormat::Encoding::OPUS,
AudioFormat::Endianness::LITTLE,
SAMPLE_RATE,
16,
0,
false,
AudioFormat::Layout::INTERLEAVED,
} {
}
} // namespace speechencoder
} // namespace alexaClientSDK