// avs-device-sdk/SpeechEncoder/test/SpeechEncoderTest.cpp

/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *     http://aws.amazon.com/apache2.0/
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

#include <chrono>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <thread>

#include <gtest/gtest.h>
#include <gmock/gmock.h>

#include <AVSCommon/AVS/Attachment/InProcessAttachment.h>
#include <AVSCommon/AVS/AudioInputStream.h>
#include <AVSCommon/Utils/AudioFormat.h>
#include <AVSCommon/Utils/PromiseFuturePair.h>

#include "SpeechEncoder/SpeechEncoder.h"

namespace alexaClientSDK {
namespace speechencoder {
namespace test {

using namespace avsCommon;
using namespace avsCommon::avs;
using namespace avsCommon::utils;

/// Size in bytes of a single PCM word (16-bit samples).
static constexpr size_t FRAME_WORDSIZE = 2;

/// Number of dummy PCM words that will be written to the encoder's input stream.
static constexpr int NUM_AUDIO_WRITE = 100;

/// Input frame size reported by the mock encoder. The tests use this value as a word count: it is the @c nWords
/// argument expected by @c processSamples, and @c NUM_AUDIO_WRITE is divided by it to get the expected call count.
static constexpr size_t MOCK_ENCODER_INPUT_FRAME_SIZE = 4;

/// Output frame size reported by the mock encoder; also the value returned from every mocked @c processSamples call.
static constexpr size_t MOCK_ENCODER_OUTPUT_FRAME_SIZE = 4;

/// Number of words in input stream SDS buffer (must be large enough to fill encoder's output stream)
static constexpr size_t INPUT_WORD_COUNT = 4096;

/// Short timeout (needs to be greater than @c SpeechEncoder's BLOCKING writer timeout).
/// Declared @c const so that no test can accidentally modify the value shared by the whole file.
static const auto PROCESSING_TIMEOUT = std::chrono::milliseconds(200);

/// The maximum factor of compression we expect to encounter.
static constexpr unsigned int MAX_COMPRESSION_FACTOR = 10;

/// Audio format that the mock encoder reports as its output (handed back by the mocked @c getAudioFormat()).
/// The fourth initializer is @c FRAME_WORDSIZE expressed in bits, i.e. 16.
static const AudioFormat MOCK_ENCODER_FORMAT{
    AudioFormat::Encoding::LPCM,
    AudioFormat::Endianness::LITTLE,
    16000,
    FRAME_WORDSIZE * CHAR_BIT,
    1,
    false,
    AudioFormat::Layout::INTERLEAVED,
};

using ::testing::Return;
using namespace ::testing;

/**
 * A mock encoder backend implementation that inherits from @c EncoderContext.
 */
class MockEncoderContext : public EncoderContext {
public:
    MOCK_METHOD1(init, bool(alexaClientSDK::avsCommon::utils::AudioFormat inputFormat));
    MOCK_METHOD0(getInputFrameSize, size_t());
    MOCK_METHOD0(getOutputFrameSize, size_t());
    MOCK_METHOD0(requiresFullyRead, bool());
    MOCK_METHOD0(getAudioFormat, AudioFormat());
    MOCK_METHOD0(getAVSFormatName, std::string());

    MOCK_METHOD0(start, bool());
    MOCK_METHOD3(processSamples, ssize_t(void* samples, size_t nWords, uint8_t* buffer));
    MOCK_METHOD0(close, void());
};

class SpeechEncoderTest : public ::testing::Test {
protected:
    /// The SpeechEncoder to test.
    std::shared_ptr<SpeechEncoder> m_encoder;

    /// The mock @c EncoderContext.
    std::shared_ptr<MockEncoderContext> m_encoderCtx;

    /**
     * Set up the test harness for running a test.
     */
    virtual void SetUp() {
        m_encoderCtx = std::make_shared<MockEncoderContext>();
        m_encoder = std::make_shared<SpeechEncoder>(m_encoderCtx);

        EXPECT_CALL(*m_encoderCtx, getInputFrameSize()).WillRepeatedly(Return(MOCK_ENCODER_INPUT_FRAME_SIZE));
        EXPECT_CALL(*m_encoderCtx, getAudioFormat()).WillRepeatedly(Return(MOCK_ENCODER_FORMAT));
        EXPECT_CALL(*m_encoderCtx, getOutputFrameSize()).WillRepeatedly(Return(MOCK_ENCODER_OUTPUT_FRAME_SIZE));
    }
};

/**
 * Test to verify that SpeechEncoder::startEncoding properly calls the backend implementation.
 * This test feeds a dummy PCM stream into SpeechEncoder, then checks the resulting calls on
 * the mock @c EncoderContext.
 */
TEST_F(SpeechEncoderTest, testTimer_startEncoding) {
    AudioFormat audioFormat = {AudioFormat::Encoding::LPCM,
                               AudioFormat::Endianness::LITTLE,
                               16000,
                               FRAME_WORDSIZE * CHAR_BIT,
                               1,
                               false,
                               AudioFormat::Layout::INTERLEAVED};
    auto buffer = std::make_shared<AudioInputStream::Buffer>(4000);
    std::shared_ptr<AudioInputStream> inputStream = AudioInputStream::create(buffer, FRAME_WORDSIZE, 1);
    ASSERT_TRUE(inputStream);

    // All expectations are registered before the encoder is started. gMock does not support adding expectations
    // to a mock that has already been handed to the code under test (here: the encoder, once started).

    // EncoderContext::init should be called once.
    EXPECT_CALL(*m_encoderCtx, init(_)).Times(1).WillOnce(Return(true));

    // Mock encoder requires fully read.
    EXPECT_CALL(*m_encoderCtx, requiresFullyRead()).WillRepeatedly(Return(true));

    // EncoderContext::start should be called once.
    EXPECT_CALL(*m_encoderCtx, start()).Times(1).WillOnce(Return(true));

    // EncoderContext::close should be called once.
    EXPECT_CALL(*m_encoderCtx, close()).Times(1);

    // EncoderContext::processSamples should be called with MOCK_ENCODER_INPUT_FRAME_SIZE words each time, once for
    // every MOCK_ENCODER_INPUT_FRAME_SIZE words written below.
    EXPECT_CALL(*m_encoderCtx, processSamples(_, MOCK_ENCODER_INPUT_FRAME_SIZE, _))
        .Times(NUM_AUDIO_WRITE / MOCK_ENCODER_INPUT_FRAME_SIZE)
        .WillRepeatedly(Return(MOCK_ENCODER_OUTPUT_FRAME_SIZE));

    // Start the encoder.
    m_encoder->startEncoding(inputStream, audioFormat, 0, AudioInputStream::Reader::Reference::ABSOLUTE);

    // Begin feeding dummy(empty) PCM frames into SDS.
    std::shared_ptr<AudioInputStream::Writer> writer =
        inputStream->createWriter(AudioInputStream::Writer::Policy::BLOCKING);
    // Fail the test cleanly instead of dereferencing a null writer.
    ASSERT_TRUE(writer);
    uint8_t dummy[FRAME_WORDSIZE] = {0, 0};
    for (int i = 0; i < NUM_AUDIO_WRITE; i++) {
        writer->write(dummy, 1);
    }

    // Give the encoder thread time to process the written data before the expectations are verified.
    std::this_thread::sleep_for(std::chrono::milliseconds(10));
}

/**
 * Verify that the encoding thread exits on shutdown even though nobody is consuming the encoder's output.
 */
TEST_F(SpeechEncoderTest, test_shutdownOnBlockingWrite) {
    const AudioFormat pcmFormat = {
        AudioFormat::Encoding::LPCM,
        AudioFormat::Endianness::LITTLE,
        16000,
        FRAME_WORDSIZE * CHAR_BIT,
        1,
        false,
        AudioFormat::Layout::INTERLEAVED,
    };

    // Input SDS sized to hold INPUT_WORD_COUNT words for a single reader.
    const auto sdsBytes = AudioInputStream::calculateBufferSize(INPUT_WORD_COUNT, FRAME_WORDSIZE, 1);
    auto sdsBuffer = std::make_shared<AudioInputStream::Buffer>(sdsBytes);
    std::shared_ptr<AudioInputStream> pcmStream = AudioInputStream::create(sdsBuffer, FRAME_WORDSIZE, 1);
    ASSERT_TRUE(pcmStream);

    // The backend must be initialised, started and closed exactly once each.
    EXPECT_CALL(*m_encoderCtx, init(_)).Times(1).WillOnce(Return(true));

    // The mock encoder asks for full reads.
    EXPECT_CALL(*m_encoderCtx, requiresFullyRead()).WillRepeatedly(Return(true));

    EXPECT_CALL(*m_encoderCtx, start()).Times(1).WillOnce(Return(true));

    EXPECT_CALL(*m_encoderCtx, close()).Times(1);

    // Any number of processSamples calls is fine as long as each one is given MOCK_ENCODER_INPUT_FRAME_SIZE words.
    EXPECT_CALL(*m_encoderCtx, processSamples(_, MOCK_ENCODER_INPUT_FRAME_SIZE, _))
        .WillRepeatedly(Return(MOCK_ENCODER_OUTPUT_FRAME_SIZE));

    // Kick off encoding.
    m_encoder->startEncoding(pcmStream, pcmFormat, 0, AudioInputStream::Reader::Reference::ABSOLUTE);

    // Inspect the encoder's output stream to learn how large the buffer it writes into is.
    auto outputStream = m_encoder->getEncodedStream();
    ASSERT_TRUE(outputStream);
    // Should this assertion fail, raise INPUT_WORD_COUNT.
    ASSERT_GE(INPUT_WORD_COUNT, outputStream->getDataSize() * MAX_COMPRESSION_FACTOR);

    // Fill the whole input buffer with silent (all-zero) PCM words.
    std::shared_ptr<AudioInputStream::Writer> pcmWriter =
        pcmStream->createWriter(AudioInputStream::Writer::Policy::BLOCKING);
    uint8_t silence[FRAME_WORDSIZE] = {};

    for (size_t remaining = INPUT_WORD_COUNT; remaining > 0; --remaining) {
        pcmWriter->write(silence, 1);
    }

    // Give the encoder thread time to consume as much of the input as it can.
    std::this_thread::sleep_for(PROCESSING_TIMEOUT);

    // By now the encoder loop is expected to be timing out on its output writes and retrying.

    // Dropping the last test-owned reference simulates a shutdown.
    m_encoder.reset();
}

/**
 * Verify that the encoding thread exits and is created again when stopEncoding() and startEncoding() are called in
 * quick succession.
 */
TEST_F(SpeechEncoderTest, test_stopAndStartEncoder) {
    // How many start/stop cycles to run.
    const int cycleCount = 10;

    const AudioFormat pcmFormat = {
        AudioFormat::Encoding::LPCM,
        AudioFormat::Endianness::LITTLE,
        16000,
        FRAME_WORDSIZE * CHAR_BIT,
        1,
        false,
        AudioFormat::Layout::INTERLEAVED,
    };

    const auto sdsBytes = AudioInputStream::calculateBufferSize(INPUT_WORD_COUNT, FRAME_WORDSIZE, 1);
    auto sdsBuffer = std::make_shared<AudioInputStream::Buffer>(sdsBytes);
    std::shared_ptr<AudioInputStream> pcmStream = AudioInputStream::create(sdsBuffer, FRAME_WORDSIZE, 1);
    ASSERT_TRUE(pcmStream);

    // Each cycle must initialise, start and close the backend exactly once.
    EXPECT_CALL(*m_encoderCtx, init(_)).Times(cycleCount).WillRepeatedly(Return(true));
    EXPECT_CALL(*m_encoderCtx, start()).Times(cycleCount).WillRepeatedly(Return(true));
    EXPECT_CALL(*m_encoderCtx, close()).Times(cycleCount);

    // The mock encoder asks for full reads.
    EXPECT_CALL(*m_encoderCtx, requiresFullyRead()).WillRepeatedly(Return(true));

    // Any processSamples call that does happen must be given MOCK_ENCODER_INPUT_FRAME_SIZE words.
    EXPECT_CALL(*m_encoderCtx, processSamples(_, MOCK_ENCODER_INPUT_FRAME_SIZE, _))
        .WillRepeatedly(Return(MOCK_ENCODER_OUTPUT_FRAME_SIZE));

    int cycle = 0;
    while (cycle < cycleCount) {
        // Start the encoder, then stop it straight away.
        // NOTE(review): the `true` argument presumably requests an immediate stop - confirm against SpeechEncoder.h.
        m_encoder->startEncoding(pcmStream, pcmFormat, 0, AudioInputStream::Reader::Reference::ABSOLUTE);
        m_encoder->stopEncoding(true);
        ++cycle;
    }
}

}  // namespace test
}  // namespace speechencoder
}  // namespace alexaClientSDK