Interaction between FFmpeg and other libraries

Posted by msafvati on Thu, 20 Jan 2022 10:31:34 +0100

Video software cannot be built with FFmpeg alone. FFmpeg covers reading and writing streams and encoding and decoding; everything else has to come from other libraries. Displaying video requires a GUI library, and playing sound requires an audio library or a system API. This interoperability is therefore a top priority, and below I walk through several common ways of exchanging data between FFmpeg and other libraries.

Converting between AVFrame and Qt

Converting an AVFrame to a QImage is straightforward, but constructing the QImage directly on top of the frame's data buffer is a trap: the QImage shares that memory, so once the AVFrame is freed the image becomes invalid too. The safe approach is to make a deep copy by drawing into a second QImage.

// AVFrame to QImage
// First make sure the frame format is AV_PIX_FMT_RGB32; if not, convert it once (a sketch of this helper follows below)
AVFrame *_frame_rgb32 = _video_format_convert (_frame_xxx, AV_PIX_FMT_RGB32);
// _img_tmp shares its data area with _frame_rgb32, so it is only valid while the frame is alive
QImage _img_tmp (_frame_rgb32->data [0], _frame_rgb32->width, _frame_rgb32->height, QImage::Format_RGB32);
// If the image must outlive _frame_rgb32, construct a deep copy
QImage _img { 640, 480, QImage::Format_ARGB32 };
QPainter _p (&_img);
_p.drawImage (_img.rect (), _img_tmp, _img_tmp.rect ());

// QImage to AVFrame
AVFrame *_frame_rgb32 = av_frame_alloc ();
_frame_rgb32->width = img.width ();
_frame_rgb32->height = img.height ();
_frame_rgb32->format = AV_PIX_FMT_RGB32;
av_frame_get_buffer (_frame_rgb32, 0);
// Assumes linesize [0] == width * 4; with row padding, copy line by line instead
memcpy (_frame_rgb32->data [0], img.bits (), _frame_rgb32->width * _frame_rgb32->height * 4);
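
The _video_format_convert helper used above is not an FFmpeg function; it stands in for a pixel-format conversion done with libswscale. A minimal sketch of it, assuming the source frame carries valid width, height and format, could look like this:

// Hypothetical helper: convert a frame to the requested pixel format with libswscale
// (needs extern "C" { #include <libswscale/swscale.h> } and linking against swscale)
AVFrame *_video_format_convert (AVFrame *_src, AVPixelFormat _dst_fmt) {
    AVFrame *_dst = av_frame_alloc ();
    _dst->width = _src->width;
    _dst->height = _src->height;
    _dst->format = _dst_fmt;
    av_frame_get_buffer (_dst, 0);
    SwsContext *_sws = sws_getContext (_src->width, _src->height, (AVPixelFormat) _src->format,
        _dst->width, _dst->height, _dst_fmt, SWS_BICUBIC, nullptr, nullptr, nullptr);
    sws_scale (_sws, _src->data, _src->linesize, 0, _src->height, _dst->data, _dst->linesize);
    sws_freeContext (_sws);
    return _dst; // caller releases with av_frame_free
}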

Converting between AVFrame and Gdiplus::Bitmap

Converting between AVFrame and Gdiplus::Bitmap is just as easy, but remember that GDI+ must be initialized before any Bitmap is created.
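
A minimal sketch of that initialization, performed once at startup and matched by a shutdown at exit (the token variable name is illustrative):

// GDI+ startup / shutdown; required once per process before any Gdiplus object is used
// (include <windows.h> before <gdiplus.h>)
#include <gdiplus.h>
#pragma comment (lib, "Gdiplus.lib")

ULONG_PTR _gdiplus_token = 0;
Gdiplus::GdiplusStartupInput _gdiplus_input;
Gdiplus::GdiplusStartup (&_gdiplus_token, &_gdiplus_input, nullptr);
// ... create and use Gdiplus::Bitmap as below ...
Gdiplus::GdiplusShutdown (_gdiplus_token);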

// AVFrame to Gdiplus::Bitmap
// Here _frame_rgb32 is assumed to be a valid RGB32 AVFrame
Gdiplus::Bitmap _bmp (_frame_rgb32->width, _frame_rgb32->height, PixelFormat32bppARGB);
Gdiplus::BitmapData _data;
Gdiplus::Rect _lock_rect (0, 0, _bmp.GetWidth (), _bmp.GetHeight ());
_bmp.LockBits (&_lock_rect, Gdiplus::ImageLockModeWrite, PixelFormat32bppARGB, &_data);
// Assumes no row padding on either side; otherwise copy row by row using linesize [0] and _data.Stride
memcpy (_data.Scan0, _frame_rgb32->data [0], _frame_rgb32->width * _frame_rgb32->height * 4);
_bmp.UnlockBits (&_data);

// Gdiplus::Bitmap to AVFrame
// Here _bmp is assumed to be a valid Gdiplus::Bitmap
AVFrame *_frame_rgb32 = av_frame_alloc ();
_frame_rgb32->width = _bmp.GetWidth ();
_frame_rgb32->height = _bmp.GetHeight ();
_frame_rgb32->format = AV_PIX_FMT_RGB32;
av_frame_get_buffer (_frame_rgb32, 0);
Gdiplus::BitmapData _data;
Gdiplus::Rect _lock_rect (0, 0, _bmp.GetWidth (), _bmp.GetHeight ());
_bmp.LockBits (&_lock_rect, Gdiplus::ImageLockModeRead, PixelFormat32bppARGB, &_data);
memcpy (_frame_rgb32->data [0], _data.Scan0, _frame_rgb32->width * _frame_rgb32->height * 4);
_bmp.UnlockBits (&_data);

Playing video with SDL1

// First, initialize SDL
SDL_Init (SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER);

// Then create the SDL1 window surface and YUV overlay
SDL_Surface *_screen = SDL_SetVideoMode (_frame_yuv420p->width, _frame_yuv420p->height, 0, SDL_SWSURFACE);
SDL_Overlay *_bmp = SDL_CreateYUVOverlay (_frame_yuv420p->width, _frame_yuv420p->height, SDL_YV12_OVERLAY, _screen);
SDL_Rect _rect { 0, 0, _frame_yuv420p->width, _frame_yuv420p->height };

// Inside the playback loop, copy the AVFrame planes into the overlay
// (SDL_YV12_OVERLAY stores planes as Y, V, U, so U and V are swapped relative to yuv420p;
//  this also assumes the overlay pitches equal the plane widths, otherwise copy row by row)
SDL_LockYUVOverlay (_bmp);
memcpy (_bmp->pixels [0], _frame_yuv420p->data [0], _frame_yuv420p->width * _frame_yuv420p->height);
memcpy (_bmp->pixels [2], _frame_yuv420p->data [1], _frame_yuv420p->width * _frame_yuv420p->height / 4);
memcpy (_bmp->pixels [1], _frame_yuv420p->data [2], _frame_yuv420p->width * _frame_yuv420p->height / 4);
SDL_UnlockYUVOverlay (_bmp);
SDL_DisplayYUVOverlay (_bmp, &_rect);
// Pause briefly to control playback speed
SDL_Delay (50);

// Done displaying, quit SDL
SDL_Quit ();

Playing video with SDL2

// First, initialize SDL
SDL_Init (SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER);

// Then create the SDL2 window, renderer, and texture
SDL_Window *_screen = SDL_CreateWindow ("My Window", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 640, 480, SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE);
SDL_Renderer* _sdlRenderer = SDL_CreateRenderer (_screen, -1, 0);
SDL_Texture* _sdlTexture = SDL_CreateTexture (_sdlRenderer, SDL_PIXELFORMAT_YV12, SDL_TEXTUREACCESS_STREAMING, 640, 480);
SDL_Rect _rect { 0, 0, 640, 480 };

// Inside the playback loop, pack the AVFrame planes into a YV12 buffer and upload it to the texture
// (YV12 stores V before U; this assumes the frame is 640x480 with no linesize padding)
int _sz = _frame_yuv420p->width * _frame_yuv420p->height;
uint8_t *_buf = new uint8_t [_sz * 3 / 2];
memcpy (_buf, _frame_yuv420p->data [0], _sz);
memcpy (_buf + _sz, _frame_yuv420p->data [2], _sz / 4);
memcpy (_buf + _sz * 5 / 4, _frame_yuv420p->data [1], _sz / 4);
SDL_UpdateTexture (_sdlTexture, NULL, _buf, 640);
SDL_RenderClear (_sdlRenderer);
SDL_RenderCopy (_sdlRenderer, _sdlTexture, NULL, &_rect);
SDL_RenderPresent (_sdlRenderer);
delete [] _buf;
// Pause briefly to control playback speed
SDL_Delay (50);

// Done displaying, quit SDL
SDL_Quit ();
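
The snippet above never releases the texture, renderer, or window; in a real program they should be destroyed before SDL_Quit:

// Release the SDL2 objects created above (do this before SDL_Quit)
SDL_DestroyTexture (_sdlTexture);
SDL_DestroyRenderer (_sdlRenderer);
SDL_DestroyWindow (_screen);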

Playing audio with SDL2

One limitation of playing audio through SDL2's legacy SDL_OpenAudio API is that only one device can be open at a time, so you cannot create two player objects and have them play simultaneously.

// Globals used to track playback progress; in real code these usually live in the player class as member variables
volatile Uint8 *_audio_pos = nullptr;
volatile Uint32 _audio_len = 0;

// Audio callback; SDL pulls the data it plays through this function
void _fill_audio (void *udata, Uint8 *stream, int len) {
    // If the progress variables are class/struct members, pass the object pointer through udata
    SDL_memset (stream, 0, len);
    if (_audio_len == 0)
        return;
    len = (len > (int) _audio_len ? _audio_len : len);
    //SDL_MixAudio (stream, (const Uint8*) _audio_pos, len, SDL_MIX_MAXVOLUME);
    SDL_MixAudioFormat (stream, (const Uint8*) _audio_pos, AUDIO_S16, len, SDL_MIX_MAXVOLUME);
    _audio_pos += len;
    _audio_len -= len;
}

// initialization
SDL_AudioSpec _spec;
_spec.freq = _frame_s16->sample_rate;
_spec.format = AUDIO_S16SYS;
_spec.channels = _frame_s16->channels;
_spec.silence = 0;
_spec.samples = 1024;
_spec.callback = _fill_audio; // the callback must be a free function or a static member function
_spec.userdata = nullptr;     // or `this`, if the callback needs access to the object
if (int _ret = SDL_OpenAudio (&_spec, NULL)) {
    printf ("SDL_OpenAudio failed\n");
    return;
}
SDL_PauseAudio (0);

// Playback step, called once per decoded frame
_audio_pos = _frame_s16->data [0];
_audio_len = _frame_s16->nb_samples * _frame_s16->channels * 2; // assumes packed AV_SAMPLE_FMT_S16
while (_audio_len > 0)
    SDL_Delay (1);

// release
SDL_CloseAudio ();
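
The snippets above and in the next section assume the decoded frame has already been converted to packed signed 16-bit samples (_frame_s16). That conversion is not shown in this post; a minimal sketch with libswresample, where _frame_src stands for the decoded source frame, might look like this:

// Hypothetical conversion of a decoded frame to packed S16 with libswresample
// (needs extern "C" { #include <libswresample/swresample.h> }; assumes _frame_src has a valid channel_layout)
AVFrame *_frame_s16 = av_frame_alloc ();
_frame_s16->channel_layout = _frame_src->channel_layout;
_frame_s16->sample_rate = _frame_src->sample_rate;
_frame_s16->format = AV_SAMPLE_FMT_S16;
SwrContext *_swr = swr_alloc_set_opts (nullptr,
    _frame_s16->channel_layout, AV_SAMPLE_FMT_S16, _frame_s16->sample_rate,
    _frame_src->channel_layout, (AVSampleFormat) _frame_src->format, _frame_src->sample_rate,
    0, nullptr);
swr_init (_swr);
swr_convert_frame (_swr, _frame_s16, _frame_src); // allocates the output buffer as needed
swr_free (&_swr);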

Playing audio with the waveOut functions

The waveOut family of functions is Windows-only (available from Windows XP through Windows 10). It is fairly old, but compared with SDL2 it saves you one external dependency, and the two implementations are so similar that migrating between them barely requires changing the architecture.

// Global variable to control playback progress
volatile LPWAVEHDR _pwh = nullptr;

// Win32-style callback; the CALLBACK calling convention must be specified
void CALLBACK _wave_out_proc (HWAVEOUT hwo, UINT uMsg, DWORD_PTR dwInstance, DWORD_PTR dwParam1, DWORD_PTR dwParam2) {
    // dwInstance here is the pointer specified by the user
    if (WOM_DONE == uMsg) {
        while (_pwh) {
            std::this_thread::sleep_for (std::chrono::milliseconds (1));
        }
        _pwh = (LPWAVEHDR) dwParam1;
    }
}

// initialization
HWAVEOUT _hwo = nullptr;
WAVEFORMATEX _wfex;
_wfex.wFormatTag = WAVE_FORMAT_PCM;
_wfex.nChannels = (WORD) _frame_s16->channels;
_wfex.nSamplesPerSec = (DWORD) _frame_s16->sample_rate;
_wfex.wBitsPerSample = 16;
_wfex.cbSize = 0; // no extra format bytes for plain PCM
_wfex.nBlockAlign = _wfex.wBitsPerSample * _wfex.nChannels / 8;
_wfex.nAvgBytesPerSec = _wfex.nSamplesPerSec * _wfex.nBlockAlign;
if (::waveOutOpen (nullptr, 0, &_wfex, 0, 0, WAVE_FORMAT_QUERY) != 0) {
    printf ("waveOutOpen failed\n");
    return;
}
if (::waveOutOpen (&_hwo, WAVE_MAPPER, &_wfex, (DWORD_PTR) _wave_out_proc, (DWORD_PTR) this, CALLBACK_FUNCTION) != 0) {
    printf ("waveOutOpen failed\n");
    return;
}

// Playback step, called once per decoded frame
LPWAVEHDR _pwh = new WAVEHDR;
if (!_pwh)
    return;
memset (_pwh, 0, sizeof (WAVEHDR));
_pwh->dwLoops = 1;
_pwh->dwBufferLength = (DWORD) _frame_s16->nb_samples * _frame_s16->channels * 2; // packed S16
_pwh->lpData = new char [_pwh->dwBufferLength];
if (!_pwh->lpData) {
    delete _pwh;
    return;
}
memcpy (_pwh->lpData, _frame_s16->data[0], _pwh->dwBufferLength);
if (::waveOutPrepareHeader (_hwo, _pwh, sizeof (WAVEHDR)) != 0) {
    delete [] _pwh->lpData;
    delete _pwh;
    return;
}
if (::waveOutWrite (_hwo, _pwh, sizeof (WAVEHDR)) != 0) {
    delete [] _pwh->lpData;
    delete _pwh;
    return;
}

// release
if (_hwo) {
    ::waveOutReset (_hwo);
    ::waveOutClose (_hwo);
    _hwo = NULL;
}

Enumerating DirectShow devices

This uses the DirectShow COM interfaces directly. The procedure is a bit tedious and takes a fair amount of code just to list devices, so there is little point in studying it deeply; copy it when you need it.

The reason FFmpeg is not used here is that, to date, FFmpeg offers no API for enumerating DirectShow devices programmatically; its own wiki suggests parsing the console output of -list_devices, which is a limitation of FFmpeg rather than of this approach.

Address: https://trac.ffmpeg.org/wiki/DirectShow#Howtoprogrammaticallyenumeratedevices

// dshow header file
#include <string>
#include <vector>
#include <dshow.h>
#include <dvdmedia.h>
#pragma comment (lib, "Strmiids.lib")
#pragma comment (lib, "Winmm.lib")

// Note: COM must be initialized on this thread before enumerating
::CoInitializeEx (NULL, COINIT_APARTMENTTHREADED);

// Enumerate all dshow video devices
std::vector<std::wstring> _video_names;
ICreateDevEnum* pSysDevEnum = nullptr;
if (SUCCEEDED (CoCreateInstance (CLSID_SystemDeviceEnum, nullptr, CLSCTX_INPROC_SERVER, IID_ICreateDevEnum, (void**) &pSysDevEnum))) {
    IEnumMoniker* pEnumCat = nullptr;
    if (SUCCEEDED (pSysDevEnum->CreateClassEnumerator (CLSID_VideoInputDeviceCategory, &pEnumCat, 0))) {
        IMoniker* pMoniker = nullptr;
        ULONG cFetched = 0;
        while (SUCCEEDED (pEnumCat->Next (1, &pMoniker, &cFetched)) && cFetched) {
            IPropertyBag* pPropBag = nullptr;
            if (SUCCEEDED (pMoniker->BindToStorage (nullptr, nullptr, IID_IPropertyBag, (void**) &pPropBag))) {
                VARIANT varName;
                VariantInit (&varName);
                if (SUCCEEDED (pPropBag->Read (L"FriendlyName", &varName, 0)))
                    _video_names.push_back (varName.bstrVal);
                VariantClear (&varName);
            }
            if (pPropBag)
                pPropBag->Release ();
            pMoniker->Release ();
            pMoniker = nullptr;
        }
        if (pMoniker)
            pMoniker->Release ();
    }
    if (pEnumCat)
        pEnumCat->Release ();
}
if (pSysDevEnum)
    pSysDevEnum->Release ();

// Enumerate all dshow audio devices
std::vector<std::wstring> _audio_names;
ICreateDevEnum* pSysDevEnum = NULL;
if (SUCCEEDED (CoCreateInstance (CLSID_SystemDeviceEnum, NULL, CLSCTX_INPROC_SERVER, IID_ICreateDevEnum, (void**) &pSysDevEnum))) {
    IEnumMoniker* pEnumCat = nullptr;
    if (SUCCEEDED (pSysDevEnum->CreateClassEnumerator (CLSID_AudioInputDeviceCategory, &pEnumCat, 0))) {
        IMoniker* pMoniker = nullptr;
        ULONG cFetched;
        while (SUCCEEDED (pEnumCat->Next (1, &pMoniker, &cFetched)) && cFetched) {
            IPropertyBag* pPropBag = nullptr;
            if (SUCCEEDED (pMoniker->BindToStorage (nullptr, nullptr, IID_IPropertyBag, (void**) &pPropBag))) {
                VARIANT varName;
                VariantInit (&varName);
                if (SUCCEEDED (pPropBag->Read (L"FriendlyName", &varName, 0)))
                    _audio_names.push_back (varName.bstrVal);
                VariantClear (&varName);
            }
            if (pPropBag)
                pPropBag->Release ();
            pMoniker->Release ();
            pMoniker = nullptr;
        }
        if (pMoniker)
            pMoniker->Release ();
    }
    if (pEnumCat)
        pEnumCat->Release ();
}
if (pSysDevEnum)
    pSysDevEnum->Release ();
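
Once a FriendlyName has been obtained, it can be passed to FFmpeg's dshow input format to actually open the device. A minimal sketch, assuming the wide-string name has already been converted to a UTF-8 std::string _name_utf8:

// Open an enumerated video device through FFmpeg's dshow demuxer
// (needs extern "C" { #include <libavdevice/avdevice.h> #include <libavformat/avformat.h> } and libavdevice)
avdevice_register_all ();
auto *_ifmt = av_find_input_format ("dshow");
AVFormatContext *_fmt_ctx = nullptr;
std::string _url = "video=" + _name_utf8;
if (avformat_open_input (&_fmt_ctx, _url.c_str (), _ifmt, nullptr) != 0)
    printf ("avformat_open_input failed\n");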

Capturing speaker output on Windows (WASAPI loopback)

This example talks to the COM interfaces directly, so it is quite long; as with device enumeration, it is meant to be used rather than studied. One caveat: on 64-bit Windows 7 you should not change the number of channels in the mix format, because doing so tends to make the capture fail to initialize, so convert the channel layout yourself afterwards (for example with libswresample, similar to the sketch shown earlier). The captured audio is placed into an AVFrame, so it can be processed or transcoded with FFmpeg directly.

HRESULT _r = 0;
DWORD _nTaskIndex = 0;
REFERENCE_TIME _hnsDefaultDevicePeriod = 0;
LARGE_INTEGER _liFirstFire { 0 };
//
HANDLE _hEventStarted = ::CreateEvent (NULL, TRUE, FALSE, NULL);
HANDLE _hEventStop = ::CreateEvent (NULL, TRUE, FALSE, NULL);
HANDLE _hTimerWakeUp = ::CreateWaitableTimer (NULL, FALSE, NULL);
HANDLE _hTask = AvSetMmThreadCharacteristics (_T ("Capture"), &_nTaskIndex); // needs avrt.h and Avrt.lib
SetEvent (_hEventStarted);
AVFrame *_frame = av_frame_alloc ();
//
IMMDeviceEnumerator *_pEnumerator = nullptr;
IMMDevice *_pDevice = nullptr;
IAudioClient *_pAudioClient = nullptr;
WAVEFORMATEX *_pwfx = nullptr;
IAudioCaptureClient *_pCaptureClient = nullptr;
do {
    if (FAILED (_r = CoCreateInstance (__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**) &_pEnumerator))) {
        LOG_INFO ("CoCreateInstance failed %d", _r);
        break;
    }
    if (FAILED (_r = _pEnumerator->GetDefaultAudioEndpoint (eRender, eConsole, &_pDevice))) {
        LOG_INFO ("_pEnumerator->GetDefaultAudioEndpoint failed %d", _r);
        break;
    }
    if (FAILED (_r = _pDevice->Activate (__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**) &_pAudioClient))) {
        LOG_INFO ("_pDevice->Activate failed %d", _r);
        break;
    }
    if (FAILED (_r = _pAudioClient->GetDevicePeriod (&_hnsDefaultDevicePeriod, NULL))) {
        LOG_INFO ("_pAudioClient->GetDevicePeriod failed %d", _r);
        break;
    }
    if (FAILED (_r = _pAudioClient->GetMixFormat (&_pwfx))) {
        LOG_INFO ("_pAudioClient->GetMixFormat failed %d", _r);
        break;
    }
    //
    _pwfx->wBitsPerSample = 16;
    _pwfx->nSamplesPerSec = _sample_rate;    // _sample_rate: target rate chosen by the caller
    //_pwfx->nChannels = _channel_num;       // do not change this on 64-bit Win7 (see note above)
    _pwfx->nBlockAlign = _pwfx->nChannels * _pwfx->wBitsPerSample / 8;
    _pwfx->nAvgBytesPerSec = _pwfx->nBlockAlign * _pwfx->nSamplesPerSec;
    if (_pwfx->wFormatTag == WAVE_FORMAT_IEEE_FLOAT) {
        _pwfx->wFormatTag = WAVE_FORMAT_PCM;
    } else if (_pwfx->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
        PWAVEFORMATEXTENSIBLE _pEx = reinterpret_cast<PWAVEFORMATEXTENSIBLE>(_pwfx);
        if (IsEqualGUID (KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, _pEx->SubFormat)) {
            _pEx->SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
            _pEx->Samples.wValidBitsPerSample = _pwfx->wBitsPerSample;
        }
    } else {
        LOG_INFO ("unknown format 0x%04X", _pwfx->wFormatTag);
        break;
    }
    //
    size_t _FrameSize = (_pwfx->wBitsPerSample / 8) * _pwfx->nChannels;// Length of each frame (bytes)
    _frame->channels = _pwfx->nChannels;
    _frame->channel_layout = av_get_default_channel_layout (_frame->channels);
    _frame->sample_rate = _pwfx->nSamplesPerSec;
    _frame->format = AV_SAMPLE_FMT_S16;
    //_frame->format = AV_SAMPLE_FMT_FLT;
    //
    if (FAILED (_r = _pAudioClient->Initialize (AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, 0, 0, _pwfx, nullptr))) {
        LOG_INFO ("_pAudioClient->Initialize failed %d", _r);
        break;
    }
    if (FAILED (_r = _pAudioClient->GetService (__uuidof(IAudioCaptureClient), (void**) &_pCaptureClient))) {
        LOG_INFO ("_pAudioClient->GetService failed %d", _r);
        break;
    }
    _liFirstFire.QuadPart = -_hnsDefaultDevicePeriod / 2; // negative means relative time
    LONG _lTimeBetweenFires = (LONG) _hnsDefaultDevicePeriod / 2 / (10 * 1000); // convert to milliseconds
    if (!SetWaitableTimer (_hTimerWakeUp, &_liFirstFire, _lTimeBetweenFires, NULL, NULL, FALSE)) {
        LOG_INFO ("SetWaitableTimer failed %d", ::GetLastError ());
        break;
    }
    if (FAILED (_r = _pAudioClient->Start ())) {
        LOG_INFO ("_pAudioClient->Start failed %d", _r);
        break;
    }
    //
    HANDLE _waitArray [2] = { _hEventStop, _hTimerWakeUp };
    while (true) {
        DWORD _dwWaitResult = WaitForMultipleObjects (_countof (_waitArray), _waitArray, FALSE, INFINITE);
        if (WAIT_OBJECT_0 + 1 != _dwWaitResult)
            break;
        UINT32 _nNextPacketSize = 0;
        if (FAILED (_r = _pCaptureClient->GetNextPacketSize (&_nNextPacketSize)))
            break;
        if (_nNextPacketSize == 0)
            continue;
        //
        BYTE *_pData = nullptr;
        UINT32 _nNumFramesToRead = 0;
        DWORD _dwFlags = 0;
        if (FAILED (_r = _pCaptureClient->GetBuffer (&_pData, &_nNumFramesToRead, &_dwFlags, nullptr, nullptr))) {
            break;
        }
        if (_nNumFramesToRead == 0)
            continue;
        if (_frame->nb_samples != _nNumFramesToRead) {// * _pwfx->nChannels
            if (_frame->data [0])
                av_frame_unref (_frame);
            _frame->nb_samples = _nNumFramesToRead;// * _pwfx->nChannels
            av_frame_get_buffer (_frame, 1);
        }
        //
        if ((_dwFlags & AUDCLNT_BUFFERFLAGS_SILENT) > 0) {
            memset (_frame->data [0], 0, _nNumFramesToRead*_FrameSize);
        } else {
            ::CopyMemory (_frame->data [0], _pData, _nNumFramesToRead*_FrameSize);
        }
        // At this point _frame holds one captured audio frame, ready for processing or transcoding
        // _callback (_frame);
        _pCaptureClient->ReleaseBuffer (_nNumFramesToRead);
    }
} while (false);
//
if (_pCaptureClient)
    _pCaptureClient->Release ();
av_frame_free (&_frame);
if (_pwfx)
    CoTaskMemFree (_pwfx);
if (_pAudioClient)
    _pAudioClient->Release ();
if (_pDevice)
    _pDevice->Release ();
if (_pEnumerator)
    _pEnumerator->Release ();
AvRevertMmThreadCharacteristics (_hTask);
::CloseHandle (_hTimerWakeUp);
::CloseHandle (_hEventStop);
::CloseHandle (_hEventStarted);

Program structure

For a player, the job is simply to decode and display (the decode step itself is sketched after this outline):

#include <header files>

int main (int argc, char* argv[]) {
    // initialization
    // Open input stream
    new_thread {
        while (_run) {
            // image processing
            // Read an AVPacket from the camera input stream
            // Decode AVPacket into AVFrame
            // Consider whether to convert the pixel format of AVFrame according to the actual needs
            // Display pictures (SDL2 or other interface libraries)
        }
    }
    new_thread {
        while (_run) {
            // Audio processing
            // Read an AVPacket from the microphone input stream
            // Decode AVPacket into AVFrame
            // Consider whether to convert the sampling format of AVFrame according to the actual needs
            // Play sound (SDL2 or other audio library)
        }
    }
    while (_run) {
        // wait...
    }
    // Close input stream
    return 0;
}
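
The read-a-packet / decode-a-frame step that both loops rely on maps onto the send/receive API roughly like this (error handling trimmed; _fmt_ctx, _dec_ctx and _stream_index are assumed to have been set up during initialization):

// Minimal read/decode loop against the send/receive API
AVPacket *_pkt = av_packet_alloc ();
AVFrame *_frame = av_frame_alloc ();
while (_run && av_read_frame (_fmt_ctx, _pkt) >= 0) {
    if (_pkt->stream_index == _stream_index && avcodec_send_packet (_dec_ctx, _pkt) == 0) {
        while (avcodec_receive_frame (_dec_ctx, _frame) == 0) {
            // _frame now holds one decoded frame: convert, display or play it here
        }
    }
    av_packet_unref (_pkt);
}
av_frame_free (&_frame);
av_packet_free (&_pkt);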

For video-surveillance software, which has to display the camera while also encoding and sending (or saving) what it captures, the architecture simply combines the two:

#include <header files>

int main (int argc, char* argv[]) {
    // initialization
    // Turn on the camera
    // Open output stream
    new_thread {
        while (_run) {
            // image processing
            // Read an AVPacket from the camera input stream
            // Decode AVPacket into AVFrame
            // Consider whether to convert the pixel format of AVFrame according to the actual needs
            // Display pictures (SDL2 or other interface libraries)
            // Encode AVFrame as AVPacket
            // send one frame
            // Control speed
        }
    }
    while (_run) {
        // wait...
    }
    // Close output stream
    // Turn off camera input stream
    return 0;
}