I'm trying to create two functions: one that reads a video and stores the frames in a vector of cv::Mat, and another that takes a vector of cv::Mat and writes it out as a video. The code compiles and runs without errors, but the written video doesn't play; the file contains data, but VLC is unable to play it. What am I doing wrong in the function that writes the video?
#include <iostream>
#include <string>
#include <vector>
#include <opencv2/core/mat.hpp>
#include <opencv2/imgcodecs.hpp>
extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
#include <libavcodec/avcodec.h>
#include <libavutil/pixdesc.h>
#include <libavutil/opt.h>
}
// helper function to check for FFmpeg errors
inline void checkError(int error, const std::string &message) {
if (error < 0) {
std::cerr << message << ": " << av_err2str(error) << std::endl;
exit(EXIT_FAILURE);
}
}
int writeVideo(const std::string& video_path, std::vector<cv::Mat>& frames, int width, int height, int fps) {
// initialize FFmpeg
av_log_set_level(AV_LOG_ERROR);
avformat_network_init();
// create the output video context
AVFormatContext *formatContext = nullptr;
int error = avformat_alloc_output_context2(&formatContext, nullptr, nullptr, video_path.c_str());
checkError(error, "Error creating output context");
// create the video stream
AVStream *videoStream = avformat_new_stream(formatContext, nullptr);
if (!videoStream) {
std::cerr << "Error creating video stream" << std::endl;
exit(EXIT_FAILURE);
}
// create the video codec context
const AVCodec *videoCodec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
AVCodecContext *videoCodecContext = avcodec_alloc_context3(videoCodec);
if (!videoCodecContext) {
std::cerr << "Error allocating video codec context" << std::endl;
exit(EXIT_FAILURE);
}
videoCodecContext->bit_rate = 200000;
videoCodecContext->width = width;
videoCodecContext->height = height;
videoCodecContext->time_base = (AVRational){ 1, fps };
videoCodecContext->framerate = (AVRational){ fps, 1 };
videoCodecContext->gop_size = 12;
videoCodecContext->max_b_frames = 0;
videoCodecContext->pix_fmt = AV_PIX_FMT_YUV420P;
if (formatContext->oformat->flags & AVFMT_GLOBALHEADER) {
videoCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
error = avcodec_open2(videoCodecContext, videoCodec, nullptr);
checkError(error, "Error opening");
error = avcodec_parameters_from_context(videoStream->codecpar, videoCodecContext);
checkError(error, "Error setting video codec parameters");
// open the output file
error = avio_open(&formatContext->pb, video_path.c_str(), AVIO_FLAG_WRITE);
checkError(error, "Error opening output file");
// write the video file header
error = avformat_write_header(formatContext, nullptr);
checkError(error, "Error writing video file header");
AVPacket *packet = av_packet_alloc();
if (!packet) {
std::cerr << "Error allocating packet" << std::endl;
exit(EXIT_FAILURE);
}
for (const cv::Mat &frame : frames) {
// convert the cv::Mat to an AVFrame
AVFrame *avFrame = av_frame_alloc();
avFrame->format = videoCodecContext->pix_fmt;
avFrame->width = width;
avFrame->height = height;
error = av_frame_get_buffer(avFrame, 0);
checkError(error, "Error allocating frame buffer");
struct SwsContext *frameConverter = sws_getContext(width, height, AV_PIX_FMT_BGR24, width, height, videoCodecContext->pix_fmt, SWS_BICUBIC, nullptr, nullptr, nullptr);
uint8_t *srcData[AV_NUM_DATA_POINTERS] = { frame.data };
int srcLinesize[AV_NUM_DATA_POINTERS] = { static_cast<int>(frame.step) };
sws_scale(frameConverter, srcData, srcLinesize, 0, height, avFrame->data, avFrame->linesize);
sws_freeContext(frameConverter);
// encode the AVFrame
avFrame->pts = packet->pts;
error = avcodec_send_frame(videoCodecContext, avFrame);
checkError(error, "Error sending frame to video codec");
while (error >= 0) {
error = avcodec_receive_packet(videoCodecContext, packet);
if (error == AVERROR(EAGAIN) || error == AVERROR_EOF) {
break;
}
checkError(error, "Error encoding video frame");
// write the encoded packet to the output file
packet->stream_index = videoStream->index;
error = av_interleaved_write_frame(formatContext, packet);
checkError(error, "Error writing video packet");
av_packet_unref(packet);
}
av_frame_free(&avFrame);
}
// clean up
av_packet_free(&packet);
avcodec_free_context(&videoCodecContext);
avformat_free_context(formatContext);
avformat_network_deinit();
return EXIT_SUCCESS;
}
std::vector<cv::Mat> readVideo(const std::string video_path) {
// initialize FFmpeg
av_log_set_level(AV_LOG_ERROR);
avformat_network_init();
AVFormatContext* formatContext = nullptr;
int error = avformat_open_input(&formatContext, video_path.c_str(), nullptr, nullptr);
checkError(error, "Error opening input file");
//Read packets of a media file to get stream information.
error = avformat_find_stream_info(formatContext, nullptr);
checkError(error, "Error avformat find stream info");
// find the video stream
AVStream* videoStream = nullptr;
for (unsigned int i = 0; i < formatContext->nb_streams; i++) {
if (formatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && !videoStream) {
videoStream = formatContext->streams[i];
}
}
if (!videoStream) {
std::cerr << "Error: input file does not contain a video stream" << std::endl;
exit(EXIT_FAILURE);
}
// create the video codec context
const AVCodec* videoCodec = avcodec_find_decoder(videoStream->codecpar->codec_id);
AVCodecContext* videoCodecContext = avcodec_alloc_context3(videoCodec);
if (!videoCodecContext) {
std::cerr << "Error allocating video codec context" << std::endl;
exit(EXIT_FAILURE);
}
std::cout << "::informations::\n";
std::cout << " bit_rate:" << videoCodecContext->bit_rate << "\n";
std::cout << " width:" << videoCodecContext->width << "\n";
std::cout << " height:" << videoCodecContext->height << "\n";
std::cout << " gop_size:" << videoCodecContext->gop_size << "\n";
std::cout << " max_b_frames:" << videoCodecContext->max_b_frames << "\n";
std::cout << " pix_fmt:" << videoCodecContext->pix_fmt << "\n";
error = avcodec_parameters_to_context(videoCodecContext, videoStream->codecpar);
checkError(error, "Error setting video codec context parameters");
error = avcodec_open2(videoCodecContext, videoCodec, nullptr);
checkError(error, "Error opening video codec");
// create the frame scaler
int width = videoCodecContext->width;
int height = videoCodecContext->height;
struct SwsContext* frameScaler = sws_getContext(width, height, videoCodecContext->pix_fmt, width, height, AV_PIX_FMT_BGR24, SWS_BICUBIC, nullptr, nullptr, nullptr);
// read the packets and decode the video frames
std::vector<cv::Mat> videoFrames;
AVPacket packet;
while (av_read_frame(formatContext, &packet) == 0) {
if (packet.stream_index == videoStream->index) {
// decode the video frame
AVFrame* frame = av_frame_alloc();
int gotFrame = 0;
error = avcodec_send_packet(videoCodecContext, &packet);
checkError(error, "Error sending packet to video codec");
error = avcodec_receive_frame(videoCodecContext, frame);
//There is not enough data for decoding the frame, have to free and get more data
if (error == AVERROR(EAGAIN))
{
av_frame_unref(frame);
av_freep(frame);
continue;
}
if (error == AVERROR_EOF)
{
std::cerr << "AVERROR_EOF" << std::endl;
break;
}
checkError(error, "Error receiving frame from video codec");
if (error == 0) {
gotFrame = 1;
}
if (gotFrame) {
// scale the frame to the desired format
AVFrame* scaledFrame = av_frame_alloc();
av_image_alloc(scaledFrame->data, scaledFrame->linesize, width, height, AV_PIX_FMT_BGR24, 32);
sws_scale(frameScaler, frame->data, frame->linesize, 0, height, scaledFrame->data, scaledFrame->linesize);
// copy the frame data to a cv::Mat object
cv::Mat mat(height, width, CV_8UC3, scaledFrame->data[0], scaledFrame->linesize[0]);
videoFrames.push_back(mat.clone());
// clean up
av_freep(&scaledFrame->data[0]);
av_frame_free(&scaledFrame);
}
av_frame_free(&frame);
}
av_packet_unref(&packet);
}
// clean up
sws_freeContext(frameScaler);
avcodec_free_context(&videoCodecContext);
avformat_close_input(&formatContext);
return videoFrames;
}
int main() {
auto videoFrames = readVideo("input.mp4");
cv::imwrite("test.png", videoFrames[10]);
writeVideo("outnow.mp4", videoFrames, 512, 608, 30);
//writeVideo("outnow.mp4", videoFrames);
return 0;
}
Answer:
FFmpeg stores the encoded output in a buffer, and starts writing the data to the output file when the buffered data exceeds some size.
At the end of the encoding process, we have to flush the buffer to the file, and close the output file by executing the following functions:
av_write_trailer(formatContext);
avformat_close_input(&formatContext);
Usually we also have to flush the remaining packets out of the encoder (there are typically buffered frames that have not yet been encoded and written). Add the following loop before calling av_write_trailer and avformat_close_input:
// flush the rest of the packets
////////////////////////////////////////////////////////////
int ret = 0;
avcodec_send_frame(videoCodecContext, nullptr);
do
{
av_packet_unref(packet);
ret = avcodec_receive_packet(videoCodecContext, packet);
if (!ret)
{
error = av_interleaved_write_frame(formatContext, packet);
checkError(error, "Error writing video packet");
}
} while (!ret);
av_write_trailer(formatContext);
avformat_close_input(&formatContext);
////////////////////////////////////////////////////////////
Complete code:
#include <iostream>
#include <string>
#include <vector>
#include <opencv2/core/mat.hpp>
#include <opencv2/imgcodecs.hpp>
extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
#include <libavcodec/avcodec.h>
#include <libavutil/pixdesc.h>
#include <libavutil/opt.h>
}
// helper function to check for FFmpeg errors
inline void checkError(int error, const std::string& message) {
if (error < 0) {
//std::cerr << message << ": " << av_err2str(error) << std::endl; //error C4576: a parenthesized type followed by an initializer list is a non-standard explicit type conversion syntax
std::cerr << message << ": " << std::to_string(error) << std::endl;
exit(EXIT_FAILURE);
}
}
int writeVideo(const std::string& video_path, std::vector<cv::Mat>& frames, int width, int height, int fps) {
// initialize FFmpeg
av_log_set_level(AV_LOG_ERROR);
avformat_network_init();
// create the output video context
AVFormatContext* formatContext = nullptr;
int error = avformat_alloc_output_context2(&formatContext, nullptr, nullptr, video_path.c_str());
checkError(error, "Error creating output context");
// create the video stream
AVStream* videoStream = avformat_new_stream(formatContext, nullptr);
if (!videoStream) {
std::cerr << "Error creating video stream" << std::endl;
exit(EXIT_FAILURE);
}
// create the video codec context
const AVCodec* videoCodec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
AVCodecContext* videoCodecContext = avcodec_alloc_context3(videoCodec);
if (!videoCodecContext) {
std::cerr << "Error allocating video codec context" << std::endl;
exit(EXIT_FAILURE);
}
videoCodecContext->bit_rate = 200000;
videoCodecContext->width = width;
videoCodecContext->height = height;
//videoCodecContext->time_base = (AVRational){ 1, fps }; //error C4576: a parenthesized type followed by an initializer list is a non-standard explicit type conversion syntax
//videoCodecContext->framerate = (AVRational){ fps, 1 };
videoCodecContext->time_base.num = 1;
videoCodecContext->time_base.den = fps;
videoCodecContext->framerate.num = fps;
videoCodecContext->framerate.den = 1;
videoCodecContext->gop_size = 12;
videoCodecContext->max_b_frames = 0;
videoCodecContext->pix_fmt = AV_PIX_FMT_YUV420P;
if (formatContext->oformat->flags & AVFMT_GLOBALHEADER) {
videoCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
error = avcodec_open2(videoCodecContext, videoCodec, nullptr);
checkError(error, "Error opening");
error = avcodec_parameters_from_context(videoStream->codecpar, videoCodecContext);
checkError(error, "Error setting video codec parameters");
// open the output file
error = avio_open(&formatContext->pb, video_path.c_str(), AVIO_FLAG_WRITE);
checkError(error, "Error opening output file");
// write the video file header
error = avformat_write_header(formatContext, nullptr);
checkError(error, "Error writing video file header");
AVPacket* packet = av_packet_alloc();
if (!packet) {
std::cerr << "Error allocating packet" << std::endl;
exit(EXIT_FAILURE);
}
for (const cv::Mat& frame : frames) {
// convert the cv::Mat to an AVFrame
AVFrame* avFrame = av_frame_alloc();
avFrame->format = videoCodecContext->pix_fmt;
avFrame->width = width;
avFrame->height = height;
error = av_frame_get_buffer(avFrame, 0);
checkError(error, "Error allocating frame buffer");
struct SwsContext* frameConverter = sws_getContext(width, height, AV_PIX_FMT_BGR24, width, height, videoCodecContext->pix_fmt, SWS_BICUBIC, nullptr, nullptr, nullptr);
uint8_t* srcData[AV_NUM_DATA_POINTERS] = { frame.data };
int srcLinesize[AV_NUM_DATA_POINTERS] = { static_cast<int>(frame.step) };
sws_scale(frameConverter, srcData, srcLinesize, 0, height, avFrame->data, avFrame->linesize);
sws_freeContext(frameConverter);
// encode the AVFrame
avFrame->pts = packet->pts;
error = avcodec_send_frame(videoCodecContext, avFrame);
checkError(error, "Error sending frame to video codec");
while (error >= 0) {
error = avcodec_receive_packet(videoCodecContext, packet);
if (error == AVERROR(EAGAIN) || error == AVERROR_EOF) {
break;
}
checkError(error, "Error encoding video frame");
// write the encoded packet to the output file
packet->stream_index = videoStream->index;
error = av_interleaved_write_frame(formatContext, packet);
checkError(error, "Error writing video packet");
av_packet_unref(packet);
}
av_frame_free(&avFrame);
}
// flush the rest of the packets
////////////////////////////////////////////////////////////
int ret = 0;
avcodec_send_frame(videoCodecContext, nullptr);
do
{
av_packet_unref(packet);
ret = avcodec_receive_packet(videoCodecContext, packet);
if (!ret)
{
error = av_interleaved_write_frame(formatContext, packet);
checkError(error, "Error writing video packet");
}
} while (!ret);
av_write_trailer(formatContext);
avformat_close_input(&formatContext);
////////////////////////////////////////////////////////////
// clean up
av_packet_free(&packet);
avcodec_free_context(&videoCodecContext);
avformat_free_context(formatContext);
avformat_network_deinit();
return EXIT_SUCCESS;
}
std::vector<cv::Mat> readVideo(const std::string video_path) {
// initialize FFmpeg
av_log_set_level(AV_LOG_ERROR);
avformat_network_init();
AVFormatContext* formatContext = nullptr;
int error = avformat_open_input(&formatContext, video_path.c_str(), nullptr, nullptr);
checkError(error, "Error opening input file");
//Read packets of a media file to get stream information.
////////////////////////////////////////////////////////////////////////////
error = avformat_find_stream_info(formatContext, nullptr);
checkError(error, "Error avformat find stream info");
////////////////////////////////////////////////////////////////////////////
// find the video stream
AVStream* videoStream = nullptr;
for (unsigned int i = 0; i < formatContext->nb_streams; i++) {
if (formatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && !videoStream) {
videoStream = formatContext->streams[i];
}
}
if (!videoStream) {
std::cerr << "Error: input file does not contain a video stream" << std::endl;
exit(EXIT_FAILURE);
}
// create the video codec context
const AVCodec* videoCodec = avcodec_find_decoder(videoStream->codecpar->codec_id);
AVCodecContext* videoCodecContext = avcodec_alloc_context3(videoCodec);
if (!videoCodecContext) {
std::cerr << "Error allocating video codec context" << std::endl;
exit(EXIT_FAILURE);
}
std::cout << "::informations::\n";
std::cout << " bit_rate:" << videoCodecContext->bit_rate << "\n";
std::cout << " width:" << videoCodecContext->width << "\n";
std::cout << " height:" << videoCodecContext->height << "\n";
std::cout << " gop_size:" << videoCodecContext->gop_size << "\n";
std::cout << " max_b_frames:" << videoCodecContext->max_b_frames << "\n";
std::cout << " pix_fmt:" << videoCodecContext->pix_fmt << "\n";
error = avcodec_parameters_to_context(videoCodecContext, videoStream->codecpar);
checkError(error, "Error setting video codec context parameters");
error = avcodec_open2(videoCodecContext, videoCodec, nullptr);
checkError(error, "Error opening video codec");
// create the frame scaler
int width = videoCodecContext->width;
int height = videoCodecContext->height;
struct SwsContext* frameScaler = sws_getContext(width, height, videoCodecContext->pix_fmt, width, height, AV_PIX_FMT_BGR24, SWS_BICUBIC, nullptr, nullptr, nullptr);
// read the packets and decode the video frames
std::vector<cv::Mat> videoFrames;
AVPacket packet;
while (av_read_frame(formatContext, &packet) == 0) {
if (packet.stream_index == videoStream->index) {
// decode the video frame
AVFrame* frame = av_frame_alloc();
int gotFrame = 0;
error = avcodec_send_packet(videoCodecContext, &packet);
checkError(error, "Error sending packet to video codec");
error = avcodec_receive_frame(videoCodecContext, frame);
//There is not enough data for decoding the frame, have to free and get more data
////////////////////////////////////////////////////////////////////////////
if (error == AVERROR(EAGAIN))
{
av_frame_unref(frame);
av_freep(frame);
continue;
}
if (error == AVERROR_EOF)
{
std::cerr << "AVERROR_EOF" << std::endl;
break;
}
////////////////////////////////////////////////////////////////////////////
checkError(error, "Error receiving frame from video codec");
if (error == 0) {
gotFrame = 1;
}
if (gotFrame) {
// scale the frame to the desired format
AVFrame* scaledFrame = av_frame_alloc();
av_image_alloc(scaledFrame->data, scaledFrame->linesize, width, height, AV_PIX_FMT_BGR24, 32);
sws_scale(frameScaler, frame->data, frame->linesize, 0, height, scaledFrame->data, scaledFrame->linesize);
// copy the frame data to a cv::Mat object
cv::Mat mat(height, width, CV_8UC3, scaledFrame->data[0], scaledFrame->linesize[0]);
//Show mat image for testing
////////////////////////////////////////////////////////////////////////////
//cv::imshow("mat", mat);
//cv::waitKey(100); //Wait 100msec (relativly long time - for testing).
////////////////////////////////////////////////////////////////////////////
videoFrames.push_back(mat.clone());
// clean up
av_freep(&scaledFrame->data[0]);
av_frame_free(&scaledFrame);
}
av_frame_free(&frame);
}
av_packet_unref(&packet);
}
// clean up
sws_freeContext(frameScaler);
avcodec_free_context(&videoCodecContext);
avformat_close_input(&formatContext);
return videoFrames;
}
int main() {
auto videoFrames = readVideo("input.mp4");
cv::imwrite("test.png", videoFrames[10]);
writeVideo("outnow.mp4", videoFrames, videoFrames[0].cols, videoFrames[0].rows, 30);
//writeVideo("outnow.mp4", videoFrames);
return 0;
}
Note:
Two frames are still missing at the end.
The issue is related to the decoding stage, because the length of videoFrames is two frames shorter than the number of frames in input.mp4.
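The missing frames are most likely the ones the decoder is still holding in its internal buffer when av_read_frame reaches end of file. A common fix is to flush the decoder after the read loop in readVideo (just before the clean-up), by sending a NULL packet and draining the remaining frames. The following is a minimal, untested sketch of that idea; it reuses the variable names from readVideo above (videoCodecContext, frameScaler, width, height, videoFrames, error) and converts the drained frames exactly like the main loop:
// flush the decoder: a NULL packet puts it into draining mode,
// after which avcodec_receive_frame returns the buffered frames until AVERROR_EOF
error = avcodec_send_packet(videoCodecContext, nullptr);
checkError(error, "Error sending flush packet to video codec");
while (true) {
    AVFrame* frame = av_frame_alloc();
    error = avcodec_receive_frame(videoCodecContext, frame);
    if (error == AVERROR_EOF || error == AVERROR(EAGAIN)) {
        av_frame_free(&frame);
        break;
    }
    checkError(error, "Error receiving flushed frame from video codec");
    // scale the drained frame to BGR and store it, as in the main loop
    AVFrame* scaledFrame = av_frame_alloc();
    av_image_alloc(scaledFrame->data, scaledFrame->linesize, width, height, AV_PIX_FMT_BGR24, 32);
    sws_scale(frameScaler, frame->data, frame->linesize, 0, height, scaledFrame->data, scaledFrame->linesize);
    cv::Mat mat(height, width, CV_8UC3, scaledFrame->data[0], scaledFrame->linesize[0]);
    videoFrames.push_back(mat.clone());
    av_freep(&scaledFrame->data[0]);
    av_frame_free(&scaledFrame);
    av_frame_free(&frame);
}
With this drain loop in place, the decoder emits its last buffered frames, so the length of videoFrames should match the frame count of input.mp4.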