H264コーデックとC++ APIを使用して、FFmpegでFLVをRTMP経由でflv.jsにストリーミングする

Question

H264コーデックとOpenCVを使用してWebカメラからライブビデオを取得し、それをFLVに変換してからRTMPサーバー経由でストリーミングし、flv.jsを使用してブラウザーでストリームを受信したいと思います。基本的に、flv.jsでストリームを読み取れないことを除いて、すべてが機能しています。ffplayではストリームを開くことができるので、少なくとも大部分は正しく設定されていると思います。

私の現在の実装：

#include <iostream> #include <vector> #include <opencv2/highgui.hpp> #include <opencv2/video.hpp> extern "C" { #include <libavformat/avformat.h> #include <libavcodec/avcodec.h> #include <libavutil/avutil.h> #include <libavutil/pixdesc.h> #include <libavutil/imgutils.h> #include <libswscale/swscale.h> } void stream_video(double width, double height, int fps, int camID) { av_register_all(); avformat_network_init(); const char *output = "rtmp://localhost/live/stream"; const AVRational dst_fps = {fps, 1}; int ret; // initialize video capture device cv::VideoCapture cam(camID); if (!cam.isOpened()) { std::cout << "Failed to open video capture device!" << std::endl; exit(1); } cam.set(cv::CAP_PROP_FRAME_WIDTH, width); cam.set(cv::CAP_PROP_FRAME_HEIGHT, height); // allocate cv::Mat with extra bytes (required by AVFrame::data) std::vector<uint8_t> imgbuf(height * width * 3 + 16); cv::Mat image(height, width, CV_8UC3, imgbuf.data(), width * 3); // open output format context AVFormatContext *outctx = nullptr; ret = avformat_alloc_output_context2(&outctx, nullptr, "flv", output); if (ret < 0) { std::cout << "Could not allocate output format context!" << std::endl; exit(1); } // open output IO context if (!(outctx->oformat->flags & AVFMT_NOFILE)) { ret = avio_open2(&outctx->pb, output, AVIO_FLAG_WRITE, nullptr, nullptr); if (ret < 0) { std::cout << "Could not open output IO context!" << std::endl; exit(1); } } // create new video stream AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_H264); AVStream *strm = avformat_new_stream(outctx, codec); AVCodecContext *avctx = avcodec_alloc_context3(codec); avctx->codec_id = AV_CODEC_ID_H264; avctx->width = width; avctx->height = height; avctx->pix_fmt = AV_PIX_FMT_YUV420P; avctx->framerate = dst_fps; avctx->time_base = av_inv_q(dst_fps); ret = avcodec_parameters_from_context(strm->codecpar, avctx); if (ret < 0) { std::cout << "Could not initialize stream codec parameters!" 
<< std::endl; exit(1); } AVDictionary *opts = nullptr; av_dict_set(&opts, "preset", "superfast", 0); av_dict_set(&opts, "tune", "zerolatency", 0); // open video encoder ret = avcodec_open2(avctx, codec, &opts); if (ret < 0) { std::cout << "Could not open video encoder!" << std::endl; exit(1); } // initialize sample scaler SwsContext *swsctx = sws_getContext(width, height, AV_PIX_FMT_BGR24, width, height, avctx->pix_fmt, SWS_BICUBIC, nullptr, nullptr, nullptr); if (!swsctx) { std::cout << "Could not initialize sample scaler!" << std::endl; exit(1); } // allocate frame buffer for encoding AVFrame *frame = av_frame_alloc(); std::vector<uint8_t> framebuf(av_image_get_buffer_size(avctx->pix_fmt, width, height, 1)); av_image_fill_arrays(frame->data, frame->linesize, framebuf.data(), avctx->pix_fmt, width, height, 1); frame->width = width; frame->height = height; frame->format = static_cast<int>(avctx->pix_fmt); // write header ret = avformat_write_header(outctx, nullptr); if (ret < 0) { std::cout << "Could not write header!" << std::endl; exit(1); } // encoding loop int64_t frame_pts = 0; unsigned nb_frames = 0; bool end_of_stream = false; do { nb_frames++; if (!end_of_stream) { cam >> image; // convert cv::Mat to AVFrame. const int stride[] = {static_cast<int>(image.step[0])}; sws_scale(swsctx, &image.data, stride, 0, image.rows, frame->data, frame->linesize); frame->pts = frame_pts++; } // encode video frame. AVPacket pkt = {0}; av_init_packet(&pkt); ret = avcodec_send_frame(avctx, frame); if (ret < 0) { std::cout << "Error sending frame to codec context!" << std::endl; exit(1); } ret = avcodec_receive_packet(avctx, &pkt); if (ret < 0) { std::cout << "Error receiving packet from codec context!" << std::endl; exit(1); } // rescale packet timestamp. av_packet_rescale_ts(&pkt, avctx->time_base, strm->time_base); // write packet. 
pkt.pts = AV_NOPTS_VALUE; pkt.dts = AV_NOPTS_VALUE; av_interleaved_write_frame(outctx, &pkt); std::cout << " Frames: " << nb_frames << '\r' << std::flush; av_packet_unref(&pkt); } while (!end_of_stream); av_write_trailer(outctx); std::cout << nb_frames << " frames encoded" << std::endl; av_frame_free(&frame); avcodec_close(avctx); avio_close(outctx->pb); avformat_free_context(outctx); } int main() { double width = 1280, height = 720, fps = 30; int camID = 1; stream_video(width, height, fps, camID); return 0; }

前述のとおり、ffplay rtmp://localhost/live/stream または ffplay http://localhost:8000/live/stream.flv ではストリームを正常に開くことができますが、ブラウザ内のflv.jsプレーヤーではストリームを開くことができず、次のエラーが発生します。

flv: Invalid AVCDecoderConfigurationRecord, lack of data! [FLVDemuxer] > Malformed Nalus near timestamp 0, NaluSize > DataSize! [FLVDemuxer] > Malformed Nalus near timestamp 1, NaluSize > DataSize! [FLVDemuxer] > Malformed Nalus near timestamp 2, NaluSize > DataSize! ....

ストリームがflv.jsで正しく動作するように修正する手助けをいただければ幸いです。たとえば ffmpeg -re -i input.mp4 -c copy -f flv rtmp://localhost/live/stream のようにビデオをストリーミングした場合は、flv.jsで問題なくストリームを開くことができるので、おおまかにはこのコマンドと同じことをコード内で実現したいと考えています。また、コードをコンパイルして確認したい方のために、コードをGitHubリポジトリ（こちら）にも置いています。

Jan Kuri · Accepted Answer

この問題は自分で解決しました。期待どおりに機能しなかった主な理由は、AVStreamのextradata（SPSおよびPPSヘッダー）が空だったためです。AVCodecContextからextradataとextradata_sizeを手動でコピーする必要がありました（これが自動的に行われない理由はわかりません）。これを行うと、初めてflv.jsで映像が表示されました。次に、ビデオストリームを正しく機能させるために、frame->ptsを正しく計算する必要がありました。他の誰かが同じ問題に遭遇した場合に備えて、動作するコード全体を以下に掲載します。

#include <iostream> #include <vector> #include <opencv2/highgui.hpp> #include <opencv2/video.hpp> extern "C" { #include <libavformat/avformat.h> #include <libavcodec/avcodec.h> #include <libavutil/imgutils.h> #include <libswscale/swscale.h> } cv::VideoCapture get_device(int camID, double width, double height) { cv::VideoCapture cam(camID); if (!cam.isOpened()) { std::cout << "Failed to open video capture device!" << std::endl; exit(1); } cam.set(cv::CAP_PROP_FRAME_WIDTH, width); cam.set(cv::CAP_PROP_FRAME_HEIGHT, height); return cam; } void initialize_avformat_context(AVFormatContext *&fctx, const char *format_name) { int ret = avformat_alloc_output_context2(&fctx, nullptr, format_name, nullptr); if (ret < 0) { std::cout << "Could not allocate output format context!" << std::endl; exit(1); } } void initialize_io_context(AVFormatContext *&fctx, const char *output) { if (!(fctx->oformat->flags & AVFMT_NOFILE)) { int ret = avio_open2(&fctx->pb, output, AVIO_FLAG_WRITE, nullptr, nullptr); if (ret < 0) { std::cout << "Could not open output IO context!" << std::endl; exit(1); } } } void set_codec_params(AVFormatContext *&fctx, AVCodecContext *&codec_ctx, double width, double height, int fps) { const AVRational dst_fps = {fps, 1}; codec_ctx->codec_tag = 0; codec_ctx->codec_id = AV_CODEC_ID_H264; codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO; codec_ctx->width = width; codec_ctx->height = height; codec_ctx->gop_size = 12; codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P; codec_ctx->framerate = dst_fps; codec_ctx->time_base = av_inv_q(dst_fps); if (fctx->oformat->flags & AVFMT_GLOBALHEADER) { codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; } } void initialize_codec_stream(AVStream *&stream, AVCodecContext *&codec_ctx, AVCodec *&codec) { int ret = avcodec_parameters_from_context(stream->codecpar, codec_ctx); if (ret < 0) { std::cout << "Could not initialize stream codec parameters!" 
<< std::endl; exit(1); } AVDictionary *codec_options = nullptr; av_dict_set(&codec_options, "profile", "high", 0); av_dict_set(&codec_options, "preset", "superfast", 0); av_dict_set(&codec_options, "tune", "zerolatency", 0); // open video encoder ret = avcodec_open2(codec_ctx, codec, &codec_options); if (ret < 0) { std::cout << "Could not open video encoder!" << std::endl; exit(1); } } SwsContext *initialize_sample_scaler(AVCodecContext *codec_ctx, double width, double height) { SwsContext *swsctx = sws_getContext(width, height, AV_PIX_FMT_BGR24, width, height, codec_ctx->pix_fmt, SWS_BICUBIC, nullptr, nullptr, nullptr); if (!swsctx) { std::cout << "Could not initialize sample scaler!" << std::endl; exit(1); } return swsctx; } AVFrame *allocate_frame_buffer(AVCodecContext *codec_ctx, double width, double height) { AVFrame *frame = av_frame_alloc(); std::vector<uint8_t> framebuf(av_image_get_buffer_size(codec_ctx->pix_fmt, width, height, 1)); av_image_fill_arrays(frame->data, frame->linesize, framebuf.data(), codec_ctx->pix_fmt, width, height, 1); frame->width = width; frame->height = height; frame->format = static_cast<int>(codec_ctx->pix_fmt); return frame; } void write_frame(AVCodecContext *codec_ctx, AVFormatContext *fmt_ctx, AVFrame *frame) { AVPacket pkt = {0}; av_init_packet(&pkt); int ret = avcodec_send_frame(codec_ctx, frame); if (ret < 0) { std::cout << "Error sending frame to codec context!" << std::endl; exit(1); } ret = avcodec_receive_packet(codec_ctx, &pkt); if (ret < 0) { std::cout << "Error receiving packet from codec context!" 
<< std::endl; exit(1); } av_interleaved_write_frame(fmt_ctx, &pkt); av_packet_unref(&pkt); } void stream_video(double width, double height, int fps, int camID) { av_register_all(); avformat_network_init(); const char *output = "rtmp://localhost/live/stream"; int ret; auto cam = get_device(camID, width, height); std::vector<uint8_t> imgbuf(height * width * 3 + 16); cv::Mat image(height, width, CV_8UC3, imgbuf.data(), width * 3); AVFormatContext *ofmt_ctx = nullptr; AVCodec *out_codec = nullptr; AVStream *out_stream = nullptr; AVCodecContext *out_codec_ctx = nullptr; initialize_avformat_context(ofmt_ctx, "flv"); initialize_io_context(ofmt_ctx, output); out_codec = avcodec_find_encoder(AV_CODEC_ID_H264); out_stream = avformat_new_stream(ofmt_ctx, out_codec); out_codec_ctx = avcodec_alloc_context3(out_codec); set_codec_params(ofmt_ctx, out_codec_ctx, width, height, fps); initialize_codec_stream(out_stream, out_codec_ctx, out_codec); out_stream->codecpar->extradata = out_codec_ctx->extradata; out_stream->codecpar->extradata_size = out_codec_ctx->extradata_size; av_dump_format(ofmt_ctx, 0, output, 1); auto *swsctx = initialize_sample_scaler(out_codec_ctx, width, height); auto *frame = allocate_frame_buffer(out_codec_ctx, width, height); int cur_size; uint8_t *cur_ptr; ret = avformat_write_header(ofmt_ctx, nullptr); if (ret < 0) { std::cout << "Could not write header!" 
<< std::endl; exit(1); } bool end_of_stream = false; do { cam >> image; const int stride[] = {static_cast<int>(image.step[0])}; sws_scale(swsctx, &image.data, stride, 0, image.rows, frame->data, frame->linesize); frame->pts += av_rescale_q(1, out_codec_ctx->time_base, out_stream->time_base); write_frame(out_codec_ctx, ofmt_ctx, frame); } while (!end_of_stream); av_write_trailer(ofmt_ctx); av_frame_free(&frame); avcodec_close(out_codec_ctx); avio_close(ofmt_ctx->pb); avformat_free_context(ofmt_ctx); } int main() { // av_log_set_level(AV_LOG_DEBUG); double width = 1280, height = 720; int camID = 1, fps = 25; stream_video(width, height, fps, camID); return 0; }

それでおしまい！