FFmpeg-based video converter

/*
 Employee trial solution for Inventos

 A Linux program built on libavcodec/libavformat. It takes two file names,
 input and output: the input file, in FLV format, is transcoded into an
 MP4 output file using the H.264 and AAC codecs.
*/
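
/*
 Build/run sketch, assuming the FFmpeg development libraries of this era are
 installed and registered with pkg-config under the names used below
 (the package names and the "avconvert" binary name are assumptions):

     gcc -o avconvert avconvert.c $(pkg-config --cflags --libs \
         libavformat libavcodec libswresample libswscale libavutil) -lm

     ./avconvert input.flv output.mp4
*/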
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>   /* PRIi64 printf macros used below */
 
#include "libavutil/mathematics.h"
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
 
#define STREAM_FRAME_RATE 25 /* output frame rate, images/s */
#define STREAM_PIX_FMT PIX_FMT_YUV420P /* output pixel format */
 
/* per-mediafile state: format/codec contexts, stream indices and basic parameters */
typedef struct {
	char            *filename;
	AVFormatContext *pFormatCont;
	int              iStreamA, iStreamV;         /* audio/video stream indices */
	AVCodecContext  *pCodecContA, *pCodecContV;  /* audio/video codec contexts */
	AVCodec         *pCodecA, *pCodecV;          /* audio/video codecs */
	AVStream        *pStreamA, *pStreamV;        /* audio/video streams */
	int              iPictH, iPictW;             /* picture height/width */
	int              iBitRateA, iBitRateV;       /* audio/video bit rates */
	int              iSampleRateA;               /* audio sample rate */
	int              iFrameCountA, iFrameCountV; /* processed frame counters */
} MFInfo;
 
/* add an audio output stream */
static int add_audio_stream(MFInfo *mfi, enum CodecID codec_id)
{
    AVFormatContext *fc = mfi->pFormatCont;
    AVCodec *codec = NULL;
    AVStream *st = NULL;
	AVCodecContext *cc = NULL;
 
    /* find the audio encoder */
    codec = avcodec_find_encoder(codec_id);
    if (!codec) {
        fprintf(stderr, "audio codec not found\n");
        return -1;
    }
 
    /* allocate audio stream */
	st = avformat_new_stream(fc, codec);
    if (!st) {
        fprintf(stderr, "Could not alloc audio stream\n");
        return -1;
    }
    st->id = 1;
 
    cc = st->codec;
    avcodec_get_context_defaults3(cc, codec);
 
    cc->codec = codec;
    cc->codec_id = codec_id;
 
    /* put sample parameters */
    cc->sample_fmt = AV_SAMPLE_FMT_S16;
    cc->bit_rate = mfi->iBitRateA;
    cc->sample_rate = mfi->iSampleRateA;
    cc->channels = 2;
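
    /* NOTE (assumption, depends on the FFmpeg build): the built-in AAC encoder of this era
     * is marked experimental and may reject AV_SAMPLE_FMT_S16; if avcodec_open2() below
     * fails, setting cc->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL or building
     * with an external AAC encoder (e.g. libfaac) may be needed. */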
 
    // some formats want stream headers to be separate
    if (fc->oformat->flags & AVFMT_GLOBALHEADER)
        cc->flags |= CODEC_FLAG_GLOBAL_HEADER;
 
    /* open codec */
    if (avcodec_open2(cc, NULL, NULL) < 0) {
        fprintf(stderr, "could not open audio codec \n");
        return -1;
    }
 
    mfi->pStreamA = st;
	mfi->pCodecContA = cc;
    mfi->pCodecA = codec;
    mfi->iStreamA = st->index;
 
    return 0;
}
 
/**************************************************************/
/* video output */
 
 
/* add a video output stream */
static int add_video_stream(MFInfo *mfi, enum CodecID codec_id)
{
    AVFormatContext *fc = mfi->pFormatCont;
	AVCodecContext *cc = NULL;
    AVStream *st = NULL;
    AVCodec *codec = NULL;
 
    /* find the video encoder */
    codec = avcodec_find_encoder(codec_id);
    if (!codec) {
        fprintf(stderr, "video codec not found\n");
        return -1;
    }
 
    /* allocate video stream */
    st = avformat_new_stream(fc, codec);
    if (!st) {
        fprintf(stderr, "Could not alloc video stream\n");
        return -1;
    }
 
    cc = st->codec;
 
    avcodec_get_context_defaults3(cc, codec);
    cc->codec = codec;
    cc->codec_id = codec_id;
 
 
    /* put sample parameters */
    cc->bit_rate = mfi->iBitRateV;
    /* resolution must be a multiple of two */
    cc->width = mfi->iPictW;
    cc->height = mfi->iPictH;
    /* time base: this is the fundamental unit of time (in seconds) in terms
       of which frame timestamps are represented. for fixed-fps content,
       timebase should be 1/framerate and timestamp increments should be
       identically 1. */
    cc->time_base.den = STREAM_FRAME_RATE;
    cc->time_base.num = 1;
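    /* e.g. with STREAM_FRAME_RATE = 25 the time base is 1/25 s, so pts values
       0, 1, 2, ... correspond to 0 ms, 40 ms, 80 ms, ... of output video */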
    cc->gop_size = 12; /* emit one intra frame every twelve frames at most */
    cc->pix_fmt = STREAM_PIX_FMT;
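    /* NOTE (assumption, depends on the libx264 build): the H.264 encoder may need
     * additional private options, e.g. av_opt_set(cc->priv_data, "preset", "medium", 0),
     * for avcodec_open2() below to succeed with these generic settings. */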
    // some formats want stream headers to be separate
    if (fc->oformat->flags & AVFMT_GLOBALHEADER)
        cc->flags |= CODEC_FLAG_GLOBAL_HEADER;
 
    /* open the video codec */
    if (avcodec_open2(cc, NULL, NULL) < 0) {
        fprintf(stderr, "could not open video codec\n");
        return -1;
    }
 
	mfi->pStreamV = st;
	mfi->pCodecContV = cc;
    mfi->pCodecV = codec;
    mfi->iStreamV = st->index;
 
    return 0;
}
 
/* open mediafile, get a/v streams and find codecs */
static int open_mediafile(MFInfo *mfi)
{
	int i;
	AVFormatContext *pFormatCont = NULL;
 
    // Open the input media file
    mfi->pFormatCont = NULL;
    if(avformat_open_input(&pFormatCont, mfi->filename, NULL, NULL)!=0) {
        fprintf(stderr, "Could not open '%s'\n", mfi->filename);
        return -1;
    }
 
    // Retrieve stream information
    if(avformat_find_stream_info(pFormatCont, NULL)<0) {
        fprintf(stderr, "Couldn't find stream information'\n");
        return -1;
    }
 
    // Dump information about file onto standard error
    av_dump_format(pFormatCont, 0, mfi->filename, 0);
 
    // Find the first video and audio stream
    mfi->iStreamA = -1;
    mfi->iStreamV = -1;
 
    for(i=0; i < pFormatCont->nb_streams; i++) {
        if(pFormatCont->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO && mfi->iStreamV < 0)
        {
            mfi->iStreamV=i;
        }
        if(pFormatCont->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO && mfi->iStreamA < 0)
        {
            mfi->iStreamA=i;
        }
    }
 
    if(mfi->iStreamV == -1) {
        fprintf(stderr, "Didn't find a video stream\n");
        return -1;
    } else {
		// Get pointer to video stream
		mfi->pStreamV=pFormatCont->streams[mfi->iStreamV];
	    // Get a pointer to the codec context for the video stream
	    mfi->pCodecContV=mfi->pStreamV->codec;
	    // Find the decoder for the video stream
	    mfi->pCodecV=avcodec_find_decoder(mfi->pCodecContV->codec_id);
	    if(mfi->pCodecV==NULL)  {
	        fprintf(stderr, "Video codec not found\n");
	        return -1;
	    }
	    // Open video codec
	    if(avcodec_open2(mfi->pCodecContV, mfi->pCodecV, NULL) < 0) {
	        fprintf(stderr, "Could not open video codec'\n");
	        return -1;
	    }
	    // Set mediafile properties
	    mfi->iBitRateV = mfi->pCodecContV->bit_rate;
	    mfi->iPictW = mfi->pCodecContV->width;
	    mfi->iPictH = mfi->pCodecContV->height;
    }
 
    if(mfi->iStreamA == -1) {
        fprintf(stderr, "Didn't find a audio stream'\n");
        return -1;
    } else {
		// Get pointer to audio stream
		mfi->pStreamA=pFormatCont->streams[mfi->iStreamA];
	    // Get a pointer to the codec context for the audio stream
	    mfi->pCodecContA=mfi->pStreamA->codec;
	    // Find the decoder for the audio stream
	    mfi->pCodecA=avcodec_find_decoder(mfi->pCodecContA->codec_id);
	    if(mfi->pCodecA==NULL)  {
	        fprintf(stderr, "Audio codec not found'\n");
	        return -1;
	    }
	    // Open audio codec
	    if(avcodec_open2(mfi->pCodecContA, mfi->pCodecA, NULL) < 0) {
	        fprintf(stderr, "Could not open audio codec'\n");
	        return -1;
	    }
	    // Set mediafile properties
	    mfi->iBitRateA = mfi->pCodecContA->bit_rate;
	    mfi->iSampleRateA = mfi->pCodecContA->sample_rate;
    }
 
    mfi->pFormatCont = pFormatCont;
    return 0;
 
}
 
static int create_output_mediafile(MFInfo *mfi)
{
	AVFormatContext *fc = NULL;
 
    /* allocate the output media context */
    avformat_alloc_output_context2(&fc, NULL, "mp4", mfi->filename);
    if (!fc) {
        return -1;
    }
    mfi->pFormatCont=fc;
 
    /* add the audio and video streams and initialize the codecs */
    if (add_video_stream(mfi, CODEC_ID_H264) != 0) return -1;
    if (add_audio_stream(mfi, CODEC_ID_AAC) != 0) return -1;
 
    av_dump_format(fc, 0, mfi->filename, 1);
 
    /* open the output file, if needed */
    if (!(fc->oformat->flags & AVFMT_NOFILE)) {
        if (avio_open(&fc->pb, mfi->filename, AVIO_FLAG_WRITE) < 0) {
            fprintf(stderr, "Could not open '%s'\n", mfi->filename);
            return -1;
        }
    }
 
    /* write the stream header, if any */
    if (avformat_write_header(fc, NULL) < 0) return -1;

    return 0;
}
 
static void close_output_mediafile(MFInfo *mfi)
{
	AVFormatContext *fc = mfi->pFormatCont;

    /* close the encoders opened in add_audio_stream()/add_video_stream() */
    avcodec_close(mfi->pCodecContA);
    avcodec_close(mfi->pCodecContV);

    if (!(fc->oformat->flags & AVFMT_NOFILE)) {
        /* close the output file */
        avio_close(fc->pb);
	}

    /* free the format context; this also frees the streams and their codec contexts */
    avformat_free_context(fc);

}
 
static void close_input_mediafile(MFInfo *mfi)
{
	AVFormatContext *fc = mfi->pFormatCont;

    /* close the decoders opened in open_mediafile() */
    avcodec_close(mfi->pCodecContA);
    avcodec_close(mfi->pCodecContV);

    avformat_close_input(&fc);

}
 
/* debug helper: print the basic fields of an (audio) frame */
static void dump_frame(AVFrame *pFrame)
{
    fprintf(stderr, "pts=%"PRIi64"\tpkt_pts=%"PRIi64"\tlinesize[0]=%i \n", pFrame->pts, pFrame->pkt_pts, pFrame->linesize[0]);
    fprintf(stderr, "nb_samples=%i\tformat=%s \n", pFrame->nb_samples, av_get_sample_fmt_name(pFrame->format));
}
 
/* convert a decoded audio frame to the sample format expected by the encoder */
static int resample_frame(AVFrame *pFrameIn, AVFrame *pFrameOut, AVCodecContext *cc_in, AVCodecContext *cc_out)
{
    static struct SwrContext *aud_convert_ctx;
	uint8_t *sample_buf;
	int nb_samples_out, sample_buf_size;

    if (aud_convert_ctx == NULL) {
        aud_convert_ctx = swr_alloc_set_opts(aud_convert_ctx,
        cc_out->channel_layout, cc_out->sample_fmt, cc_out->sample_rate,
        cc_in->channel_layout, cc_in->sample_fmt, cc_in->sample_rate,
        0, NULL);
        if (aud_convert_ctx == NULL || swr_init(aud_convert_ctx) < 0) {
            fprintf(stderr, "Cannot initialize the audio conversion context\n");
            return -1;
        }
    }
    avcodec_get_frame_defaults(pFrameOut);

    /* the encoder either accepts a variable frame size or dictates a fixed one
       (for AAC this is normally 1024 samples per frame) */
    if (cc_out->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE)
        nb_samples_out = pFrameIn->nb_samples;
    else
        nb_samples_out = cc_out->frame_size;

    sample_buf_size = av_samples_get_buffer_size(NULL, cc_out->channels, nb_samples_out, cc_out->sample_fmt, 1);
	sample_buf = av_malloc(sample_buf_size);
    if (sample_buf == NULL) {
        fprintf(stderr, "Cannot allocate the audio sample buffer\n");
        return -1;
    }

    /* the output format (AV_SAMPLE_FMT_S16) is interleaved, so one plane pointer is enough;
       input and output sample rates are set equal in main(), so the sample counts match */
    if (swr_convert(aud_convert_ctx, &sample_buf, nb_samples_out,
                    (const uint8_t **)pFrameIn->data, pFrameIn->nb_samples) < 0) {
        fprintf(stderr, "Audio sample conversion failed\n");
        return -1;
    }

	pFrameOut->nb_samples = nb_samples_out;
	pFrameOut->format = cc_out->sample_fmt;
	/* point the output frame at sample_buf; the caller releases it via pFrameOut->data[0] */
	avcodec_fill_audio_frame(pFrameOut, cc_out->channels, cc_out->sample_fmt,
							sample_buf, sample_buf_size, 1);

	dump_frame(pFrameOut);

    return 0;

}
 
 
static int transcode_a(AVPacket *pkt_in, MFInfo *mfiIn, MFInfo *mfiOut)
{
	AVCodecContext *cc_in = mfiIn->pCodecContA;
	AVCodecContext *cc_out = mfiOut->pStreamA->codec;
	AVFrame *pFrameA, *pFrameOut;
	AVPacket pkt_out;
    int i, got_frame, got_packet;
    int64_t new_pts = pkt_in->pts;
 
	pFrameA=avcodec_alloc_frame();
    pFrameOut=avcodec_alloc_frame();
    av_init_packet(&pkt_out);
    pkt_out.data = NULL;
    pkt_out.size = 0;
 
	// Decode from packet to Frame
	i = avcodec_decode_audio4(cc_in, pFrameA, &got_frame, pkt_in);
	if (i<0 || got_frame==0) {
	    fprintf(stderr, "Audio decode error ret=%i got=%i \n", i, got_frame);
	    return -1;
	}
 
    dump_frame(pFrameA);
 
	if (resample_frame(pFrameA, pFrameOut, cc_in, cc_out) != 0) return -1;
 
	// Calculate PTS
	pFrameOut->pts = mfiOut->iFrameCountA;
	//mfiOut->iFrameCountA++;    
 
    dump_frame(pFrameOut);
	// Encode from Frame to packet
	i = avcodec_encode_audio2(cc_out, &pkt_out, pFrameOut, &got_packet);
	if (i<0 || got_packet==0) {
	    fprintf(stderr, "Audio encode error ret=%i got=%i \n", i, got_packet);
	    return -1;
	}
 
	// Correct DTS
 
	pkt_out.dts = new_pts;
	pkt_out.pts = new_pts;
	//pkt_out.dts = pFrameOut->pts;
	//pkt_out.pts = pFrameOut->pts;
	pkt_out.stream_index = mfiOut->iStreamA;
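	/* NOTE (assumption): copying the input packet's pts/dts through unchanged is only
	 * correct if the input and output audio streams end up with compatible time bases;
	 * a more general approach would rescale with av_rescale_q() between
	 * mfiIn->pStreamA->time_base and mfiOut->pStreamA->time_base. */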
 
	// Free the resampled samples (allocated in resample_frame()) and both frames
	av_free(pFrameOut->data[0]);
	av_free(pFrameOut);
	av_free(pFrameA);
 
    fprintf(stderr, "out pkt A idx=%i dur=%i dts=%"PRIi64" pts=%"PRIi64" \n", pkt_out.stream_index, pkt_out.duration, pkt_out.dts, pkt_out.pts);
 
    // write the compressed packet in the media file
    i = av_interleaved_write_frame(mfiOut->pFormatCont, &pkt_out);
	if (i<0) {
	    fprintf(stderr, "Audio stream write error ret=%i \n", i);
	    return -1;
	}
 
    return 0;
}
 
/* convert a decoded video frame to the size/pixel format expected by the encoder;
   since main() copies the input picture size, this is mostly a pixel-format conversion */
static int rescale_frame(AVFrame *pFrameIn, AVFrame *pFrameOut, AVCodecContext *cc_out)
{
    static struct SwsContext *img_convert_ctx;
	const AVFrame *pcFrameIn = pFrameIn;
 
    //fprintf(stderr, "w=%i h=%i pf=%i \n", pFrameIn->width, pFrameIn->height, avcodec_pix_fmt_to_codec_tag(pFrameIn->format));
    //fprintf(stderr, "w=%i h=%i pf=%i \n", cc_out->width, cc_out->height, avcodec_pix_fmt_to_codec_tag(cc_out->pix_fmt));
    if (img_convert_ctx == NULL) {
        img_convert_ctx = sws_getContext(pFrameIn->width, pFrameIn->height,	pFrameIn->format,
                                         cc_out->width, cc_out->height, cc_out->pix_fmt,
                                         SWS_BICUBIC, NULL, NULL, NULL);
        if (img_convert_ctx == NULL) {
            fprintf(stderr, "Cannot initialize the conversion context\n");
            return -1;
        }
    }
    avcodec_get_frame_defaults(pFrameOut);
 
    avpicture_alloc((AVPicture*)pFrameOut, cc_out->pix_fmt, 
                     cc_out->width, cc_out->height);
 
    sws_scale(img_convert_ctx, (const uint8_t * const*)pcFrameIn->data, pcFrameIn->linesize,
              0, cc_out->height, pFrameOut->data, pFrameOut->linesize);
 
    return 0;
 
}
 
static int transcode_v(AVPacket *pkt_in, MFInfo *mfiIn, MFInfo *mfiOut)
{
	AVCodecContext *cc_in = mfiIn->pCodecContV;
	AVCodecContext *cc_out = mfiOut->pStreamV->codec;
	AVFrame *pFrameV = NULL, *pFrameOut = NULL;
	AVPacket pkt_out;
    int i, got_frame, got_packet;
    int64_t new_pts = mfiOut->iFrameCountV;
 
    if (pkt_in != NULL) {
	    pFrameV=avcodec_alloc_frame();
	    pFrameOut=avcodec_alloc_frame();
 
		// Decode from packet to Frame
		i = avcodec_decode_video2(cc_in, pFrameV, &got_frame, pkt_in);
		if (i<0) {
		    fprintf(stderr, "Video decode error ret=%i \n", i);
		    return -1;
		}
		// no decoded frame yet (decoder delay) is not an error
		if (got_frame==0) { av_free(pFrameV); av_free(pFrameOut); return 1; }
 
		//fprintf(stderr, "frame V type=%i format=%i \n", pFrameV->type, pFrameV->format);
 
		// Rescale/convert the image into the output frame
	    if (rescale_frame(pFrameV, pFrameOut, cc_out) != 0) return -1;
 
		// Set PTS
		pFrameOut->pts = new_pts;
    } else {
		pFrameOut=NULL;
    }
 
    av_init_packet(&pkt_out);
    pkt_out.data = NULL;
    pkt_out.size = 0;
 
	// Encode from Frame to packet
	i = avcodec_encode_video2(cc_out, &pkt_out, pFrameOut, &got_packet);
	if (i<0) {
	    fprintf(stderr, "Video encode error ret=%i got=%i \n", i, got_packet);
	    return -1;
	}
 
	// Correct DTS
	//new_pts = av_opt_ptr(avcodec_get_frame_class(), pFrameOut, "best_effort_timestamp");
 
	pkt_out.dts = new_pts;
	pkt_out.pts = new_pts;
	pkt_out.stream_index = mfiOut->iStreamV;
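	/* the time-base note in transcode_a() applies here as well: a bare frame counter is
	 * only a valid pts/dts if the output video stream's time base matches cc_out's (1/25) */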
 
	// Free the frames and the picture buffer allocated in rescale_frame()
	if (pFrameOut) { avpicture_free((AVPicture*)pFrameOut); av_free(pFrameOut); av_free(pFrameV); }
 
	//pkt_out.dts = pkt_out.pts = AV_NOPTS_VALUE;
	//correct_pts(mfiOutput.pStreamV, mfiOutput.pCodecContV, &pkt);
 
    fprintf(stderr, "out pkt V idx=%i dur=%i dts=%"PRIi64" pts=%"PRIi64" \n", pkt_out.stream_index, pkt_out.duration, pkt_out.dts, pkt_out.pts);
    //fprintf(stderr, "out pkt V idx=%i dur=%i \n", pkt_out.stream_index, pkt_out.duration);
 
    if (i == 0) {
		if (got_packet == 1) {
			// write the compressed packet in the media file
		    i = av_interleaved_write_frame(mfiOut->pFormatCont, &pkt_out);
			if (i<0) {
			    fprintf(stderr, "Video stream write error ret=%i \n", i);
			    return -1;
			}
		} else {
			return 1;
		}
    } else {
		return -1;
    }
 
    return 0;
}
 
/**************************************************************/
/* main */
 
int main(int argc, char **argv)
{
    int i;
    MFInfo mfiInput, mfiOutput;
    AVPacket pkt;
 
    if (argc != 3) {
        printf("usage: %s input_file.flv output_file\n"
               "\n", argv[0]);
        return 1;
    }
 
    mfiInput.filename = argv[1];
    mfiOutput.filename = argv[2];
 
    /* initialize libavcodec, and register all codecs and formats */
    av_register_all();
 
    /* read input mediafile info */
    if (open_mediafile(&mfiInput) !=0) return 1;
 
    /* set output mediafile parameters */
    mfiOutput.iPictH = mfiInput.iPictH;
    mfiOutput.iPictW = mfiInput.iPictW;
    mfiOutput.iBitRateA = 64000;
    mfiOutput.iBitRateV = 400000;
    //mfiOutput.iSampleRateA = 44100;
    mfiOutput.iSampleRateA = mfiInput.iSampleRateA;
    mfiOutput.iFrameCountV = 1;
    mfiOutput.iFrameCountA = 1;
 
    /* allocate output mediafile */
    if (create_output_mediafile(&mfiOutput) !=0) return 1;
 
    /* initialize the packet used for reading input frames */
	av_new_packet(&pkt, 0);
 
    /* Read packets from the input, transcode them and write them to the output */
    i=0;
    for(;;) {
		i++;
	    //fprintf(stderr, "frame %i\n", i);
		// Read next frame into packet
		if (av_read_frame(mfiInput.pFormatCont, &pkt) !=0) break;
	    fprintf(stderr, "in  pkt idx=%i dur=%i dts=%"PRIi64" pts=%"PRIi64" \n", pkt.stream_index, pkt.duration, pkt.dts, pkt.pts);
 
		// If the packet comes from the audio stream, transcode it
		if (pkt.stream_index == mfiInput.iStreamA) {
			if (transcode_a(&pkt, &mfiInput, &mfiOutput) != 0) return 1;
			mfiOutput.iFrameCountA++;
		}
 
		// If the packet comes from the video stream, transcode it
		if (pkt.stream_index == mfiInput.iStreamV) {
			if (transcode_v(&pkt, &mfiInput, &mfiOutput) < 0) return 1;
			mfiOutput.iFrameCountV++;
		}
 
		// release the packet and reinitialize it for the next read
		av_free_packet(&pkt);
		av_init_packet(&pkt);
 
	}
 
	// Flush the video encoder: write out any remaining delayed frames
	while (transcode_v(NULL, &mfiInput, &mfiOutput) == 0) {
		mfiOutput.iFrameCountV++;
	}
 
    /* write the trailer, if any. The trailer must be written before the
     * codec contexts that were open when the header was written are closed;
     * otherwise av_write_trailer() may try to use memory that was already
     * freed by avcodec_close(). */
    fprintf(stderr, "av_write_trailer \n");
    av_write_trailer(mfiOutput.pFormatCont);
 
    close_output_mediafile(&mfiOutput);
    close_input_mediafile(&mfiInput);
 
    return 0;
 
}
avconvert.c (Sergey Bodrov, 17 April 2015)