/* FFmpeg-based video converter */

/*  Employee trial solution for Inventos.
Task: write a program for Linux, using libavcodec/libavformat, that takes two filenames: an input file and an output file.
The input file, in .flv format, must be transcoded to an .mp4 output file with H.264/AAC codecs. */
// #include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "libavutil/mathematics.h"
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"  

/* Fixed parameters applied to the output video stream. */
#define STREAM_FRAME_RATE 25 /* 25 images/s; used as the encoder time base denominator */
#define STREAM_PIX_FMT PIX_FMT_YUV420P /* default pix_fmt */  

/*
 * Per-file transcoding state: the container context plus, for the audio
 * ("A") and the video ("V") side, the selected stream, its codec context,
 * its codec and a few basic parameters.
 */
typedef struct {
    char *filename;                 /* path passed to the open/create calls */
    AVFormatContext *pFormatCont;   /* container (format) context */
    int iStreamA;                   /* audio stream index */
    int iStreamV;                   /* video stream index */
    AVCodecContext *pCodecContA;    /* audio codec context */
    AVCodecContext *pCodecContV;    /* video codec context */
    AVCodec *pCodecA;               /* audio codec */
    AVCodec *pCodecV;               /* video codec */
    AVStream *pStreamA;             /* audio stream */
    AVStream *pStreamV;             /* video stream */
    int iPictH;                     /* picture height */
    int iPictW;                     /* picture width */
    int iBitRateA;                  /* audio bit rate */
    int iBitRateV;                  /* video bit rate */
    int iSampleRateA;               /* audio sample rate */
    int iFrameCountA;               /* audio counter used as frame PTS */
    int iFrameCountV;               /* video counter used as frame PTS */
} MFInfo;

/*
 * Add an audio output stream to the output container and open its encoder.
 *
 * mfi      - output media file; mfi->pFormatCont must already hold the output
 *            context, and iBitRateA / iSampleRateA supply the encoder settings.
 * codec_id - id of the encoder to look up.
 *
 * On success the stream, codec context, codec and stream index are stored in
 * mfi and 0 is returned. On failure a message is printed to stderr and -1 is
 * returned.
 */
static int add_audio_stream(MFInfo *mfi, enum CodecID codec_id) {
    AVFormatContext *fc = mfi->pFormatCont;
    AVCodec *codec = NULL;
    AVStream *st = NULL;
    AVCodecContext *cc = NULL;

    if (!fc) {
        fprintf(stderr, "Could not alloc audio stream\n");
        return -1;
    }

    /* find the audio encoder */
    codec = avcodec_find_encoder(codec_id);
    if (!codec) {
        fprintf(stderr, "audio codec not found\n");
        return -1;
    }

    /* allocate audio stream */
    st = avformat_new_stream(fc, codec);
    if (!st) {
        fprintf(stderr, "Could not alloc audio stream\n");
        return -1;
    }
    st->id = 1;
    cc = st->codec;
    avcodec_get_context_defaults3(cc, codec);
    cc->codec = codec;
    cc->codec_id = codec_id;

    /* put sample parameters */
    cc->sample_fmt = AV_SAMPLE_FMT_S16;
    cc->bit_rate = mfi->iBitRateA;
    cc->sample_rate = mfi->iSampleRateA;
    cc->channels = 2;

    /* some formats want stream headers to be separate */
    if (fc->oformat->flags & AVFMT_GLOBALHEADER) {
        cc->flags |= CODEC_FLAG_GLOBAL_HEADER;
    }

    /* open codec; the codec was already bound to cc above, so NULL is passed */
    if (avcodec_open2(cc, NULL, NULL) < 0) {
        fprintf(stderr, "could not open audio codec \n");
        return -1;
    }

    mfi->pStreamA = st;
    mfi->pCodecContA = cc;
    mfi->pCodecA = codec;
    mfi->iStreamA = st->index;
    return 0;
}

/* video output */
/* NOTE(review): none of these file-scope variables is referenced by the
 * functions visible in this part of the file; confirm before removing. */
static AVFrame *picture, *tmp_picture;
static uint8_t *video_outbuf;
static int frame_count, video_outbuf_size;

/*
 * Add a video output stream to the output container and open its encoder.
 *
 * mfi      - output media file; mfi->pFormatCont must already hold the output
 *            context, and iBitRateV / iPictW / iPictH supply the encoder
 *            settings.
 * codec_id - id of the encoder to look up.
 *
 * On success the stream, codec context, codec and stream index are stored in
 * mfi and 0 is returned. On failure a message is printed to stderr and -1 is
 * returned.
 */
static int add_video_stream(MFInfo *mfi, enum CodecID codec_id) {
    AVFormatContext *fc = mfi->pFormatCont;
    AVCodecContext *cc = NULL;
    AVStream *st = NULL;
    AVCodec *codec = NULL;

    if (!fc) {
        fprintf(stderr, "Could not alloc video stream\n");
        return -1;
    }

    /* find the video encoder */
    codec = avcodec_find_encoder(codec_id);
    if (!codec) {
        fprintf(stderr, "video codec not found\n");
        return -1;
    }

    /* allocate video stream */
    st = avformat_new_stream(fc, codec);
    if (!st) {
        fprintf(stderr, "Could not alloc video stream\n");
        return -1;
    }
    cc = st->codec;
    avcodec_get_context_defaults3(cc, codec);
    cc->codec = codec;
    cc->codec_id = codec_id;

    /* put sample parameters */
    cc->bit_rate = mfi->iBitRateV;
    /* resolution must be a multiple of two (not enforced here) */
    cc->width = mfi->iPictW;
    cc->height = mfi->iPictH;

    /* time base: this is the fundamental unit of time (in seconds) in terms
       of which frame timestamps are represented. For fixed-fps content,
       timebase should be 1/framerate and timestamp increments should be
       identically 1. */
    cc->time_base.den = STREAM_FRAME_RATE;
    cc->time_base.num = 1;
    cc->gop_size = 12; /* emit one intra frame every twelve frames at most */
    cc->pix_fmt = STREAM_PIX_FMT;

    /* some formats want stream headers to be separate */
    if (fc->oformat->flags & AVFMT_GLOBALHEADER) {
        cc->flags |= CODEC_FLAG_GLOBAL_HEADER;
    }

    /* open the video codec; it was already bound to cc above, so NULL is passed */
    if (avcodec_open2(cc, NULL, NULL) < 0) {
        fprintf(stderr, "could not open video codec\n");
        return -1;
    }

    mfi->pStreamV = st;
    mfi->pCodecContV = cc;
    mfi->pCodecV = codec;
    mfi->iStreamV = st->index;
    return 0;
}

/*
 * Open the input media file named by mfi->filename, pick the first video and
 * the first audio stream, and open a decoder for each.
 *
 * On success fills in mfi (format context, stream indexes, streams, codec
 * contexts, codecs, video bit rate / width / height, audio bit rate / sample
 * rate) and returns 0. On failure prints a message to stderr, releases what
 * was opened here, leaves mfi->pFormatCont NULL and returns -1.
 */
static int open_mediafile(MFInfo *mfi) {
    unsigned int i;
    int video_open = 0;
    AVFormatContext *pFormatCont = NULL;

    /* Open video file */
    mfi->pFormatCont = NULL;
    if (avformat_open_input(&pFormatCont, mfi->filename, NULL, NULL) != 0) {
        fprintf(stderr, "Could not open '%s'\n", mfi->filename);
        return -1;
    }

    /* Retrieve stream information */
    if (avformat_find_stream_info(pFormatCont, NULL) < 0) {
        fprintf(stderr, "Couldn't find stream information'\n");
        goto fail;
    }

    /* Dump information about file onto standard error */
    av_dump_format(pFormatCont, 0, mfi->filename, 0);

    /* Find the first video and audio stream */
    mfi->iStreamA = -1;
    mfi->iStreamV = -1;
    for (i = 0; i < pFormatCont->nb_streams; i++) {
        if (pFormatCont->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
            if (mfi->iStreamV == -1) {
                mfi->iStreamV = (int)i;
            }
        }
        if (pFormatCont->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
            if (mfi->iStreamA == -1) {
                mfi->iStreamA = (int)i;
            }
        }
    }

    if (mfi->iStreamV == -1) {
        fprintf(stderr, "Didn't find a video stream\n");
        goto fail;
    }
    /* Get pointer to video stream */
    mfi->pStreamV = pFormatCont->streams[mfi->iStreamV];
    /* Get a pointer to the codec context for the video stream */
    mfi->pCodecContV = mfi->pStreamV->codec;
    /* Find the decoder for the video stream */
    mfi->pCodecV = avcodec_find_decoder(mfi->pCodecContV->codec_id);
    if (mfi->pCodecV == NULL) {
        fprintf(stderr, "Video codec not found\n");
        goto fail;
    }
    /* Open video codec */
    if (avcodec_open2(mfi->pCodecContV, mfi->pCodecV, NULL) < 0) {
        fprintf(stderr, "Could not open video codec'\n");
        goto fail;
    }
    video_open = 1;
    /* Set mediafile properties */
    mfi->iBitRateV = mfi->pCodecContV->bit_rate;
    mfi->iPictW = mfi->pCodecContV->width;
    mfi->iPictH = mfi->pCodecContV->height;

    if (mfi->iStreamA == -1) {
        fprintf(stderr, "Didn't find a audio stream'\n");
        goto fail;
    }
    /* Get pointer to audio stream */
    mfi->pStreamA = pFormatCont->streams[mfi->iStreamA];
    /* Get a pointer to the codec context for the audio stream */
    mfi->pCodecContA = mfi->pStreamA->codec;
    /* Find the decoder for the audio stream */
    mfi->pCodecA = avcodec_find_decoder(mfi->pCodecContA->codec_id);
    if (mfi->pCodecA == NULL) {
        fprintf(stderr, "Audio codec not found'\n");
        goto fail;
    }
    /* Open audio codec */
    if (avcodec_open2(mfi->pCodecContA, mfi->pCodecA, NULL) < 0) {
        fprintf(stderr, "Could not open audio codec'\n");
        goto fail;
    }
    /* Set mediafile properties */
    mfi->iBitRateA = mfi->pCodecContA->bit_rate;
    mfi->iSampleRateA = mfi->pCodecContA->sample_rate;

    mfi->pFormatCont = pFormatCont;
    return 0;

fail:
    /* The audio decoder is the last thing opened, so only the video decoder
       can be open when a failure is reported. */
    if (video_open) {
        avcodec_close(mfi->pCodecContV);
    }
    avformat_close_input(&pFormatCont);
    return -1;
}

/*
 * Create the MP4 output file named by mfi->filename: allocate the output
 * context, add an H.264 video stream and an AAC audio stream, open the file
 * for writing if the format needs one, and write the container header.
 *
 * The output context is stored in mfi->pFormatCont before the streams are
 * added, because add_video_stream() / add_audio_stream() read it from there.
 *
 * Returns 0 on success, -1 on failure (with a message on stderr).
 */
static int create_output_mediafile(MFInfo *mfi) {
    AVFormatContext *fc = NULL;

    /* allocate the output media context */
    avformat_alloc_output_context2(&fc, NULL, "mp4", mfi->filename);
    if (!fc) {
        fprintf(stderr, "Could not create output context for '%s'\n", mfi->filename);
        return -1;
    }
    mfi->pFormatCont = fc;

    /* add the audio and video streams and initialize the codecs */
    if (add_video_stream(mfi, CODEC_ID_H264) != 0) {
        return -1;
    }
    if (add_audio_stream(mfi, CODEC_ID_AAC) != 0) {
        return -1;
    }
    av_dump_format(fc, 0, mfi->filename, 1);

    /* open the output file, if needed */
    if (!(fc->oformat->flags & AVFMT_NOFILE)) {
        if (avio_open(&fc->pb, mfi->filename, AVIO_FLAG_WRITE) < 0) {
            fprintf(stderr, "Could not open '%s'\n", mfi->filename);
            return -1;
        }
    }

    /* write the stream header, if any */
    if (avformat_write_header(fc, NULL) < 0) {
        fprintf(stderr, "Could not write header for '%s'\n", mfi->filename);
        return -1;
    }
    return 0;
}

/*
 * Release everything attached to an output media file: close the codec of
 * every stream, close the output file if the format uses one, and free the
 * format context together with its streams.
 *
 * Must be called after the trailer has been written. Does nothing when
 * mfi->pFormatCont is NULL. The pointers in mfi that referred to the freed
 * context are reset to NULL.
 */
static void close_output_mediafile(MFInfo *mfi) {
    unsigned int i;
    AVFormatContext *fc = mfi->pFormatCont;

    if (!fc) {
        return;
    }

    /* close each codec */
    for (i = 0; i < fc->nb_streams; i++) {
        if (fc->streams[i]->codec) {
            avcodec_close(fc->streams[i]->codec);
        }
    }

    if (!(fc->oformat->flags & AVFMT_NOFILE)) {
        /* close the output file */
        if (fc->pb) {
            avio_close(fc->pb);
            fc->pb = NULL;
        }
    }

    /* free the context; this also frees the streams it owns */
    avformat_free_context(fc);

    mfi->pFormatCont = NULL;
    mfi->pStreamA = NULL;
    mfi->pStreamV = NULL;
    mfi->pCodecContA = NULL;
    mfi->pCodecContV = NULL;
}

/*
 * Release everything attached to an input media file opened by
 * open_mediafile(): close both decoders and close the input context.
 *
 * Does nothing when mfi->pFormatCont is NULL. The pointers in mfi that
 * referred to the closed context are reset to NULL.
 */
static void close_input_mediafile(MFInfo *mfi) {
    if (!mfi->pFormatCont) {
        return;
    }

    /* close each codec */
    if (mfi->pCodecContV) {
        avcodec_close(mfi->pCodecContV);
    }
    if (mfi->pCodecContA) {
        avcodec_close(mfi->pCodecContA);
    }

    /* close the input file; this frees the context and sets the pointer to NULL */
    avformat_close_input(&mfi->pFormatCont);

    mfi->pStreamA = NULL;
    mfi->pStreamV = NULL;
    mfi->pCodecContA = NULL;
    mfi->pCodecContV = NULL;
}

/*
 * Debug helper: print a frame's timestamps, first line size, sample count
 * and sample format name to stderr.
 *
 * pts and pkt_pts are 64-bit values, so they are printed with PRIi64; a
 * sample format without a known name is printed as "(unknown)".
 */
static void dump_frame(AVFrame *pFrame) {
    const char *fmt_name = av_get_sample_fmt_name(pFrame->format);

    fprintf(stderr, "pts=%"PRIi64"   pkt_pts=%"PRIi64"  linesize[0]=%i \n",
            (int64_t)pFrame->pts, (int64_t)pFrame->pkt_pts, pFrame->linesize[0]);
    fprintf(stderr, "nb_samples=%i  format=%s \n",
            pFrame->nb_samples, fmt_name ? fmt_name : "(unknown)");
}

/*
 * Convert one decoded audio frame to the sample format, sample rate and
 * channel layout of the output encoder.
 *
 * pFrameIn  - decoded input frame.
 * pFrameOut - frame to fill; on success it points at a newly av_malloc()ed
 *             sample buffer which the caller must release.
 * cc_in     - decoder context (source audio parameters).
 * cc_out    - encoder context (target audio parameters).
 *
 * The resampler context is created and initialised on the first call and
 * kept in a function-local static for later calls.
 *
 * NOTE(review): the single-pointer output array assumes a packed
 * (interleaved) output sample format - TODO confirm for other encoders.
 * NOTE(review): when the encoder has a fixed frame_size the input sample
 * count may differ from it; a short conversion is padded with silence here
 * and surplus input stays buffered inside the resampler. A FIFO between
 * resampler and encoder would be the thorough fix.
 *
 * Returns 0 on success, -1 on failure (with a message on stderr).
 */
static int resample_frame(AVFrame *pFrameIn, AVFrame *pFrameOut, AVCodecContext *cc_in, AVCodecContext *cc_out) {
    static struct SwrContext *aud_convert_ctx;
    uint8_t *sample_buf;
    int nb_samples_out, sample_buf_size;

    if (aud_convert_ctx == NULL) {
        /* A context may carry only a channel count; derive a layout then,
           because the resampler needs one. */
        int64_t layout_out = cc_out->channel_layout;
        int64_t layout_in = cc_in->channel_layout;

        if (!layout_out) {
            layout_out = av_get_default_channel_layout(cc_out->channels);
        }
        if (!layout_in) {
            layout_in = av_get_default_channel_layout(cc_in->channels);
        }

        aud_convert_ctx = swr_alloc_set_opts(NULL,
            layout_out, cc_out->sample_fmt, cc_out->sample_rate,
            layout_in, cc_in->sample_fmt, cc_in->sample_rate,
            0, NULL);
        if (aud_convert_ctx == NULL) {
            fprintf(stderr, "Cannot initialize the audio conversion context\n");
            return -1;
        }
        /* The context is unusable until swr_init() has been called. */
        if (swr_init(aud_convert_ctx) < 0) {
            fprintf(stderr, "Cannot initialize the audio conversion context\n");
            swr_free(&aud_convert_ctx);
            return -1;
        }
    }

    /* Encoders with a fixed frame size must be fed exactly frame_size samples. */
    if (cc_out->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE) {
        nb_samples_out = pFrameIn->nb_samples;
    } else {
        nb_samples_out = cc_out->frame_size;
    }

    sample_buf_size = av_samples_get_buffer_size(NULL, cc_out->channels, nb_samples_out, cc_out->sample_fmt, 1);
    if (sample_buf_size < 0) {
        fprintf(stderr, "Cannot compute the audio buffer size\n");
        return -1;
    }
    sample_buf = av_malloc(sample_buf_size);
    if (sample_buf == NULL) {
        fprintf(stderr, "Cannot allocate the audio buffer\n");
        return -1;
    }
    /* Zero the buffer so samples the converter does not produce are silence. */
    memset(sample_buf, 0, sample_buf_size);

    /* swr_convert() expects an array of plane pointers, hence &sample_buf. */
    if (swr_convert(aud_convert_ctx, &sample_buf, nb_samples_out,
                    (const uint8_t **)pFrameIn->data, pFrameIn->nb_samples) < 0) {
        fprintf(stderr, "Audio conversion failed\n");
        av_free(sample_buf);
        return -1;
    }

    pFrameOut->nb_samples = nb_samples_out;
    pFrameOut->format = cc_out->sample_fmt;
    if (avcodec_fill_audio_frame(pFrameOut, cc_out->channels, cc_out->sample_fmt,
                                 sample_buf, sample_buf_size, 1) < 0) {
        fprintf(stderr, "Cannot set up the output audio frame\n");
        av_free(sample_buf);
        return -1;
    }
    return 0;
}

/*
 * Transcode one audio packet: decode it, resample the frame to the encoder's
 * parameters, encode it and write the resulting packet to the output file.
 *
 * pkt_in - packet read from the input audio stream.
 * mfiIn  - input media file (supplies the decoder context).
 * mfiOut - output media file (supplies the encoder, stream index and muxer).
 *
 * A decoder that yields no frame, or an encoder that yields no packet, is
 * not an error (codecs may buffer data); 0 is returned in those cases.
 *
 * NOTE(review): the output packet is stamped with the input packet's pts,
 * unscaled, and mfiOut->iFrameCountA is never advanced here - confirm the
 * intended time bases before relying on the audio timestamps.
 *
 * Returns 0 on success, -1 on error (with a message on stderr).
 */
static int transcode_a(AVPacket *pkt_in, MFInfo *mfiIn, MFInfo *mfiOut) {
    AVCodecContext *cc_in = mfiIn->pCodecContA;
    AVCodecContext *cc_out = mfiOut->pStreamA->codec;
    AVFrame *pFrameA = NULL, *pFrameOut = NULL;
    uint8_t *resampled = NULL;   /* buffer allocated by resample_frame() */
    AVPacket pkt_out;
    int i, got_frame = 0, got_packet = 0;
    int ret = -1;
    int64_t new_pts = pkt_in->pts;

    pFrameA = avcodec_alloc_frame();
    pFrameOut = avcodec_alloc_frame();
    if (pFrameA == NULL || pFrameOut == NULL) {
        fprintf(stderr, "Could not alloc audio frame\n");
        goto done;
    }
    /* Give every packet field a defined value; data/size of NULL/0 ask the
       encoder to allocate the payload. */
    av_init_packet(&pkt_out);
    pkt_out.data = NULL;
    pkt_out.size = 0;

    /* Decode from packet to Frame */
    i = avcodec_decode_audio4(cc_in, pFrameA, &got_frame, pkt_in);
    if (i < 0) {
        fprintf(stderr, "Audio decode error ret=%i got=%i \n", i, got_frame);
        goto done;
    }
    if (got_frame == 0) {
        ret = 0;    /* decoder needs more data */
        goto done;
    }

    if (resample_frame(pFrameA, pFrameOut, cc_in, cc_out) != 0) {
        goto done;
    }
    resampled = pFrameOut->data[0];

    /* Calculate PTS */
    pFrameOut->pts = mfiOut->iFrameCountA;

    /* Encode from Frame to packet */
    i = avcodec_encode_audio2(cc_out, &pkt_out, pFrameOut, &got_packet);
    if (i < 0) {
        fprintf(stderr, "Audio encode error ret=%i got=%i \n", i, got_packet);
        goto done;
    }
    if (got_packet == 0) {
        ret = 0;    /* encoder is still buffering */
        goto done;
    }

    /* Correct DTS */
    pkt_out.dts = new_pts;
    pkt_out.pts = new_pts;
    pkt_out.stream_index = mfiOut->iStreamA;

    fprintf(stderr, "out pkt A idx=%i dur=%i dts=%"PRIi64" pts=%"PRIi64" \n", pkt_out.stream_index, pkt_out.duration, pkt_out.dts, pkt_out.pts);

    /* write the compressed packet in the media file */
    i = av_interleaved_write_frame(mfiOut->pFormatCont, &pkt_out);
    if (i < 0) {
        fprintf(stderr, "Audio stream write error ret=%i \n", i);
        goto done;
    }
    ret = 0;

done:
    /* The frames and the resampled buffer are per-call temporaries. */
    av_free(resampled);
    av_free(pFrameOut);
    av_free(pFrameA);
    return ret;
}

/*
 * Convert one decoded video frame to the size and pixel format of the output
 * encoder.
 *
 * pFrameIn  - decoded input frame.
 * pFrameOut - frame to fill; on success its picture buffer has been
 *             allocated with avpicture_alloc() and the caller must release
 *             it with avpicture_free().
 * cc_out    - encoder context (target width, height and pixel format).
 *
 * The scaler context is created on the first call from that frame's
 * geometry and kept in a function-local static for later calls.
 *
 * Returns 0 on success, -1 on failure (with a message on stderr).
 */
static int rescale_frame(AVFrame *pFrameIn, AVFrame *pFrameOut, AVCodecContext *cc_out) {
    static struct SwsContext *img_convert_ctx;

    if (img_convert_ctx == NULL) {
        img_convert_ctx = sws_getContext(pFrameIn->width, pFrameIn->height, pFrameIn->format,
                                          cc_out->width, cc_out->height, cc_out->pix_fmt,
                                          SWS_BICUBIC, NULL, NULL, NULL);
        if (img_convert_ctx == NULL) {
            fprintf(stderr, "Cannot initialize the conversion context\n");
            return -1;
        }
    }

    if (avpicture_alloc((AVPicture *)pFrameOut, cc_out->pix_fmt,
                        cc_out->width, cc_out->height) < 0) {
        fprintf(stderr, "Cannot allocate the output picture\n");
        return -1;
    }

    /* The slice height argument describes the SOURCE image, so the input
       frame's height is passed rather than the output height. */
    sws_scale(img_convert_ctx, (const uint8_t * const *)pFrameIn->data, pFrameIn->linesize,
              0, pFrameIn->height, pFrameOut->data, pFrameOut->linesize);
    return 0;
}

/*
 * Transcode one video packet, or flush the video encoder.
 *
 * pkt_in - packet read from the input video stream, or NULL to drain frames
 *          still buffered in the encoder.
 * mfiIn  - input media file (supplies the decoder context).
 * mfiOut - output media file (supplies the encoder, stream index, muxer and
 *          the iFrameCountV counter used for timestamps).
 *
 * With a packet: decode it, rescale the picture to the encoder's format,
 * stamp it with the current frame counter and encode it. With NULL: pass a
 * NULL frame to the encoder. Any packet the encoder returns is written to
 * the output file. The frame counter advances once per submitted frame and
 * once per packet written while flushing, so output timestamps keep
 * increasing.
 *
 * NOTE(review): packet dts/pts are overwritten with the frame counter and
 * are not rescaled to the output stream's time base - confirm this matches
 * the muxer's expectations.
 *
 * Returns 0 when a packet was written, 1 when nothing was produced (decoder
 * or encoder needs more data, or the encoder is fully drained), -1 on error.
 */
static int transcode_v(AVPacket *pkt_in, MFInfo *mfiIn, MFInfo *mfiOut) {
    AVCodecContext *cc_in = mfiIn->pCodecContV;
    AVCodecContext *cc_out = mfiOut->pStreamV->codec;
    AVFrame *pFrameV = NULL, *pFrameOut = NULL;
    AVPacket pkt_out;
    int i, got_frame = 0, got_packet = 0;
    int have_picture = 0;   /* set once rescale_frame() has allocated a picture */
    int ret = -1;
    int64_t new_pts = mfiOut->iFrameCountV;

    if (pkt_in != NULL) {
        pFrameV = avcodec_alloc_frame();
        pFrameOut = avcodec_alloc_frame();
        if (pFrameV == NULL || pFrameOut == NULL) {
            fprintf(stderr, "Could not alloc video frame\n");
            goto done;
        }

        /* Decode from packet to Frame */
        i = avcodec_decode_video2(cc_in, pFrameV, &got_frame, pkt_in);
        if (i < 0) {
            fprintf(stderr, "Video decode error ret=%i got=%i \n", i, got_frame);
            goto done;
        }
        if (got_frame == 0) {
            ret = 1;    /* decoder needs more data */
            goto done;
        }

        /* Rescale image to another frame */
        if (rescale_frame(pFrameV, pFrameOut, cc_out) != 0) {
            goto done;
        }
        have_picture = 1;

        /* Set PTS */
        pFrameOut->pts = new_pts;
        mfiOut->iFrameCountV++;
    }
    /* When flushing, pFrameOut stays NULL, which asks the encoder to emit
       its delayed frames. */

    av_init_packet(&pkt_out);
    pkt_out.data = NULL;
    pkt_out.size = 0;

    /* Encode from Frame to packet */
    i = avcodec_encode_video2(cc_out, &pkt_out, pFrameOut, &got_packet);
    if (i < 0) {
        fprintf(stderr, "Video encode error ret=%i got=%i \n", i, got_packet);
        fprintf(stderr, "out pkt V idx=%i dur=%i dts=%"PRIi64" pts=%"PRIi64" \n", pkt_out.stream_index, pkt_out.duration, pkt_out.dts, pkt_out.pts);
        goto done;
    }
    if (got_packet == 0) {
        ret = 1;    /* nothing to write */
        goto done;
    }

    /* Correct DTS */
    pkt_out.dts = new_pts;
    pkt_out.pts = new_pts;
    pkt_out.stream_index = mfiOut->iStreamV;
    if (pkt_in == NULL) {
        /* No frame was submitted, so advance the counter here instead. */
        mfiOut->iFrameCountV++;
    }

    fprintf(stderr, "out pkt V idx=%i dur=%i dts=%"PRIi64" pts=%"PRIi64" \n", pkt_out.stream_index, pkt_out.duration, pkt_out.dts, pkt_out.pts);

    /* write the compressed packet in the media file */
    i = av_interleaved_write_frame(mfiOut->pFormatCont, &pkt_out);
    if (i < 0) {
        fprintf(stderr, "Video stream write error ret=%i \n", i);
        goto done;
    }
    ret = 0;

done:
    /* The picture and both frames are per-call temporaries. */
    if (have_picture) {
        avpicture_free((AVPicture *)pFrameOut);
    }
    av_free(pFrameOut);
    av_free(pFrameV);
    return ret;
}

/* main */
int main(int argc, char **argv) {
    const char *filename;
    AVOutputFormat *fmt;
    AVFormatContext *fc;
    AVStream *audio_st, *video_st;
    int i, got_frame, got_packet;
    MFInfo mfiInput, mfiOutput;
    AVFrame *pFrameA, *pFrameV;
    AVPacket pkt, pktA, pktV, *ppkt;

    if (argc != 3) {
        printf("usage: %s input_file.flv output_file\n"
                "\n", argv[0]);
        return 1;
    mfiInput.filename = argv[1];
    mfiOutput.filename = argv[2];

    /* initialize libavcodec, and register all codecs and formats */

    /* read input mediafile info */
    if (open_mediafile(&mfiInput) !=0) return 1;

    /* set output mediafile parameters */
    mfiOutput.iPictH = mfiInput.iPictH;
    mfiOutput.iPictW = mfiInput.iPictW;
    mfiOutput.iBitRateA = 64000;
    mfiOutput.iBitRateV = 400000;
    //mfiOutput.iSampleRateA = 44100;
    mfiOutput.iSampleRateA = mfiInput.iSampleRateA;
    mfiOutput.iFrameCountV = 1;
    mfiOutput.iFrameCountA = 1;

    /* allocate output mediafile */
    if (create_output_mediafile(&mfiOutput) !=0) return 1;

    /* allocate frame buffers */
    av_new_packet(&pkt, 0);
    //av_new_packet(&pktA, 0);
    //av_new_packet(&pktV, 0);

    /* Read frames from input and save write frames to output */
    for(;;) {
        //fprintf(stderr, "frame %i\n", i);
        // Read next frame into packet
        if (av_read_frame(mfiInput.pFormatCont, &pkt) !=0) break;
        fprintf(stderr, "in  pkt idx=%i dur=%i dts=%"PRIi64" pts=%"PRIi64" \n", pkt.stream_index, pkt.duration, pkt.dts, pkt.pts);
        // Check frame type for AUDIO (from what stream he come) and transcode
        if (pkt.stream_index == mfiInput.iStreamA) {
            if (transcode_a(&pkt, &mfiInput, &mfiOutput) != 0) return 1;
        // Check frame type for VIDEO (from what stream he come) and transcode
        if (pkt.stream_index == mfiInput.iStreamV) {
            //if (transcode_v(&pkt, &mfiInput, &mfiOutput) < 0) return 1;
        // write the compressed packet in the media file
        //av_interleaved_write_frame(mfiOutput.pFormatCont, &pkt);
        //if (i>10) return 1;
    // Write last frames
    while (transcode_v(NULL, &mfiInput, &mfiOutput) == 0) {
    /* write the trailer, if any.  the trailer must be written
      * before you close the CodecContexts open when you wrote the
      * header; otherwise write_trailer may try to use memory that
      * was freed on av_codec_close() */
    fprintf(stderr, "av_write_trailer \n");
    return 0;
