FFMPEG-based video converter

/* Employee trial solution for Inventos
Need to write program for Linux with libavcodec/libavformat, that takes two filenames for input and output files.
Input file in .flv format must be transcoded to output file .mp4 with h264/aac codecs */
// #include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "libavutil/mathematics.h"
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"

/* 5 seconds stream duration */
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT PIX_FMT_YUV420P /* default pix_fmt */

/* mediafile info */
typedef struct {
char *filename;
AVFormatContext *pFormatCont;
int iStreamA, iStreamV;
AVCodecContext *pCodecContA, *pCodecContV;
AVCodec *pCodecA, *pCodecV;
AVStream *pStreamA, *pStreamV;
int iPictH, iPictW, iBitRateA, iBitRateV, iSampleRateA;
int iFrameCountA, iFrameCountV;
} MFInfo;

/* add an audio output stream */
static int add_audio_stream(MFInfo *mfi, enum CodecID codec_id) {
AVFormatContext *fc = mfi->pFormatCont;
AVCodec *codec = NULL;
AVStream *st = NULL;
AVCodecContext *cc = NULL;
/* find the audio encoder */
codec = avcodec_find_encoder(codec_id);
if (!codec) {
fprintf(stderr, "audio codec not found\n");
return -1;
}
/* allocate audio stream */
st = avformat_new_stream(fc, codec);
if (!st) {
fprintf(stderr, "Could not alloc audio stream\n");
return -1;
}
st->id = 1;
cc = st->codec;
avcodec_get_context_defaults3(cc, codec);
cc->codec = codec;
cc->codec_id = codec_id;
/* put sample parameters */
cc->sample_fmt = AV_SAMPLE_FMT_S16;
cc->bit_rate = mfi->iBitRateA;
cc->sample_rate = mfi->iSampleRateA;
cc->channels = 2;
// some formats want stream headers to be separate
if (fc->oformat->flags & AVFMT_GLOBALHEADER)
cc->flags |= CODEC_FLAG_GLOBAL_HEADER;
/* open codec */
if (avcodec_open2(cc, NULL, NULL) < 0) {
fprintf(stderr, "could not open audio codec \n");
return -1;
}
mfi->pStreamA = st;
mfi->pCodecContA = cc;
mfi->pCodecA = codec;
mfi->iStreamA = st->index;
return 0;
}

/**************************************************************/
/* video output */
static AVFrame *picture, *tmp_picture;
static uint8_t *video_outbuf;
static int frame_count, video_outbuf_size;

/* add a video output stream */
static int add_video_stream(MFInfo *mfi, enum CodecID codec_id) {
AVFormatContext *fc = mfi->pFormatCont;
AVCodecContext *cc = NULL;
AVStream *st = NULL;
AVCodec *codec = NULL;
/* find the video encoder */
codec = avcodec_find_encoder(codec_id);
if (!codec) {
fprintf(stderr, "video codec not found\n");
return -1;
}

/* allocate video stream */
st = avformat_new_stream(fc, codec);
if (!st) {
fprintf(stderr, "Could not alloc video stream\n");
return -1;
}
cc = st->codec;
avcodec_get_context_defaults3(cc, codec);
cc->codec = codec;
cc->codec_id = codec_id;

/* put sample parameters */
cc->bit_rate = mfi->iBitRateV;
/* resolution must be a multiple of two */
cc->width = mfi->iPictW;
cc->height = mfi->iPictH;

/* time base: this is the fundamental unit of time (in seconds) in terms
of which frame timestamps are represented. for fixed-fps content,
timebase should be 1/framerate and timestamp increments should be
identically 1. */
cc->time_base.den = STREAM_FRAME_RATE;
cc->time_base.num = 1;
cc->gop_size = 12; /* emit one intra frame every twelve frames at most */
cc->pix_fmt = STREAM_PIX_FMT;

// some formats want stream headers to be separate
if (fc->oformat->flags & AVFMT_GLOBALHEADER)
cc->flags |= CODEC_FLAG_GLOBAL_HEADER;

/* open the video codec */
if (avcodec_open2(cc, NULL, NULL) < 0) {
fprintf(stderr, "could not open video codec\n");
return -1;
}
mfi->pStreamV = st; mfi->pCodecContV = cc;
mfi->pCodecV = codec;
mfi->iStreamV = st->index;
return 0;
}

/* open mediafile, get a/v streams and find codecs */
static int open_mediafile(MFInfo *mfi) {
int i;

AVFormatContext *pFormatCont = NULL;

// Open video file
mfi->pFormatCont = NULL;
if(avformat_open_input(&pFormatCont, mfi->filename, NULL, NULL)!=0) {
fprintf(stderr, "Could not open '%s'\n", mfi->filename);
return -1;
}

// Retrieve stream information
if(avformat_find_stream_info(pFormatCont, NULL)<0) {
fprintf(stderr, "Couldn't find stream information'\n");
return -1;
}

// Dump information about file onto standard error
av_dump_format(pFormatCont, 0, mfi->filename, 0);
// Find the first video and audio stream
mfi->iStreamA = -1;
mfi->iStreamV = -1;
for(i=0; i < pFormatCont->nb_streams; i++) {
if(pFormatCont->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO) {
mfi->iStreamV=i;
}
if(pFormatCont->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO) {
mfi->iStreamA=i;
}
}

if(mfi->iStreamV == -1) {
fprintf(stderr, "Didn't find a video stream\n");
return -1;
} else {
// Get pointer to video stream
mfi->pStreamV=pFormatCont->streams[mfi->iStreamV];
// Get a pointer to the codec context for the video stream
mfi->pCodecContV=mfi->pStreamV->codec;
// Find the decoder for the video stream
mfi->pCodecV=avcodec_find_decoder(mfi->pCodecContV->codec_id);
if(mfi->pCodecV==NULL) {
fprintf(stderr, "Video codec not found\n");
return -1;
}

// Open video codec
if(avcodec_open2(mfi->pCodecContV, mfi->pCodecV, NULL) < 0) {
fprintf(stderr, "Could not open video codec'\n");
return -1;
}

// Set mediafile properties
mfi->iBitRateV = mfi->pCodecContV->bit_rate;
mfi->iPictW = mfi->pCodecContV->width;
mfi->iPictH = mfi->pCodecContV->height;
}
if(mfi->iStreamA == -1) {
fprintf(stderr, "Didn't find a audio stream'\n");
return -1;
} else {
// Get pointer to audio stream
mfi->pStreamA=pFormatCont->streams[mfi->iStreamA];
// Get a pointer to the codec context for the audio stream
mfi->pCodecContA=mfi->pStreamA->codec;
// Find the decoder for the audio stream
mfi->pCodecA=avcodec_find_decoder(mfi->pCodecContA->codec_id);
if(mfi->pCodecA==NULL) {
fprintf(stderr, "Audio codec not found'\n");
return -1;
}
// Open audio codec
if(avcodec_open2(mfi->pCodecContA, mfi->pCodecA, NULL) < 0) {
fprintf(stderr, "Could not open audio codec'\n");
return -1;
}
// Set mediafile properties
mfi->iBitRateA = mfi->pCodecContA->bit_rate;
mfi->iSampleRateA = mfi->pCodecContA->sample_rate;
}
mfi->pFormatCont = pFormatCont;
return 0;
}

static int create_output_mediafile(MFInfo *mfi) {
AVFormatContext *fc = NULL;
AVOutputFormat *ofmt = NULL;

/* allocate the output media context */
avformat_alloc_output_context2(&fc, NULL, "mp4", mfi->filename);
if (!fc) {
return -1;
}
mfi->pFormatCont=fc;
//ofmt = fc->oformat;

/* add the audio and video streams and initialize the codecs */
if (add_video_stream(mfi, CODEC_ID_H264) != 0) return -1;
if (add_audio_stream(mfi, CODEC_ID_AAC) != 0) return -1;
av_dump_format(fc, 0, mfi->filename, 1);

/* open the output file, if needed */
if (!(fc->oformat->flags & AVFMT_NOFILE)) {
if (avio_open(&fc->pb, mfi->filename, AVIO_FLAG_WRITE) < 0) {
fprintf(stderr, "Could not open '%s'\n", mfi->filename);
return -1;
}
}

/* write the stream header, if any */
avformat_write_header(fc, NULL);
}

static void close_output_mediafile(MFInfo *mfi) {
int i;
AVFormatContext *fc = mfi->pFormatCont;
avformat_free_context(fc);
return;

/* !!!!!!!!!!!!!!!! */
/* close each codec */
avcodec_close(mfi->pCodecContA);
avcodec_close(mfi->pCodecContV);

/* free the streams */
for(i = 0; i < fc->nb_streams; i++) {
av_freep(&fc->streams[i]->codec);
av_freep(&fc->streams[i]);
}
if (!(fc->oformat->flags & AVFMT_NOFILE)) {
/* close the output file */
avio_close(fc->pb);
}

/* free the contect */
av_free(fc);
}

static void close_input_mediafile(MFInfo *mfi) {
AVFormatContext *fc = mfi->pFormatCont;

/* close each codec */
//avcodec_close(mfi->pCodecA);
//avcodec_close(mfi->pCodecV);
avformat_close_input(&fc);
}

static void dump_frame(AVFrame *pFrame) {
fprintf(stderr, "pts=%i pkt_pts=%i linesize[0]=%i \n", pFrame->pts, pFrame->pkt_pts, pFrame->linesize[0]);
fprintf(stderr, "nb_samples=%i format=%s \n", pFrame->nb_samples, av_get_sample_fmt_name(pFrame->format));
//fprintf(stderr, "nb_samples=%i format=%s \n", pFrame->nb_samples, av_get_sample_fmt_name(pFrame->format));
}

static int resample_frame(AVFrame *pFrameIn, AVFrame *pFrameOut, AVCodecContext *cc_in, AVCodecContext *cc_out) {
static struct SwrContext *aud_convert_ctx;
const AVFrame *pcFrameIn = pFrameIn;
uint8_t *sample_buf;
int nb_samples_out, sample_buf_size;

/* test!! */
if (cc_out->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE)
nb_samples_out = pFrameIn->nb_samples;
else
nb_samples_out = cc_out->frame_size;
sample_buf_size = av_samples_get_buffer_size(NULL, cc_out->channels, nb_samples_out, cc_out->sample_fmt, 1);
sample_buf = av_malloc(sample_buf_size);
swr_convert(aud_convert_ctx, sample_buf, nb_samples_out, (const uint8_t **)pFrameIn->data, pFrameIn->nb_samples);
pFrameOut->nb_samples = nb_samples_out; pFrameOut->format = cc_out->sample_fmt;
avcodec_fill_audio_frame(pFrameOut, cc_out->channels, cc_out->sample_fmt,
sample_buf, sample_buf_size, 1);
dump_frame(pFrameOut);
return 0;
}

static int transcode_a(AVPacket *pkt_in, MFInfo *mfiIn, MFInfo *mfiOut) {
AVCodecContext *cc_in = mfiIn->pCodecContA;
AVCodecContext *cc_out = mfiOut->pStreamA->codec;
AVFrame *pFrameA, *pFrameOut;
AVPacket pkt_out;
int i, got_frame, got_packet;
int64_t new_pts = pkt_in->pts;

pFrameA = avcodec_alloc_frame();
pFrameOut = avcodec_alloc_frame();
av_init_packet(&pkt_out);
pkt_out.data = NULL;
pkt_out.size = 0;

// Decode from packet to Frame
i = avcodec_decode_audio4(cc_in, pFrameA, &got_frame, pkt_in);
if (i<0 || got_frame==0) {
fprintf(stderr, "Audio decode error ret=%i got=%i \n", i, got_frame);
return -1;
}
dump_frame(pFrameA);
resample_frame(pFrameA, pFrameOut, cc_in, cc_out);

// Calculate PTS
pFrameOut->pts = mfiOut->iFrameCountA;
//mfiOut->iFrameCountA++;
dump_frame(pFrameOut);

// Encode from Frame to packet
i = avcodec_encode_audio2(cc_out, &pkt_out, pFrameOut, &got_packet);
if (i<0 || got_packet==0) {
fprintf(stderr, "Audio encode error ret=%i got=%i \n", i, got_packet);
return -1;
}

    // Correct DTS
pkt_out.dts = new_pts;
pkt_out.pts = new_pts;
//pkt_out.dts = pFrameOut->pts;
  //pkt_out.pts = pFrameOut->pts;
  pkt_out.stream_index = mfiOut->iStreamA;

// Clear pFrameOut
av_free(pFrameOut);
fprintf(stderr, "out pkt A idx=%i dur=%i dts=%"PRIi64" pts=%"PRIi64" \n", pkt_out.stream_index, pkt_out.duration, pkt_out.dts, pkt_out.pts);

// write the compressed packet in the media file
i = av_interleaved_write_frame(mfiOut->pFormatCont, &pkt_out);
if (i<0) {
fprintf(stderr, "Audio stream write error ret=%i \n", i);
return -1;
}
return 0;
}

static int rescale_frame(AVFrame *pFrameIn, AVFrame *pFrameOut, AVCodecContext *cc_out) {
static struct SwsContext *img_convert_ctx;
AVCodecContext *cc = pFrameIn->owner;
const AVFrame *pcFrameIn = pFrameIn;

//fprintf(stderr, "w=%i h=%i pf=%i \n", pFrameIn->width, pFrameIn->height, avcodec_pix_fmt_to_codec_tag(pFrameIn->format));
//fprintf(stderr, "w=%i h=%i pf=%i \n", cc_out->width, cc_out->height, avcodec_pix_fmt_to_codec_tag(cc_out->pix_fmt));
if (img_convert_ctx == NULL) {
img_convert_ctx = sws_getContext(pFrameIn->width, pFrameIn->height, pFrameIn->format,
cc_out->width, cc_out->height, cc_out->pix_fmt,
SWS_BICUBIC, NULL, NULL, NULL);
if (img_convert_ctx == NULL) {
fprintf(stderr, "Cannot initialize the conversion context\n");
return -1;
}
}
avcodec_get_frame_defaults(pFrameOut);
avpicture_alloc((AVPicture*)pFrameOut, cc_out->pix_fmt,
cc_out->width, cc_out->height);
sws_scale(img_convert_ctx, (const uint8_t * const*)pcFrameIn->data, pcFrameIn->linesize,
0, cc_out->height, pFrameOut->data, pFrameOut->linesize);
return 0;
}

static int transcode_v(AVPacket *pkt_in, MFInfo *mfiIn, MFInfo *mfiOut) {
AVCodecContext *cc_in = mfiIn->pCodecContV;

AVCodecContext *cc_out = mfiOut->pStreamV->codec;
AVFrame *pFrameV, *pFrameOut;

    AVPacket pkt_out;
int i, got_frame, got_packet;
int64_t new_pts = mfiOut->iFrameCountV;
if (pkt_in != NULL) {
pFrameV=avcodec_alloc_frame();
pFrameOut=avcodec_alloc_frame();
// Decode from packet to Frame
    i = avcodec_decode_video2(cc_in, pFrameV, &got_frame, pkt_in);
if (i<0 || got_frame==0) {
fprintf(stderr, "Video decode error ret=%i got=%i \n", i, got_frame);
return -1;
}
//fprintf(stderr, "frame V type=%i format=%i \n", pFrameV->type, pFrameV->format);
// Rescale image to another frame
rescale_frame(pFrameV, pFrameOut, cc_out);
// Set PTS
      pFrameOut->pts = new_pts;
} else {
pFrameOut=NULL;
}
av_init_packet(&pkt_out);
pkt_out.data = NULL;
pkt_out.size = 0;

    // Encode from Frame to packet
  i = avcodec_encode_video2(cc_out, &pkt_out, pFrameOut, &got_packet);
if (i<0 ) {
fprintf(stderr, "Video encode error ret=%i got=%i \n", i, got_packet);
fprintf(stderr, "out pkt V idx=%i dur=%i dts=%i64 pts=%i64 \n", pkt_out.stream_index, pkt_out.duration, pkt_out.dts, pkt_out.pts);
return -1;
}
// Correct DTS
  //new_pts = av_opt_ptr(avcodec_get_frame_class(), pFrameOut, "best_effort_timestamp");
pkt_out.dts = new_pts;
pkt_out.pts = new_pts;
pkt_out.stream_index = mfiOut->iStreamV;

// Clear pFrameOut
av_free(pFrameOut);

//pkt_out.dts = pkt_out.pts = AV_NOPTS_VALUE;
//correct_pts(mfiOutput.pStreamV, mfiOutput.pCodecContV, &pkt);
fprintf(stderr, "out pkt V idx=%i dur=%i dts=%"PRIi64" pts=%"PRIi64" \n", pkt_out.stream_index, pkt_out.duration, pkt_out.dts, pkt_out.pts);
//fprintf(stderr, "out pkt V idx=%i dur=%i \n", pkt_out.stream_index, pkt_out.duration);

if (i == 0) {
if (got_packet == 1) {
// write the compressed packet in the media file
i = av_interleaved_write_frame(mfiOut->pFormatCont, &pkt_out);
if (i<0) {
fprintf(stderr, "Video stream write error ret=%i \n", i);
return -1;
}
} else {
return 1;
}
} else {
return -1;
}
return 0;
}

/**************************************************************/
/* main */
int main(int argc, char **argv) {
const char *filename;
AVOutputFormat *fmt;
AVFormatContext *fc;
AVStream *audio_st, *video_st;
int i, got_frame, got_packet;
MFInfo mfiInput, mfiOutput;
AVFrame *pFrameA, *pFrameV;
AVPacket pkt, pktA, pktV, *ppkt;

if (argc != 3) {
printf("usage: %s input_file.flv output_file\n"
"\n", argv[0]);
return 1;
}
mfiInput.filename = argv[1];
mfiOutput.filename = argv[2];

/* initialize libavcodec, and register all codecs and formats */
av_register_all();

/* read input mediafile info */
if (open_mediafile(&mfiInput) !=0) return 1;

    /* set output mediafile parameters */
  mfiOutput.iPictH = mfiInput.iPictH;
mfiOutput.iPictW = mfiInput.iPictW;
mfiOutput.iBitRateA = 64000;
mfiOutput.iBitRateV = 400000;
//mfiOutput.iSampleRateA = 44100;
  mfiOutput.iSampleRateA = mfiInput.iSampleRateA;
mfiOutput.iFrameCountV = 1;
mfiOutput.iFrameCountA = 1;

/* allocate output mediafile */
if (create_output_mediafile(&mfiOutput) !=0) return 1;

/* allocate frame buffers */
//pFrameA=avcodec_alloc_frame();
//pFrameV=avcodec_alloc_frame();
av_new_packet(&pkt, 0);
//av_new_packet(&pktA, 0);
//av_new_packet(&pktV, 0);

    /* Read frames from input and save write frames to output */
i=0;
for(;;) {
i++;
//fprintf(stderr, "frame %i\n", i);
      // Read next frame into packet
      if (av_read_frame(mfiInput.pFormatCont, &pkt) !=0) break;
fprintf(stderr, "in pkt idx=%i dur=%i dts=%"PRIi64" pts=%"PRIi64" \n", pkt.stream_index, pkt.duration, pkt.dts, pkt.pts);
// Check frame type for AUDIO (from what stream he come) and transcode
    if (pkt.stream_index == mfiInput.iStreamA) {
if (transcode_a(&pkt, &mfiInput, &mfiOutput) != 0) return 1;
mfiOutput.iFrameCountA++;
}
// Check frame type for VIDEO (from what stream he come) and transcode
      if (pkt.stream_index == mfiInput.iStreamV) {
//if (transcode_v(&pkt, &mfiInput, &mfiOutput) < 0) return 1;
        mfiOutput.iFrameCountV++;
}
// write the compressed packet in the media file
//av_interleaved_write_frame(mfiOutput.pFormatCont, &pkt);
      av_free_packet(&pkt);
av_init_packet(&pkt);
//if (i>10) return 1;
}
// Write last frames
  while (transcode_v(NULL, &mfiInput, &mfiOutput) == 0) {
mfiOutput.iFrameCountV++;
}
/* write the trailer, if any. the trailer must be written
* before you close the CodecContexts open when you wrote the
* header; otherwise write_trailer may try to use memory that
* was freed on av_codec_close() */
fprintf(stderr, "av_write_trailer \n");
av_write_trailer(mfiOutput.pFormatCont);
close_output_mediafile(&mfiOutput);
close_input_mediafile(&mfiInput);
return 0;

}