如何用ffmpeg編寫一個顯式控制關鍵幀位置的視頻編碼器？

我想編寫一個ffmpeg編碼器，它可以將iFrame（關鍵幀）放在我想要的位置。我在哪裏可以找到教程或參考資料？如何用ffmpeg編寫一個顯式控制關鍵幀位置的視頻編碼器？

P.S
是否有可能用mencoder或任何開源編碼器做到這一點？我想編碼H263文件。我正在用C編寫，目標平臺是linux。

2010-02-18 SunnyShah

用什麼語言？ – Shoban 2010-02-18 14:40:21

我可以在C，C++編寫，但教程或參考可以是任何一種語言。我只是想知道/瞭解ffmpeg的API。 – SunnyShah 2010-02-18 14:44:12

您需要查看libavcodec文檔 - 特別是在avcodec_encode_video（）。我發現最好的文檔位於ffmpeg頭文件和ffmpeg源提供的API示例源代碼中。具體來說，看看libavcodec/api-example.c甚至ffmpeg.c。

要強制I幀，您需要將正在編碼的圖片的pict_type成員設置爲1：1是I幀，2是P幀，至於B幀的代碼是什麼，我一時想不起來了……此外，key_frame成員需要設置爲1。

一些介紹性材料可用here和here，但我真的不知道它有多好。

您需要小心如何分配API調用所需的幀對象。在我看來，api-example.c是你最好的選擇。查找函數video_encode_example（） - 它很簡潔，並且說明了所有需要注意的重要事項 - 特別注意第二次調用avcodec_encode_video（）時傳遞了NULL圖片參數 - 這是取得最後幾幀視頻所必需的，因爲MPEG視頻並非按順序編碼，最終可能會有幾幀的延遲。

來源

2010-02-24 18:02:44

順便說一句，圖片的'pict_type'成員的值是'AV_PICTURE_TYPE_I'，'AV_PICTURE_TYPE_P'，'AV_PICTURE_TYPE_B'，等等...... – 2011-07-25 13:06:00

GStreamer有不錯的文檔（decent documentation），提供多種語言（number of languages）的綁定（雖然本機API爲C），並且支持所有你能找到插件的視頻格式，包括通過gstreamer-ffmpeg支持的H.263。

來源

2010-02-23 08:58:19

您將需要libavcodec庫，對於第一步，我認爲您可以瞭解它在ffmpeg源代碼中的ffplay.c文件中的使用。它會告訴你很多。你也可以在rtstegvideo.sourceforge.net上查看我的項目。

希望這對你有幫助。

來源

2010-03-17 09:48:41 deddihp

如果您是Java程序員，則使用Xuggler。

來源

2011-12-04 19:49:10

api-example.c的最新版本可以在 http://ffmpeg.org/doxygen/trunk/doc_2examples_2decoding_encoding_8c-example.html 找到。

它在單個且相對簡短的函數中完成了全部的視頻編碼。所以這可能是一個很好的起點。編譯並運行它。然後開始修改它，直到它達到你想要的效果。

它還具有音頻編碼和音頻&視頻解碼示例。

來源

2012-03-18 09:37:54 bcmpinc

最好是查看最新版本的官方文檔：http://ffmpeg.org/doxygen/trunk/doc_2examples_2decoding_encoding_8c-example.html – 2013-12-27 10:25:55

FFmpeg 2.7上的最小可運行示例

基於Ori Pessach的答案，下面是一個最小示例，它按 I P B P ... 的形式產生幀。

控制幀類型代碼的關鍵部分是：

/* Allocate the codec context for `codec`. */
c = avcodec_alloc_context3(codec); 
/* Minimum distance between I-frames. This is the maximum value allowed, 
or else we get a warning at runtime. */ 
c->keyint_min = 600; 
/* Permit B-frames: without this the value defaults to 0 and 
B-frames are not allowed. */ 
c->max_b_frames = 1;

和：

/* Request a frame type based on the frame's position in a 4-frame cycle:
I, P, B, P. Only the I-frame is flagged as a key frame. */
frame->key_frame = 0; 
switch (frame->pts % 4) { 
    case 0: 
     /* Start of the cycle: key frame. */
     frame->key_frame = 1; 
     frame->pict_type = AV_PICTURE_TYPE_I; 
    break; 
    case 1: 
    case 3: 
     frame->pict_type = AV_PICTURE_TYPE_P; 
    break; 
    case 2: 
     frame->pict_type = AV_PICTURE_TYPE_B; 
    break; 
}

然後，我們可以用以下命令驗證幀類型：

# Print the picture type of every frame of the video stream in tmp.h264 as CSV.
# Each backslash must be the last character on its line to continue the command.
ffprobe -select_streams v \
    -show_frames \
    -show_entries frame=pict_type \
    -of csv \
    tmp.h264

如以下鏈接所述：https://superuser.com/questions/885452/extracting-the-index-of-key-frames-from-a-video-using-ffmpeg

有些規則是FFmpeg強制執行的，即使我試圖繞過它們也沒用：

所述第一幀是一個I幀
不能在I幀之前放置B0幀（TODO爲什麼？）

Preview of generated output。

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>

/* Encoder state shared by the functions in this file. */
/* Codec context; allocated and opened in ffmpeg_encoder_start. */
static AVCodecContext *c = NULL; 
/* Reusable picture handed to the encoder; allocated in ffmpeg_encoder_start. */
static AVFrame *frame; 
/* Packet that receives the encoder output before it is written to `file`. */
static AVPacket pkt; 
/* Output stream, opened in ffmpeg_encoder_start and closed in ffmpeg_encoder_finish. */
static FILE *file; 
/* Conversion context cached between calls by ffmpeg_encoder_set_frame_yuv_from_rgb. */
struct SwsContext *sws_context = NULL; 

/*
Convert a packed RGB24 image to YUV420P and store the result directly in
the global `frame`, writing its `data` planes using its `linesize` strides.

rgb: packed RGB24 pixels, 3 * c->width bytes per row, c->height rows.

The conversion context is cached in the global `sws_context` and reused
across calls. Prints a message and exits with status 1 if the context
cannot be obtained.
*/
static void ffmpeg_encoder_set_frame_yuv_from_rgb(uint8_t *rgb) {
    /*
    libswscale takes per-plane arrays (up to 4 planes), so pass full-sized
    arrays rather than the address of a single pointer/stride: only plane 0
    is used for packed RGB24, the rest are left empty.
    */
    const uint8_t *const in_planes[4] = { rgb, NULL, NULL, NULL };
    const int in_linesize[4] = { 3 * c->width, 0, 0, 0 };

    sws_context = sws_getCachedContext(sws_context,
            c->width, c->height, AV_PIX_FMT_RGB24,
            c->width, c->height, AV_PIX_FMT_YUV420P,
            0, NULL, NULL, NULL);
    if (!sws_context) {
        fprintf(stderr, "Could not get the RGB to YUV conversion context\n");
        exit(1);
    }
    sws_scale(sws_context, in_planes, in_linesize, 0,
            c->height, frame->data, frame->linesize);
}

/*
Generate one of 2 different images with four colored rectangles. The image
alternates every 25 frames (pts 0-24: image 1, pts 25-49: image 2, ...).

Image 1:

    black | red
    ------+-----
    green | blue

Image 2:

    yellow | magenta
    -------+--------
    cyan   | white

width, height: image dimensions in pixels.
pts: frame index; selects which of the two images is drawn.
rgb: buffer returned by a previous call (it is resized and reused), or NULL.

Returns the packed RGB24 buffer (3 * width * height bytes). The caller owns
it and must free() it. Prints a message and exits with status 1 if the
buffer cannot be allocated.
*/
uint8_t* generate_rgb(int width, int height, int pts, uint8_t *rgb) {
    /* Quadrant colors indexed as [image][bottom half][right half][channel]. */
    static const uint8_t palette[2][2][2][3] = {
        { { {   0,   0,   0 }, { 255,   0,   0 } },
          { {   0, 255,   0 }, {   0,   0, 255 } } },
        { { { 255, 255,   0 }, { 255,   0, 255 } },
          { {   0, 255, 255 }, { 255, 255, 255 } } },
    };
    const size_t size = 3 * sizeof(uint8_t) * height * width;
    /* Use the `pts` argument rather than global encoder state. */
    const int image = ((pts / 25) % 2 == 0) ? 0 : 1;
    uint8_t *resized;
    int x, y;

    /* Keep the old pointer until realloc succeeds so it is never lost. */
    resized = realloc(rgb, size);
    if (!resized && size > 0) {
        fprintf(stderr, "Could not allocate RGB buffer\n");
        exit(1);
    }
    rgb = resized;

    for (y = 0; y < height; y++) {
        const int bottom = !(y < height / 2);
        for (x = 0; x < width; x++) {
            const int right = !(x < width / 2);
            const uint8_t *color = palette[image][bottom][right];
            uint8_t *pixel = rgb + 3 * ((size_t)y * width + x);
            pixel[0] = color[0];
            pixel[1] = color[1];
            pixel[2] = color[2];
        }
    }
    return rgb;
}

/* Allocate resources and write header data to the output file. */ 
void ffmpeg_encoder_start(const char *filename, int codec_id, int fps, int width, int height) { 
    AVCodec *codec; 
    int ret; 
    codec = avcodec_find_encoder(codec_id); 
    if (!codec) { 
     fprintf(stderr, "Codec not found\n"); 
     exit(1); 
    } 
    c = avcodec_alloc_context3(codec); 
    if (!c) { 
     fprintf(stderr, "Could not allocate video codec context\n"); 
     exit(1); 
    } 
    c->bit_rate = 400000; 
    c->width = width; 
    c->height = height; 
    c->time_base.num = 1; 
    c->time_base.den = fps; 
    /* I, P, B frame placement parameters. */ 
    c->gop_size = 600; 
    c->max_b_frames = 1; 
    c->keyint_min = 600; 
    c->pix_fmt = AV_PIX_FMT_YUV420P; 
    if (codec_id == AV_CODEC_ID_H264) 
     av_opt_set(c->priv_data, "preset", "slow", 0); 
    if (avcodec_open2(c, codec, NULL) < 0) { 
     fprintf(stderr, "Could not open codec\n"); 
     exit(1); 
    } 
    file = fopen(filename, "wb"); 
    if (!file) { 
     fprintf(stderr, "Could not open %s\n", filename); 
     exit(1); 
    } 
    frame = av_frame_alloc(); 
    if (!frame) { 
     fprintf(stderr, "Could not allocate video frame\n"); 
     exit(1); 
    } 
    frame->format = c->pix_fmt; 
    frame->width = c->width; 
    frame->height = c->height; 
    ret = av_image_alloc(frame->data, frame->linesize, c->width, c->height, c->pix_fmt, 32); 
    if (ret < 0) { 
     fprintf(stderr, "Could not allocate raw picture buffer\n"); 
     exit(1); 
    } 
} 

/* 
Write trailing data to the output file 
and free resources allocated by ffmpeg_encoder_start. 
*/ 
void ffmpeg_encoder_finish(void) { 
    uint8_t endcode[] = { 0, 0, 1, 0xb7 }; 
    int got_output, ret; 
    do { 
     fflush(stdout); 
     ret = avcodec_encode_video2(c, &pkt, NULL, &got_output); 
     if (ret < 0) { 
      fprintf(stderr, "Error encoding frame\n"); 
      exit(1); 
     } 
     if (got_output) { 
      fwrite(pkt.data, 1, pkt.size, file); 
      av_packet_unref(&pkt); 
     } 
    } while (got_output); 
    fwrite(endcode, 1, sizeof(endcode), file); 
    fclose(file); 
    avcodec_close(c); 
    av_free(c); 
    av_freep(&frame->data[0]); 
    av_frame_free(&frame); 
} 

/*
Convert one RGB24 image into the shared frame, request a frame type based on
the frame's pts (repeating I, P, B, P cycle) and append any packet the
encoder produces to the output file.

Call only between ffmpeg_encoder_start and ffmpeg_encoder_finish;
ffmpeg_encoder_finish must follow the last call to this function.
*/
void ffmpeg_encoder_encode_frame(uint8_t *rgb) {
    int status, have_packet;
    int phase;

    ffmpeg_encoder_set_frame_yuv_from_rgb(rgb);

    /* Start from an empty packet so the encoder provides the buffer. */
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;

    /* Position inside the 4-frame cycle decides the requested type. */
    phase = (int)(frame->pts % 4);
    if (phase == 0) {
        frame->key_frame = 1;
        frame->pict_type = AV_PICTURE_TYPE_I;
    } else if (phase == 1 || phase == 3) {
        frame->key_frame = 0;
        frame->pict_type = AV_PICTURE_TYPE_P;
    } else if (phase == 2) {
        frame->key_frame = 0;
        frame->pict_type = AV_PICTURE_TYPE_B;
    }

    status = avcodec_encode_video2(c, &pkt, frame, &have_packet);
    if (status < 0) {
        fprintf(stderr, "Error encoding frame\n");
        exit(1);
    }

    /* The encoder may buffer frames, so a call can yield no packet. */
    if (!have_packet) {
        return;
    }
    fwrite(pkt.data, 1, pkt.size, file);
    av_packet_unref(&pkt);
}

/*
Represents the main loop of an application which generates one frame per loop:
encodes 100 generated 320x240 frames at 25 fps into `filename` using the
encoder identified by `codec_id`.
*/
static void encode_example(const char *filename, int codec_id) {
    enum { FPS = 25, FRAME_COUNT = 100, WIDTH = 320, HEIGHT = 240 };
    uint8_t *rgb = NULL;
    int pts;

    ffmpeg_encoder_start(filename, codec_id, FPS, WIDTH, HEIGHT);
    for (pts = 0; pts < FRAME_COUNT; pts++) {
        frame->pts = pts;
        /* The same buffer is resized and reused for every frame. */
        rgb = generate_rgb(WIDTH, HEIGHT, pts, rgb);
        ffmpeg_encoder_encode_frame(rgb);
    }
    ffmpeg_encoder_finish();
    /* The image buffer is owned here; release it so repeated runs do not leak. */
    free(rgb);
}

/* Encode the generated test video once per output listed below. */
int main(void) {
    static const struct {
        const char *path;
        int codec_id;
    } outputs[] = {
        { "tmp.h264", AV_CODEC_ID_H264 },
        { "tmp.mpg", AV_CODEC_ID_MPEG1VIDEO },
        /* TODO: is this encoded correctly? Possible to view it without container? */
        /*{ "tmp.vp8", AV_CODEC_ID_VP8 },*/
    };
    unsigned int i;

    avcodec_register_all();
    for (i = 0; i < sizeof(outputs) / sizeof(outputs[0]); i++) {
        encode_example(outputs[i].path, outputs[i].codec_id);
    }
    return 0;
}

在Ubuntu 15.10上測試。 GitHub upstream。

你真的要做到這一點？

在大多數情況下，你最好只是控制AVCodecContext全局參數。

FFmpeg可以做一些聰明的事情，例如當新幀與前一幀完全不同、差分編碼得不到多少收益時，自動使用關鍵幀。

例如，如果我們設置只是：

c->keyint_min = 600;

那麼在上面的例子中我們正好得到4個關鍵幀，這是合乎邏輯的，因爲所生成的視頻中有4次突然的畫面變化。

來源

2016-04-04 21:10:00

如何用ffmpeg編寫一個顯式控制關鍵幀位置的視頻編碼器？

回答

相關問題