28 #include <nvEncodeAPI.h>
40 #define CUDAAPI __stdcall
46 #define LOAD_FUNC(l, s) GetProcAddress(l, s)
47 #define DL_CLOSE_FUNC(l) FreeLibrary(l)
49 #define LOAD_FUNC(l, s) dlsym(l, s)
50 #define DL_CLOSE_FUNC(l) dlclose(l)
53 typedef enum cudaError_enum {
70 #if NVENCAPI_MAJOR_VERSION < 5
71 static const GUID dummy_license = { 0x0, 0x0, 0x0, { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 } };
176 mask = queue->
size - 1;
177 read_pos = (queue->
pos - queue->
count) & mask;
180 return &queue->
data[read_pos];
219 mask = queue->
size - 1;
222 queue->
pos = (queue->
pos + 1) & mask;
264 #define CHECK_LOAD_FUNC(t, f, s) \
266 (f) = (t)LOAD_FUNC(dl_fn->cuda_lib, s); \
268 av_log(avctx, AV_LOG_FATAL, "Failed loading %s from CUDA library\n", s); \
284 dl_fn->
cuda_lib = dlopen(
"libcuda.so", RTLD_LAZY);
321 #define check_cuda_errors(f) if (!check_cuda_errors(avctx, f, #f)) goto error
325 int device_count = 0;
328 int smminor = 0, smmajor = 0;
353 for (i = 0; i < device_count; ++i) {
358 smver = (smmajor << 4) | smminor;
360 av_log(avctx,
AV_LOG_VERBOSE,
"[ GPU #%d - < %s > has Compute SM %d.%d, NVENC %s ]\n", i, gpu_name, smmajor, smminor, (smver >= 0x30) ?
"Available" :
"Not Available");
383 NVENCSTATUS nvstatus;
395 if (
sizeof(
void*) == 8) {
401 dl_fn->
nvenc_lib = dlopen(
"libnvidia-encode.so.1", RTLD_LAZY);
411 if (!nvEncodeAPICreateInstance) {
416 dl_fn->
nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
418 nvstatus = nvEncodeAPICreateInstance(&dl_fn->
nvenc_funcs);
420 if (nvstatus != NV_ENC_SUCCESS) {
465 NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS encode_session_params = { 0 };
466 NV_ENC_PRESET_CONFIG preset_config = { 0 };
469 GUID encoder_preset = NV_ENC_PRESET_HQ_GUID;
470 NVENCSTATUS nv_status = NV_ENC_SUCCESS;
471 int surfaceCount = 0;
477 #if NVENCAPI_MAJOR_VERSION < 5
483 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->
nvenc_funcs;
498 preset_config.version = NV_ENC_PRESET_CONFIG_VER;
499 preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
500 encode_session_params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
501 encode_session_params.apiVersion = NVENCAPI_VERSION;
503 #if NVENCAPI_MAJOR_VERSION < 5
504 encode_session_params.clientKeyPtr = &license;
517 av_log(avctx,
AV_LOG_FATAL,
"Failed creating CUDA context for NVENC: 0x%x\n", (
int)cu_res);
530 encode_session_params.device = ctx->
cu_context;
531 encode_session_params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
533 nv_status = p_nvenc->nvEncOpenEncodeSessionEx(&encode_session_params, &ctx->
nvencoder);
534 if (nv_status != NV_ENC_SUCCESS) {
536 av_log(avctx,
AV_LOG_FATAL,
"OpenEncodeSessionEx failed: 0x%x - invalid license key?\n", (
int)nv_status);
542 if (!strcmp(ctx->
preset,
"hp")) {
543 encoder_preset = NV_ENC_PRESET_HP_GUID;
544 }
else if (!strcmp(ctx->
preset,
"hq")) {
545 encoder_preset = NV_ENC_PRESET_HQ_GUID;
546 }
else if (!strcmp(ctx->
preset,
"bd")) {
547 encoder_preset = NV_ENC_PRESET_BD_GUID;
548 }
else if (!strcmp(ctx->
preset,
"ll")) {
549 encoder_preset = NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID;
551 }
else if (!strcmp(ctx->
preset,
"llhp")) {
552 encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HP_GUID;
554 }
else if (!strcmp(ctx->
preset,
"llhq")) {
555 encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
557 }
else if (!strcmp(ctx->
preset,
"default")) {
558 encoder_preset = NV_ENC_PRESET_DEFAULT_GUID;
560 av_log(avctx,
AV_LOG_FATAL,
"Preset \"%s\" is unknown! Supported presets: hp, hq, bd, ll, llhp, llhq, default\n", ctx->
preset);
566 nv_status = p_nvenc->nvEncGetEncodePresetConfig(ctx->
nvencoder, NV_ENC_CODEC_H264_GUID, encoder_preset, &preset_config);
567 if (nv_status != NV_ENC_SUCCESS) {
592 if (avctx->
width == 720 &&
605 num_mbs = ((avctx->
width + 15) >> 4) * ((avctx->
height + 15) >> 4);
617 if (avctx->
refs >= 0) {
633 ctx->
encode_config.encodeCodecConfig.h264Config.idrPeriod = 1;
648 ctx->
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
649 }
else if (ctx->
twopass == 1 || isLL) {
650 ctx->
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_QUALITY;
652 ctx->
encode_config.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
653 ctx->
encode_config.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
656 av_log(avctx,
AV_LOG_WARNING,
"Twopass mode is only known to work with low latency (ll, llhq, llhp) presets.\n");
658 ctx->
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
661 ctx->
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
668 }
else if (avctx->
qmin >= 0 && avctx->
qmax >= 0) {
669 ctx->
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
687 ctx->
encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
689 ctx->
encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
694 ctx->
encode_config.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
697 ctx->
encode_config.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
701 ctx->
encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
705 ctx->
encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
709 ctx->
encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag = 1;
710 ctx->
encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag = 1;
714 ctx->
encode_config.encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics = avctx->
color_trc;
722 if (nv_status != NV_ENC_SUCCESS) {
743 NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 };
744 NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
745 allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
746 allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
748 allocSurf.width = (avctx->
width + 31) & ~31;
749 allocSurf.height = (avctx->
height + 31) & ~31;
751 allocSurf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
755 allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YV12_PL;
759 allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12_PL;
763 allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YUV444_PL;
772 nv_status = p_nvenc->nvEncCreateInputBuffer(ctx->
nvencoder, &allocSurf);
773 if (nv_status != NV_ENC_SUCCESS){ /* compare, do not assign: the former '=' overwrote the status returned by nvEncCreateInputBuffer, so a failed input-surface allocation went undetected */
786 allocOut.size = 1024 * 1024;
788 allocOut.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
790 nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->
nvencoder, &allocOut);
791 if (nv_status != NV_ENC_SUCCESS) { /* compare, do not assign: the former '=' overwrote the status returned by nvEncCreateBitstreamBuffer, so a failed output-buffer allocation went undetected */
804 uint32_t outSize = 0;
806 NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
807 payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
809 payload.spsppsBuffer = tmpHeader;
810 payload.inBufferSize =
sizeof(tmpHeader);
811 payload.outSPSPPSPayloadSize = &outSize;
813 nv_status = p_nvenc->nvEncGetSequenceParams(ctx->
nvencoder, &payload);
814 if (nv_status != NV_ENC_SUCCESS) {
827 memcpy(avctx->
extradata, tmpHeader, outSize);
840 for (i = 0; i < surfaceCount; ++i) {
847 p_nvenc->nvEncDestroyEncoder(ctx->
nvencoder);
866 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->
nvenc_funcs;
879 p_nvenc->nvEncDestroyEncoder(ctx->
nvencoder);
896 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->
nvenc_funcs;
898 uint32_t *slice_offsets =
av_mallocz(ctx->
encode_config.encodeCodecConfig.h264Config.sliceModeData *
sizeof(*slice_offsets));
899 NV_ENC_LOCK_BITSTREAM lock_params = { 0 };
900 NVENCSTATUS nv_status;
906 lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;
908 lock_params.doNotWait = 0;
910 lock_params.sliceOffsets = slice_offsets;
912 nv_status = p_nvenc->nvEncLockBitstream(ctx->
nvencoder, &lock_params);
913 if (nv_status != NV_ENC_SUCCESS) {
924 memcpy(pkt->
data, lock_params.bitstreamBufferPtr, lock_params.bitstreamSizeInBytes);
927 if (nv_status != NV_ENC_SUCCESS)
928 av_log(avctx,
AV_LOG_ERROR,
"Failed unlocking bitstream buffer, expect the gates of mordor to open\n");
930 switch (lock_params.pictureType) {
931 case NV_ENC_PIC_TYPE_IDR:
933 case NV_ENC_PIC_TYPE_I:
936 case NV_ENC_PIC_TYPE_P:
939 case NV_ENC_PIC_TYPE_B:
942 case NV_ENC_PIC_TYPE_BI:
946 av_log(avctx,
AV_LOG_ERROR,
"Unknown picture type encountered, expect the output to be broken.\n");
947 av_log(avctx,
AV_LOG_ERROR,
"Please report this error and include as much information on how to reproduce it as possible.\n");
952 pkt->
pts = lock_params.outputTimeStamp;
982 NVENCSTATUS nv_status;
988 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->
nvenc_funcs;
990 NV_ENC_PIC_PARAMS pic_params = { 0 };
991 pic_params.version = NV_ENC_PIC_PARAMS_VER;
994 NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };
1008 lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
1011 nv_status = p_nvenc->nvEncLockInputBuffer(ctx->
nvencoder, &lockBufferParams);
1012 if (nv_status != NV_ENC_SUCCESS) {
1018 uint8_t *
buf = lockBufferParams.bufferDataPtr;
1024 buf += inSurf->
height * lockBufferParams.pitch;
1030 buf += (inSurf->
height * lockBufferParams.pitch) >> 2;
1036 uint8_t *
buf = lockBufferParams.bufferDataPtr;
1042 buf += inSurf->
height * lockBufferParams.pitch;
1048 uint8_t *
buf = lockBufferParams.bufferDataPtr;
1054 buf += inSurf->
height * lockBufferParams.pitch;
1060 buf += inSurf->
height * lockBufferParams.pitch;
1071 if (nv_status != NV_ENC_SUCCESS) {
1089 pic_params.bufferFmt = inSurf->
format;
1090 pic_params.inputWidth = avctx->
width;
1091 pic_params.inputHeight = avctx->
height;
1093 pic_params.completionEvent = 0;
1097 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
1099 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
1102 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
1105 pic_params.encodePicFlags = 0;
1106 pic_params.inputTimeStamp = frame->
pts;
1107 pic_params.inputDuration = 0;
1108 pic_params.codecPicParams.h264PicParams.sliceMode = ctx->
encode_config.encodeCodecConfig.h264Config.sliceMode;
1109 pic_params.codecPicParams.h264PicParams.sliceModeData = ctx->
encode_config.encodeCodecConfig.h264Config.sliceModeData;
1111 #if NVENCAPI_MAJOR_VERSION < 5
1112 memcpy(&pic_params.rcParams, &ctx->
encode_config.rcParams,
sizeof(NV_ENC_RC_PARAMS));
1120 pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
1123 nv_status = p_nvenc->nvEncEncodePicture(ctx->
nvencoder, &pic_params);
1125 if (frame && nv_status == NV_ENC_ERR_NEED_MORE_INPUT) {
1134 if (nv_status != NV_ENC_SUCCESS && nv_status != NV_ENC_ERR_NEED_MORE_INPUT) {
1139 if (nv_status != NV_ENC_ERR_NEED_MORE_INPUT) {
1166 tmpoutsurf->
busy = 0;
1183 #define OFFSET(x) offsetof(NvencContext, x)
1184 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1186 {
"preset",
"Set the encoding preset (one of hq, hp, bd, ll, llhq, llhp, default)",
OFFSET(
preset),
AV_OPT_TYPE_STRING, { .str =
"hq" }, 0, 0,
VE },
1188 {
"2pass",
"Use 2pass cbr encoding mode (low latency mode only)",
OFFSET(twopass),
AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1,
VE },
1189 {
"gpu",
"Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.",
OFFSET(gpu),
AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX,
VE },
1220 .priv_class = &nvenc_class,
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size)
Check AVPacket size and/or allocate data.
static av_cold int nvenc_check_cuda(AVCodecContext *avctx)
NvencOutputSurface * surface
This structure describes decoded (raw) audio or video data.
ptrdiff_t const GLvoid * data
NvencDataList output_surface_queue
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
NvencInputSurface * input_surfaces
#define AV_LOG_WARNING
Something somehow does not look correct.
#define LIBAVUTIL_VERSION_INT
memory handling functions
AVFrame * coded_frame
the picture in the bitstream
static av_cold int init(AVCodecContext *avctx)
int max_b_frames
maximum number of B-frames between non-B-frames Note: The output will be delayed by max_b_frames+1 re...
NVENCSTATUS(NVENCAPI * PNVENCODEAPICREATEINSTANCE)(NV_ENCODE_API_FUNCTION_LIST *functionList)
enum AVColorRange color_range
MPEG vs JPEG YUV range.
PCUCTXDESTROY cu_ctx_destroy
NV_ENCODE_API_FUNCTION_LIST nvenc_funcs
NvencDynLoadFunctions nvenc_dload_funcs
AVRational sample_aspect_ratio
sample aspect ratio (0 if unknown) That is the width of a pixel divided by the height of the pixel...
PCUDEVICEGETNAME cu_device_get_name
enum AVPixelFormat pix_fmt
Pixel format, see AV_PIX_FMT_xxx.
CUresult(CUDAAPI * PCUDEVICECOMPUTECAPABILITY)(int *major, int *minor, CUdevice dev)
static const AVOption options[]
NvencDataList timestamp_list
#define FF_PROFILE_H264_MAIN
static const AVClass nvenc_class
AVRational time_base
This is the fundamental unit of time (in seconds) in terms of which frame timestamps are represented...
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass...
#define av_assert0(cond)
assert() equivalent, that is always enabled.
CUresult(CUDAAPI * PCUDEVICEGET)(CUdevice *device, int ordinal)
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
#define CHECK_LOAD_FUNC(t, f, s)
PCUDEVICEGETCOUNT cu_device_get_count
#define CODEC_FLAG_GLOBAL_HEADER
Place global headers in extradata instead of every keyframe.
Multithreading support functions.
PCUDEVICECOMPUTECAPABILITY cu_device_compute_capability
#define FF_PROFILE_UNKNOWN
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
NV_ENC_INITIALIZE_PARAMS init_encode_params
#define AV_LOG_VERBOSE
Detailed information.
#define check_cuda_errors(f)
NvencOutputSurface * output_surfaces
static av_cold int nvenc_encode_close(AVCodecContext *avctx)
#define AV_PKT_FLAG_KEY
The packet contains a keyframe.
NvencDataList output_surface_ready_queue
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
int has_b_frames
Size of the frame reordering buffer in the decoder.
static const uint16_t mask[17]
static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, AVFrame *coded_frame, NvencOutputSurface *tmpoutsurf)
#define CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
int qmax
maximum quantizer
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
PCUDEVICEGET cu_device_get
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
int rc_max_rate
maximum bitrate
simple assert() macros that are a bit more flexible than ISO C assert().
const char * name
Name of the codec implementation.
Libavcodec external API header.
int flags
A combination of AV_PKT_FLAG values.
int rc_buffer_size
decoder bitstream buffer size
int av_reduce(int *dst_num, int *dst_den, int64_t num, int64_t den, int64_t max)
Reduce a fraction.
common internal API header
#define FF_INPUT_BUFFER_PADDING_SIZE
Required number of additionally allocated bytes at the end of the input bitstream for decoding...
int refs
number of reference frames
int bit_rate
the average bitrate
enum AVPictureType pict_type
Picture type of the frame.
int width
picture width / height.
enum AVColorPrimaries color_primaries
Chromaticity coordinates of the source primaries.
#define CODEC_FLAG_INTERLACED_DCT
Use interlaced DCT.
CUresult(CUDAAPI * PCUDEVICEGETCOUNT)(int *count)
int ticks_per_frame
For some codecs, the time base is closer to the field rate than the frame rate.
static int out_surf_queue_enqueue(NvencDataList *queue, NvencOutputSurface *surface)
CUresult(CUDAAPI * PCUDEVICEGETNAME)(char *name, int len, CUdevice dev)
the normal 2^n-1 "JPEG" YUV ranges
static av_cold int nvenc_encode_init(AVCodecContext *avctx)
static NvencOutputSurface * out_surf_queue_dequeue(NvencDataList *queue)
PCUCTXCREATE cu_ctx_create
NV_ENC_CONFIG encode_config
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
static av_cold void nvenc_unload_nvenc(AVCodecContext *avctx)
main external API structure.
int qmin
minimum quantizer
static int timestamp_queue_enqueue(NvencDataList *queue, int64_t timestamp)
static av_cold int nvenc_dyload_cuda(AVCodecContext *avctx)
Describe the class of an AVClass context structure.
enum AVColorSpace colorspace
YUV colorspace type.
enum AVColorTransferCharacteristic color_trc
Color Transfer Characteristic.
int(* func)(AVBPrint *dst, const char *in, const char *arg)
static int64_t timestamp_queue_dequeue(NvencDataList *queue)
int global_quality
Global quality for codecs which cannot change it per frame.
static av_cold int nvenc_dyload_nvenc(AVCodecContext *avctx)
static const AVCodecDefault nvenc_defaults[]
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
int gop_size
the number of pictures in a group of pictures, or 0 for intra_only
#define FF_PROFILE_H264_HIGH
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
common internal api header.
CUresult(CUDAAPI * PCUCTXDESTROY)(CUcontext ctx)
PCUCTXPOPCURRENT cu_ctx_pop_current
static const GUID dummy_license
CUdevice nvenc_devices[16]
static enum AVPixelFormat pix_fmts_nvenc[]
CUresult(CUDAAPI * PCUCTXCREATE)(CUcontext *pctx, unsigned int flags, CUdevice dev)
CUresult(CUDAAPI * PCUINIT)(unsigned int Flags)
int top_field_first
If the content is interlaced, is top field displayed first.
#define FF_PROFILE_H264_BASELINE
NvencInputSurface * input_surface
static int nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame, int *got_packet)
NV_ENC_OUTPUT_PTR output_surface
int64_t dts
Decompression timestamp in AVStream->time_base units; the time at which the packet is decompressed...
#define AV_LOG_FATAL
Something went wrong and recovery is not possible.
static NvencData * data_queue_dequeue(NvencDataList *queue)
static const AVCodecDefault defaults[]
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
#define AVERROR_EXTERNAL
Generic error in an external library.
AVPixelFormat
Pixel format.
This structure stores compressed data.
void * av_mallocz(size_t size)
Allocate a block of size bytes with alignment suitable for all memory accesses (including vectors if ...
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
CUresult(CUDAAPI * PCUCTXPOPCURRENT)(CUcontext *pctx)
#define AV_NOPTS_VALUE
Undefined timestamp value.
static int data_queue_enqueue(NvencDataList *queue, NvencData *data)