WebM Codec SDK
vp9_spatial_svc_encoder
1 /*
2  * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3  *
4  * Use of this source code is governed by a BSD-style license
5  * that can be found in the LICENSE file in the root of the source
6  * tree. An additional intellectual property rights grant can be found
7  * in the file PATENTS. All contributing project authors may
8  * be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 /*
12  * This is an example demonstrating how to implement a multi-layer
13  * VP9 encoding scheme based on spatial scalability for video applications
14  * that benefit from a scalable bitstream.
15  */
16 
17 #include <math.h>
18 #include <stdarg.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <time.h>
22 
23 
24 #include "../args.h"
25 #include "../tools_common.h"
26 #include "../video_writer.h"
27 
28 #include "../vpx_ports/vpx_timer.h"
29 #include "vpx/svc_context.h"
30 #include "vpx/vp8cx.h"
31 #include "vpx/vpx_encoder.h"
32 #include "../vpxstats.h"
33 #include "vp9/encoder/vp9_encoder.h"
34 #define OUTPUT_RC_STATS 1
35 
36 static const arg_def_t skip_frames_arg =
37  ARG_DEF("s", "skip-frames", 1, "input frames to skip");
38 static const arg_def_t frames_arg =
39  ARG_DEF("f", "frames", 1, "number of frames to encode");
40 static const arg_def_t threads_arg =
41  ARG_DEF("th", "threads", 1, "number of threads to use");
42 #if OUTPUT_RC_STATS
43 static const arg_def_t output_rc_stats_arg =
44  ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats");
45 #endif
46 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
47 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
48 static const arg_def_t timebase_arg =
49  ARG_DEF("t", "timebase", 1, "timebase (num/den)");
50 static const arg_def_t bitrate_arg = ARG_DEF(
51  "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second");
52 static const arg_def_t spatial_layers_arg =
53  ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
54 static const arg_def_t temporal_layers_arg =
55  ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
56 static const arg_def_t temporal_layering_mode_arg =
57  ARG_DEF("tlm", "temporal-layering-mode", 1, "temporal layering scheme."
58  "VP9E_TEMPORAL_LAYERING_MODE");
59 static const arg_def_t kf_dist_arg =
60  ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
61 static const arg_def_t scale_factors_arg =
62  ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)");
63 static const arg_def_t passes_arg =
64  ARG_DEF("p", "passes", 1, "Number of passes (1/2)");
65 static const arg_def_t pass_arg =
66  ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)");
67 static const arg_def_t fpf_name_arg =
68  ARG_DEF(NULL, "fpf", 1, "First pass statistics file name");
69 static const arg_def_t min_q_arg =
70  ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
71 static const arg_def_t max_q_arg =
72  ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
73 static const arg_def_t min_bitrate_arg =
74  ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate");
75 static const arg_def_t max_bitrate_arg =
76  ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate");
77 static const arg_def_t lag_in_frame_arg =
78  ARG_DEF(NULL, "lag-in-frames", 1, "Number of frame to input before "
79  "generating any outputs");
80 static const arg_def_t rc_end_usage_arg =
81  ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
82 static const arg_def_t speed_arg =
83  ARG_DEF("sp", "speed", 1, "speed configuration");
84 static const arg_def_t aqmode_arg =
85  ARG_DEF("aq", "aqmode", 1, "aq-mode off/on");
86 
#if CONFIG_VP9_HIGHBITDEPTH
// Textual values accepted by the bit-depth option and the VPX_BITS_*
// constant each one selects. The table ends with a {NULL, 0} entry.
static const struct arg_enum_list bitdepth_enum[] = {
  { "8", VPX_BITS_8 },
  { "10", VPX_BITS_10 },
  { "12", VPX_BITS_12 },
  { NULL, 0 }
};

// Bit-depth option; only available in high-bitdepth builds.
static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
    "d", "bit-depth", 1, "Bit depth for codec 8, 10 or 12. ", bitdepth_enum);
#endif  // CONFIG_VP9_HIGHBITDEPTH
99 
100 
101 static const arg_def_t *svc_args[] = {
102  &frames_arg, &width_arg, &height_arg,
103  &timebase_arg, &bitrate_arg, &skip_frames_arg, &spatial_layers_arg,
104  &kf_dist_arg, &scale_factors_arg, &passes_arg, &pass_arg,
105  &fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg,
106  &max_bitrate_arg, &temporal_layers_arg, &temporal_layering_mode_arg,
107  &lag_in_frame_arg, &threads_arg, &aqmode_arg,
108 #if OUTPUT_RC_STATS
109  &output_rc_stats_arg,
110 #endif
111 
112 #if CONFIG_VP9_HIGHBITDEPTH
113  &bitdepth_arg,
114 #endif
115  &speed_arg,
116  &rc_end_usage_arg, NULL
117 };
118 
// Application defaults applied by parse_command_line() before the
// command-line options are processed.

// Input handling.
static const uint32_t default_frames_to_skip = 0;
static const uint32_t default_frames_to_code = 60 * 60;

// Source geometry and timebase (num/den).
static const uint32_t default_width = 1920;
static const uint32_t default_height = 1080;
static const uint32_t default_timebase_num = 1;
static const uint32_t default_timebase_den = 60;

// Rate, layering and keyframe settings.
static const uint32_t default_bitrate = 1000;
static const uint32_t default_spatial_layers = 5;
static const uint32_t default_temporal_layers = 1;
static const uint32_t default_kf_dist = 100;
static const uint32_t default_temporal_layering_mode = 0;
static const uint32_t default_output_rc_stats = 0;

// Values that defer to the library.
static const int32_t default_speed = -1;    // -1 means use library default.
static const uint32_t default_threads = 0;  // zero means use library default.
133 
134 typedef struct {
135  const char *input_filename;
136  const char *output_filename;
137  uint32_t frames_to_code;
138  uint32_t frames_to_skip;
139  struct VpxInputContext input_ctx;
140  stats_io_t rc_stats;
141  int passes;
142  int pass;
143 } AppInput;
144 
145 static const char *exec_name;
146 
147 void usage_exit(void) {
148  fprintf(stderr, "Usage: %s <options> input_filename output_filename\n",
149  exec_name);
150  fprintf(stderr, "Options:\n");
151  arg_show_usage(stderr, svc_args);
152  exit(EXIT_FAILURE);
153 }
154 
155 static void parse_command_line(int argc, const char **argv_,
156  AppInput *app_input, SvcContext *svc_ctx,
157  vpx_codec_enc_cfg_t *enc_cfg) {
158  struct arg arg = {0};
159  char **argv = NULL;
160  char **argi = NULL;
161  char **argj = NULL;
162  vpx_codec_err_t res;
163  int passes = 0;
164  int pass = 0;
165  const char *fpf_file_name = NULL;
166  unsigned int min_bitrate = 0;
167  unsigned int max_bitrate = 0;
168  char string_options[1024] = {0};
169 
170  // initialize SvcContext with parameters that will be passed to vpx_svc_init
171  svc_ctx->log_level = SVC_LOG_DEBUG;
172  svc_ctx->spatial_layers = default_spatial_layers;
173  svc_ctx->temporal_layers = default_temporal_layers;
174  svc_ctx->temporal_layering_mode = default_temporal_layering_mode;
175 #if OUTPUT_RC_STATS
176  svc_ctx->output_rc_stat = default_output_rc_stats;
177 #endif
178  svc_ctx->speed = default_speed;
179  svc_ctx->threads = default_threads;
180 
181  // start with default encoder configuration
182  res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
183  if (res) {
184  die("Failed to get config: %s\n", vpx_codec_err_to_string(res));
185  }
186  // update enc_cfg with app default values
187  enc_cfg->g_w = default_width;
188  enc_cfg->g_h = default_height;
189  enc_cfg->g_timebase.num = default_timebase_num;
190  enc_cfg->g_timebase.den = default_timebase_den;
191  enc_cfg->rc_target_bitrate = default_bitrate;
192  enc_cfg->kf_min_dist = default_kf_dist;
193  enc_cfg->kf_max_dist = default_kf_dist;
194  enc_cfg->rc_end_usage = VPX_CQ;
195 
196  // initialize AppInput with default values
197  app_input->frames_to_code = default_frames_to_code;
198  app_input->frames_to_skip = default_frames_to_skip;
199 
200  // process command line options
201  argv = argv_dup(argc - 1, argv_ + 1);
202  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
203  arg.argv_step = 1;
204 
205  if (arg_match(&arg, &frames_arg, argi)) {
206  app_input->frames_to_code = arg_parse_uint(&arg);
207  } else if (arg_match(&arg, &width_arg, argi)) {
208  enc_cfg->g_w = arg_parse_uint(&arg);
209  } else if (arg_match(&arg, &height_arg, argi)) {
210  enc_cfg->g_h = arg_parse_uint(&arg);
211  } else if (arg_match(&arg, &timebase_arg, argi)) {
212  enc_cfg->g_timebase = arg_parse_rational(&arg);
213  } else if (arg_match(&arg, &bitrate_arg, argi)) {
214  enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
215  } else if (arg_match(&arg, &skip_frames_arg, argi)) {
216  app_input->frames_to_skip = arg_parse_uint(&arg);
217  } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
218  svc_ctx->spatial_layers = arg_parse_uint(&arg);
219  } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
220  svc_ctx->temporal_layers = arg_parse_uint(&arg);
221 #if OUTPUT_RC_STATS
222  } else if (arg_match(&arg, &output_rc_stats_arg, argi)) {
223  svc_ctx->output_rc_stat = arg_parse_uint(&arg);
224 #endif
225  } else if (arg_match(&arg, &speed_arg, argi)) {
226  svc_ctx->speed = arg_parse_uint(&arg);
227  } else if (arg_match(&arg, &aqmode_arg, argi)) {
228  svc_ctx->aqmode = arg_parse_uint(&arg);
229  } else if (arg_match(&arg, &threads_arg, argi)) {
230  svc_ctx->threads = arg_parse_uint(&arg);
231  } else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
232  svc_ctx->temporal_layering_mode =
233  enc_cfg->temporal_layering_mode = arg_parse_int(&arg);
234  if (svc_ctx->temporal_layering_mode) {
235  enc_cfg->g_error_resilient = 1;
236  }
237  } else if (arg_match(&arg, &kf_dist_arg, argi)) {
238  enc_cfg->kf_min_dist = arg_parse_uint(&arg);
239  enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
240  } else if (arg_match(&arg, &scale_factors_arg, argi)) {
241  snprintf(string_options, sizeof(string_options), "%s scale-factors=%s",
242  string_options, arg.val);
243  } else if (arg_match(&arg, &passes_arg, argi)) {
244  passes = arg_parse_uint(&arg);
245  if (passes < 1 || passes > 2) {
246  die("Error: Invalid number of passes (%d)\n", passes);
247  }
248  } else if (arg_match(&arg, &pass_arg, argi)) {
249  pass = arg_parse_uint(&arg);
250  if (pass < 1 || pass > 2) {
251  die("Error: Invalid pass selected (%d)\n", pass);
252  }
253  } else if (arg_match(&arg, &fpf_name_arg, argi)) {
254  fpf_file_name = arg.val;
255  } else if (arg_match(&arg, &min_q_arg, argi)) {
256  snprintf(string_options, sizeof(string_options), "%s min-quantizers=%s",
257  string_options, arg.val);
258  } else if (arg_match(&arg, &max_q_arg, argi)) {
259  snprintf(string_options, sizeof(string_options), "%s max-quantizers=%s",
260  string_options, arg.val);
261  } else if (arg_match(&arg, &min_bitrate_arg, argi)) {
262  min_bitrate = arg_parse_uint(&arg);
263  } else if (arg_match(&arg, &max_bitrate_arg, argi)) {
264  max_bitrate = arg_parse_uint(&arg);
265  } else if (arg_match(&arg, &lag_in_frame_arg, argi)) {
266  enc_cfg->g_lag_in_frames = arg_parse_uint(&arg);
267  } else if (arg_match(&arg, &rc_end_usage_arg, argi)) {
268  enc_cfg->rc_end_usage = arg_parse_uint(&arg);
269 #if CONFIG_VP9_HIGHBITDEPTH
270  } else if (arg_match(&arg, &bitdepth_arg, argi)) {
271  enc_cfg->g_bit_depth = arg_parse_enum_or_int(&arg);
272  switch (enc_cfg->g_bit_depth) {
273  case VPX_BITS_8:
274  enc_cfg->g_input_bit_depth = 8;
275  enc_cfg->g_profile = 0;
276  break;
277  case VPX_BITS_10:
278  enc_cfg->g_input_bit_depth = 10;
279  enc_cfg->g_profile = 2;
280  break;
281  case VPX_BITS_12:
282  enc_cfg->g_input_bit_depth = 12;
283  enc_cfg->g_profile = 2;
284  break;
285  default:
286  die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
287  break;
288  }
289 #endif // CONFIG_VP9_HIGHBITDEPTH
290  } else {
291  ++argj;
292  }
293  }
294 
295  // There will be a space in front of the string options
296  if (strlen(string_options) > 0)
297  vpx_svc_set_options(svc_ctx, string_options + 1);
298 
299  if (passes == 0 || passes == 1) {
300  if (pass) {
301  fprintf(stderr, "pass is ignored since there's only one pass\n");
302  }
303  enc_cfg->g_pass = VPX_RC_ONE_PASS;
304  } else {
305  if (pass == 0) {
306  die("pass must be specified when passes is 2\n");
307  }
308 
309  if (fpf_file_name == NULL) {
310  die("fpf must be specified when passes is 2\n");
311  }
312 
313  if (pass == 1) {
314  enc_cfg->g_pass = VPX_RC_FIRST_PASS;
315  if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 0)) {
316  fatal("Failed to open statistics store");
317  }
318  } else {
319  enc_cfg->g_pass = VPX_RC_LAST_PASS;
320  if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 1)) {
321  fatal("Failed to open statistics store");
322  }
323  enc_cfg->rc_twopass_stats_in = stats_get(&app_input->rc_stats);
324  }
325  app_input->passes = passes;
326  app_input->pass = pass;
327  }
328 
329  if (enc_cfg->rc_target_bitrate > 0) {
330  if (min_bitrate > 0) {
331  enc_cfg->rc_2pass_vbr_minsection_pct =
332  min_bitrate * 100 / enc_cfg->rc_target_bitrate;
333  }
334  if (max_bitrate > 0) {
335  enc_cfg->rc_2pass_vbr_maxsection_pct =
336  max_bitrate * 100 / enc_cfg->rc_target_bitrate;
337  }
338  }
339 
340  // Check for unrecognized options
341  for (argi = argv; *argi; ++argi)
342  if (argi[0][0] == '-' && strlen(argi[0]) > 1)
343  die("Error: Unrecognized option %s\n", *argi);
344 
345  if (argv[0] == NULL || argv[1] == 0) {
346  usage_exit();
347  }
348  app_input->input_filename = argv[0];
349  app_input->output_filename = argv[1];
350  free(argv);
351 
352  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
353  enc_cfg->g_h % 2)
354  die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
355 
356  printf(
357  "Codec %s\nframes: %d, skip: %d\n"
358  "layers: %d\n"
359  "width %d, height: %d,\n"
360  "num: %d, den: %d, bitrate: %d,\n"
361  "gop size: %d\n",
362  vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code,
363  app_input->frames_to_skip,
364  svc_ctx->spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
365  enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
366  enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
367 }
368 
369 #if OUTPUT_RC_STATS
370 // For rate control encoding stats.
371 struct RateControlStats {
372  // Number of input frames per layer.
373  int layer_input_frames[VPX_MAX_LAYERS];
374  // Total (cumulative) number of encoded frames per layer.
375  int layer_tot_enc_frames[VPX_MAX_LAYERS];
376  // Number of encoded non-key frames per layer.
377  int layer_enc_frames[VPX_MAX_LAYERS];
378  // Framerate per layer (cumulative).
379  double layer_framerate[VPX_MAX_LAYERS];
380  // Target average frame size per layer (per-frame-bandwidth per layer).
381  double layer_pfb[VPX_MAX_LAYERS];
382  // Actual average frame size per layer.
383  double layer_avg_frame_size[VPX_MAX_LAYERS];
384  // Average rate mismatch per layer (|target - actual| / target).
385  double layer_avg_rate_mismatch[VPX_MAX_LAYERS];
386  // Actual encoding bitrate per layer (cumulative).
387  double layer_encoding_bitrate[VPX_MAX_LAYERS];
388  // Average of the short-time encoder actual bitrate.
389  // TODO(marpan): Should we add these short-time stats for each layer?
390  double avg_st_encoding_bitrate;
391  // Variance of the short-time encoder actual bitrate.
392  double variance_st_encoding_bitrate;
393  // Window (number of frames) for computing short-time encoding bitrate.
394  int window_size;
395  // Number of window measurements.
396  int window_count;
397 };
398 
399 // Note: these rate control stats assume only 1 key frame in the
400 // sequence (i.e., first frame only).
401 static void set_rate_control_stats(struct RateControlStats *rc,
402  vpx_codec_enc_cfg_t *cfg) {
403  unsigned int sl, tl;
404  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
405  // per-frame-bandwidth, for the rate control encoding stats below.
406  const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
407 
408  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
409  for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
410  const int layer = sl * cfg->ts_number_layers + tl;
411  const int tlayer0 = sl * cfg->ts_number_layers;
412  if (cfg->ts_number_layers == 1)
413  rc->layer_framerate[layer] = framerate;
414  else
415  rc->layer_framerate[layer] =
416  framerate / cfg->ts_rate_decimator[tl];
417  if (tl > 0) {
418  rc->layer_pfb[layer] = 1000.0 *
419  (cfg->layer_target_bitrate[layer] -
420  cfg->layer_target_bitrate[layer - 1]) /
421  (rc->layer_framerate[layer] -
422  rc->layer_framerate[layer - 1]);
423  } else {
424  rc->layer_pfb[tlayer0] = 1000.0 *
425  cfg->layer_target_bitrate[tlayer0] /
426  rc->layer_framerate[tlayer0];
427  }
428  rc->layer_input_frames[layer] = 0;
429  rc->layer_enc_frames[layer] = 0;
430  rc->layer_tot_enc_frames[layer] = 0;
431  rc->layer_encoding_bitrate[layer] = 0.0;
432  rc->layer_avg_frame_size[layer] = 0.0;
433  rc->layer_avg_rate_mismatch[layer] = 0.0;
434  }
435  }
436  rc->window_count = 0;
437  rc->window_size = 15;
438  rc->avg_st_encoding_bitrate = 0.0;
439  rc->variance_st_encoding_bitrate = 0.0;
440 }
441 
442 static void printout_rate_control_summary(struct RateControlStats *rc,
443  vpx_codec_enc_cfg_t *cfg,
444  int frame_cnt) {
445  unsigned int sl, tl;
446  int tot_num_frames = 0;
447  double perc_fluctuation = 0.0;
448  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
449  printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
451  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
452  for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
453  const int layer = sl * cfg->ts_number_layers + tl;
454  const int num_dropped = (tl > 0) ?
455  (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer]) :
456  (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] - 1);
457  if (!sl)
458  tot_num_frames += rc->layer_input_frames[layer];
459  rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
460  rc->layer_encoding_bitrate[layer] / tot_num_frames;
461  rc->layer_avg_frame_size[layer] = rc->layer_avg_frame_size[layer] /
462  rc->layer_enc_frames[layer];
463  rc->layer_avg_rate_mismatch[layer] =
464  100.0 * rc->layer_avg_rate_mismatch[layer] /
465  rc->layer_enc_frames[layer];
466  printf("For layer#: sl%d tl%d \n", sl, tl);
467  printf("Bitrate (target vs actual): %d %f.0 kbps\n",
468  cfg->layer_target_bitrate[layer],
469  rc->layer_encoding_bitrate[layer]);
470  printf("Average frame size (target vs actual): %f %f bits\n",
471  rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]);
472  printf("Average rate_mismatch: %f\n",
473  rc->layer_avg_rate_mismatch[layer]);
474  printf("Number of input frames, encoded (non-key) frames, "
475  "and percent dropped frames: %d %d %f.0 \n",
476  rc->layer_input_frames[layer], rc->layer_enc_frames[layer],
477  100.0 * num_dropped / rc->layer_input_frames[layer]);
478  printf("\n");
479  }
480  }
481  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
482  rc->variance_st_encoding_bitrate =
483  rc->variance_st_encoding_bitrate / rc->window_count -
484  (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
485  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
486  rc->avg_st_encoding_bitrate;
487  printf("Short-time stats, for window of %d frames: \n", rc->window_size);
488  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
489  rc->avg_st_encoding_bitrate,
490  sqrt(rc->variance_st_encoding_bitrate),
491  perc_fluctuation);
492  if (frame_cnt != tot_num_frames)
493  die("Error: Number of input frames not equal to output encoded frames != "
494  "%d tot_num_frames = %d\n", frame_cnt, tot_num_frames);
495 }
496 
497 vpx_codec_err_t parse_superframe_index(const uint8_t *data,
498  size_t data_sz,
499  uint32_t sizes[8], int *count) {
500  // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
501  // it is a super frame index. If the last byte of real video compression
502  // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
503  // not the associated matching marker byte at the front of the index we have
504  // an invalid bitstream and need to return an error.
505 
506  uint8_t marker;
507 
508  marker = *(data + data_sz - 1);
509  *count = 0;
510 
511 
512  if ((marker & 0xe0) == 0xc0) {
513  const uint32_t frames = (marker & 0x7) + 1;
514  const uint32_t mag = ((marker >> 3) & 0x3) + 1;
515  const size_t index_sz = 2 + mag * frames;
516 
517  // This chunk is marked as having a superframe index but doesn't have
518  // enough data for it, thus it's an invalid superframe index.
519  if (data_sz < index_sz)
521 
522  {
523  const uint8_t marker2 = *(data + data_sz - index_sz);
524 
525  // This chunk is marked as having a superframe index but doesn't have
526  // the matching marker byte at the front of the index therefore it's an
527  // invalid chunk.
528  if (marker != marker2)
530  }
531 
532  {
533  // Found a valid superframe index.
534  uint32_t i, j;
535  const uint8_t *x = &data[data_sz - index_sz + 1];
536 
537  for (i = 0; i < frames; ++i) {
538  uint32_t this_sz = 0;
539 
540  for (j = 0; j < mag; ++j)
541  this_sz |= (*x++) << (j * 8);
542  sizes[i] = this_sz;
543  }
544  *count = frames;
545  }
546  }
547  return VPX_CODEC_OK;
548 }
549 #endif
550 
551 // Example pattern for spatial layers and 2 temporal layers used in the
552 // bypass/flexible mode. The pattern corresponds to the pattern
553 // VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in
554 // non-flexible mode.
555 void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
556  int is_key_frame,
557  vpx_svc_ref_frame_config_t *ref_frame_config) {
558  for (sl = 0; sl < num_spatial_layers; ++sl) {
559  if (!tl) {
560  if (!sl) {
561  ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_GF |
565  } else {
566  if (is_key_frame) {
567  ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_LAST |
571  } else {
572  ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_ARF |
575  }
576  }
577  } else if (tl == 1) {
578  if (!sl) {
579  ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_GF |
583  } else {
584  ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_ARF |
587  }
588  }
589  if (tl == 0) {
590  ref_frame_config->lst_fb_idx[sl] = sl;
591  if (sl)
592  ref_frame_config->gld_fb_idx[sl] = sl - 1;
593  else
594  ref_frame_config->gld_fb_idx[sl] = 0;
595  ref_frame_config->alt_fb_idx[sl] = 0;
596  } else if (tl == 1) {
597  ref_frame_config->lst_fb_idx[sl] = sl;
598  ref_frame_config->gld_fb_idx[sl] = num_spatial_layers + sl - 1;
599  ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl;
600  }
601  }
602 }
603 
604 int main(int argc, const char **argv) {
605  AppInput app_input = {0};
606  VpxVideoWriter *writer = NULL;
607  VpxVideoInfo info = {0};
608  vpx_codec_ctx_t codec;
609  vpx_codec_enc_cfg_t enc_cfg;
610  SvcContext svc_ctx;
611  uint32_t i;
612  uint32_t frame_cnt = 0;
613  vpx_image_t raw;
614  vpx_codec_err_t res;
615  int pts = 0; /* PTS starts at 0 */
616  int frame_duration = 1; /* 1 timebase tick per frame */
617  FILE *infile = NULL;
618  int end_of_stream = 0;
619  int frames_received = 0;
620 #if OUTPUT_RC_STATS
621  VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL};
622  struct RateControlStats rc;
623  vpx_svc_layer_id_t layer_id;
624  vpx_svc_ref_frame_config_t ref_frame_config;
625  int sl, tl;
626  double sum_bitrate = 0.0;
627  double sum_bitrate2 = 0.0;
628  double framerate = 30.0;
629 #endif
630  struct vpx_usec_timer timer;
631  int64_t cx_time = 0;
632  memset(&svc_ctx, 0, sizeof(svc_ctx));
633  svc_ctx.log_print = 1;
634  exec_name = argv[0];
635  parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);
636 
637  // Allocate image buffer
638 #if CONFIG_VP9_HIGHBITDEPTH
639  if (!vpx_img_alloc(&raw, enc_cfg.g_input_bit_depth == 8 ?
640  VPX_IMG_FMT_I420 : VPX_IMG_FMT_I42016,
641  enc_cfg.g_w, enc_cfg.g_h, 32)) {
642  die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
643  }
644 #else
645  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) {
646  die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
647  }
648 #endif // CONFIG_VP9_HIGHBITDEPTH
649 
650  if (!(infile = fopen(app_input.input_filename, "rb")))
651  die("Failed to open %s for reading\n", app_input.input_filename);
652 
653  // Initialize codec
654  if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) !=
655  VPX_CODEC_OK)
656  die("Failed to initialize encoder\n");
657 
658 #if OUTPUT_RC_STATS
659  if (svc_ctx.output_rc_stat) {
660  set_rate_control_stats(&rc, &enc_cfg);
661  framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num;
662  }
663 #endif
664 
665  info.codec_fourcc = VP9_FOURCC;
666  info.time_base.numerator = enc_cfg.g_timebase.num;
667  info.time_base.denominator = enc_cfg.g_timebase.den;
668 
669  if (!(app_input.passes == 2 && app_input.pass == 1)) {
670  // We don't save the bitstream for the 1st pass on two pass rate control
671  writer = vpx_video_writer_open(app_input.output_filename, kContainerIVF,
672  &info);
673  if (!writer)
674  die("Failed to open %s for writing\n", app_input.output_filename);
675  }
676 #if OUTPUT_RC_STATS
677  // For now, just write temporal layer streams.
678  // TODO(wonkap): do spatial by re-writing superframe.
679  if (svc_ctx.output_rc_stat) {
680  for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
681  char file_name[PATH_MAX];
682 
683  snprintf(file_name, sizeof(file_name), "%s_t%d.ivf",
684  app_input.output_filename, tl);
685  outfile[tl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
686  if (!outfile[tl])
687  die("Failed to open %s for writing", file_name);
688  }
689  }
690 #endif
691 
692  // skip initial frames
693  for (i = 0; i < app_input.frames_to_skip; ++i)
694  vpx_img_read(&raw, infile);
695 
696  if (svc_ctx.speed != -1)
697  vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
698  if (svc_ctx.threads)
699  vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
700  if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
702 
703 
704  // Encode frames
705  while (!end_of_stream) {
706  vpx_codec_iter_t iter = NULL;
707  const vpx_codec_cx_pkt_t *cx_pkt;
708  if (frame_cnt >= app_input.frames_to_code || !vpx_img_read(&raw, infile)) {
709  // We need one extra vpx_svc_encode call at end of stream to flush
710  // encoder and get remaining data
711  end_of_stream = 1;
712  }
713 
714  // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates)
715  // and the buffer indices for each spatial layer of the current
716  // (super)frame to be encoded. The temporal layer_id for the current frame
717  // also needs to be set.
718  // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS"
719  // mode to "VP9E_LAYERING_MODE_BYPASS".
720  if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
721  layer_id.spatial_layer_id = 0;
722  // Example for 2 temporal layers.
723  if (frame_cnt % 2 == 0)
724  layer_id.temporal_layer_id = 0;
725  else
726  layer_id.temporal_layer_id = 1;
727  // Note that we only set the temporal layer_id, since we are calling
728  // the encode for the whole superframe. The encoder will internally loop
729  // over all the spatial layers for the current superframe.
730  vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
731  set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id,
732  svc_ctx.spatial_layers,
733  frame_cnt == 0,
734  &ref_frame_config);
736  &ref_frame_config);
737  // Keep track of input frames, to account for frame drops in rate control
738  // stats/metrics.
739  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
740  ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
741  layer_id.temporal_layer_id];
742  }
743  }
744 
745  vpx_usec_timer_start(&timer);
746  res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
747  pts, frame_duration, svc_ctx.speed >= 5 ?
749  vpx_usec_timer_mark(&timer);
750  cx_time += vpx_usec_timer_elapsed(&timer);
751 
752  printf("%s", vpx_svc_get_message(&svc_ctx));
753  fflush(stdout);
754  if (res != VPX_CODEC_OK) {
755  die_codec(&codec, "Failed to encode frame");
756  }
757 
758  while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
759  switch (cx_pkt->kind) {
760  case VPX_CODEC_CX_FRAME_PKT: {
761  SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal;
762  if (cx_pkt->data.frame.sz > 0) {
763 #if OUTPUT_RC_STATS
764  uint32_t sizes[8];
765  int count = 0;
766 #endif
767  vpx_video_writer_write_frame(writer,
768  cx_pkt->data.frame.buf,
769  cx_pkt->data.frame.sz,
770  cx_pkt->data.frame.pts);
771 #if OUTPUT_RC_STATS
772  // TODO(marpan/wonkap): Put this (to line728) in separate function.
773  if (svc_ctx.output_rc_stat) {
774  vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
775  parse_superframe_index(cx_pkt->data.frame.buf,
776  cx_pkt->data.frame.sz, sizes, &count);
777  // Note computing input_layer_frames here won't account for frame
778  // drops in rate control stats.
779  // TODO(marpan): Fix this for non-bypass mode so we can get stats
780  // for dropped frames.
781  if (svc_ctx.temporal_layering_mode !=
783  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
784  ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
785  layer_id.temporal_layer_id];
786  }
787  }
788  for (tl = layer_id.temporal_layer_id;
789  tl < enc_cfg.ts_number_layers; ++tl) {
790  vpx_video_writer_write_frame(outfile[tl],
791  cx_pkt->data.frame.buf,
792  cx_pkt->data.frame.sz,
793  cx_pkt->data.frame.pts);
794  }
795 
796  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
797  for (tl = layer_id.temporal_layer_id;
798  tl < enc_cfg.ts_number_layers; ++tl) {
799  const int layer = sl * enc_cfg.ts_number_layers + tl;
800  ++rc.layer_tot_enc_frames[layer];
801  rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
802  // Keep count of rate control stats per layer, for non-key
803  // frames.
804  if (tl == layer_id.temporal_layer_id &&
805  !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
806  rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
807  rc.layer_avg_rate_mismatch[layer] +=
808  fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /
809  rc.layer_pfb[layer];
810  ++rc.layer_enc_frames[layer];
811  }
812  }
813  }
814 
815  // Update for short-time encoding bitrate states, for moving
816  // window of size rc->window, shifted by rc->window / 2.
817  // Ignore first window segment, due to key frame.
818  if (frame_cnt > rc.window_size) {
819  tl = layer_id.temporal_layer_id;
820  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
821  sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
822  }
823  if (frame_cnt % rc.window_size == 0) {
824  rc.window_count += 1;
825  rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
826  rc.variance_st_encoding_bitrate +=
827  (sum_bitrate / rc.window_size) *
828  (sum_bitrate / rc.window_size);
829  sum_bitrate = 0.0;
830  }
831  }
832 
833  // Second shifted window.
834  if (frame_cnt > rc.window_size + rc.window_size / 2) {
835  tl = layer_id.temporal_layer_id;
836  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
837  sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
838  }
839 
840  if (frame_cnt > 2 * rc.window_size &&
841  frame_cnt % rc.window_size == 0) {
842  rc.window_count += 1;
843  rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
844  rc.variance_st_encoding_bitrate +=
845  (sum_bitrate2 / rc.window_size) *
846  (sum_bitrate2 / rc.window_size);
847  sum_bitrate2 = 0.0;
848  }
849  }
850  }
851 #endif
852  }
853 
854  printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
855  !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
856  (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
857  if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
858  si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
859  ++frames_received;
860  break;
861  }
862  case VPX_CODEC_STATS_PKT: {
863  stats_write(&app_input.rc_stats,
864  cx_pkt->data.twopass_stats.buf,
865  cx_pkt->data.twopass_stats.sz);
866  break;
867  }
868  default: {
869  break;
870  }
871  }
872  }
873 
874  if (!end_of_stream) {
875  ++frame_cnt;
876  pts += frame_duration;
877  }
878  }
879 
880  // Compensate for the extra frame count for the bypass mode.
881  if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
882  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
883  const int layer = sl * enc_cfg.ts_number_layers +
884  layer_id.temporal_layer_id;
885  --rc.layer_input_frames[layer];
886  }
887  }
888 
889  printf("Processed %d frames\n", frame_cnt);
890  fclose(infile);
891 #if OUTPUT_RC_STATS
892  if (svc_ctx.output_rc_stat) {
893  printout_rate_control_summary(&rc, &enc_cfg, frame_cnt);
894  printf("\n");
895  }
896 #endif
897  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
898  if (app_input.passes == 2)
899  stats_close(&app_input.rc_stats, 1);
900  if (writer) {
901  vpx_video_writer_close(writer);
902  }
903 #if OUTPUT_RC_STATS
904  if (svc_ctx.output_rc_stat) {
905  for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
906  vpx_video_writer_close(outfile[tl]);
907  }
908  }
909 #endif
910  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
911  frame_cnt,
912  1000 * (float)cx_time / (double)(frame_cnt * 1000000),
913  1000000 * (double)frame_cnt / (double)cx_time);
914  vpx_img_free(&raw);
915  // display average size, psnr
916  printf("%s", vpx_svc_dump_statistics(&svc_ctx));
917  vpx_svc_release(&svc_ctx);
918  return EXIT_SUCCESS;
919 }
vpx_fixed_buf_t twopass_stats
Definition: vpx_encoder.h:214
unsigned int ts_number_layers
Number of temporal coding layers.
Definition: vpx_encoder.h:715
Codec control function to set encoder internal speed settings.
Definition: vp8cx.h:164
#define VPX_MAX_LAYERS
Definition: vpx_encoder.h:46
#define VP8_EFLAG_NO_REF_LAST
Don't reference the last frame.
Definition: vp8cx.h:58
#define VP8_EFLAG_NO_UPD_GF
Don't update the golden frame.
Definition: vp8cx.h:92
Image Descriptor.
Definition: vpx_image.h:88
Describes the encoder algorithm interface to applications.
const char * vpx_codec_iface_name(vpx_codec_iface_t *iface)
Return the name for a given interface.
const char * vpx_codec_err_to_string(vpx_codec_err_t err)
Convert error number to printable string.
int lst_fb_idx[5]
Definition: vp8cx.h:699
#define VPX_TS_MAX_LAYERS
Definition: vpx_encoder.h:40
struct vpx_rational g_timebase
Stream timebase units.
Definition: vpx_encoder.h:397
unsigned int layer_target_bitrate[12]
Target bitrate for each spatial/temporal layer.
Definition: vpx_encoder.h:755
#define VP8_EFLAG_NO_REF_GF
Don't reference the golden frame.
Definition: vp8cx.h:67
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition: vpx_encoder.h:383
int den
Definition: vpx_encoder.h:261
Definition: vpx_encoder.h:177
unsigned int kf_max_dist
Keyframe maximum interval.
Definition: vpx_encoder.h:685
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition: vpx_encoder.h:429
Encoder configuration structure.
Definition: vpx_encoder.h:314
The coded data for this stream is corrupt or incomplete.
Definition: vpx_codec.h:129
Encoder output packet.
Definition: vpx_encoder.h:195
void * buf
Definition: vpx_encoder.h:109
unsigned int ts_rate_decimator[5]
Frame rate decimation factor for each temporal layer.
Definition: vpx_encoder.h:729
unsigned int kf_min_dist
Keyframe minimum interval.
Definition: vpx_encoder.h:675
Definition: vpx_encoder.h:268
unsigned int g_profile
Bitstream profile to use.
Definition: vpx_encoder.h:346
Definition: vpx_encoder.h:269
Codec control function to set number of tile columns.
Definition: vp8cx.h:353
struct vpx_codec_cx_pkt::@1::@2 frame
int frame_flags[5]
Definition: vp8cx.h:698
vpx_image_t * vpx_img_alloc(vpx_image_t *img, vpx_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
Definition: vpx_image.h:56
unsigned int g_w
Width of the frame.
Definition: vpx_encoder.h:357
Codec control function to set adaptive quantization mode.
Definition: vp8cx.h:400
Codec control function to get svc layer ID.
Definition: vp8cx.h:466
unsigned int g_h
Height of the frame.
Definition: vpx_encoder.h:367
enum vpx_codec_cx_pkt_kind kind
Definition: vpx_encoder.h:196
vp9 svc layer parameters
Definition: vp8cx.h:684
Operation completed without error.
Definition: vpx_codec.h:91
#define VP8_EFLAG_NO_UPD_LAST
Don't update the last frame.
Definition: vp8cx.h:84
void vpx_img_free(vpx_image_t *img)
Close an image descriptor.
unsigned int rc_target_bitrate
Target data rate.
Definition: vpx_encoder.h:525
#define VPX_DL_REALTIME
Definition: vpx_encoder.h:911
int num
Definition: vpx_encoder.h:260
Definition: vpx_codec.h:222
Codec control function to set the frame flags and buffer indices for spatial layers. The frame flags and buffer indices are set using the struct vpx_svc_ref_frame_config defined below.
Definition: vp8cx.h:539
enum vpx_enc_pass g_pass
Multi-pass Encoding Mode.
Definition: vpx_encoder.h:414
#define VPX_DL_GOOD_QUALITY
Definition: vpx_encoder.h:914
unsigned int ss_number_layers
Number of spatial coding layers.
Definition: vpx_encoder.h:695
vpx_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition: vpx_encoder.h:375
Provides definitions for using VP8 or VP9 encoder algorithm within the vpx Codec Interface.
Bypass mode. Used when application needs to control temporal layering. This will only work when the number of spatial layers equals 1.
Definition: vp8cx.h:592
vpx_codec_err_t
Algorithm return codes.
Definition: vpx_codec.h:89
const vpx_codec_cx_pkt_t * vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx, vpx_codec_iter_t *iter)
Encoded data iterator.
union vpx_codec_cx_pkt::@1 data
int temporal_layering_mode
Temporal layering mode indicating which temporal layering scheme to use.
Definition: vpx_encoder.h:763
vpx_fixed_buf_t rc_twopass_stats_in
Two-pass stats buffer.
Definition: vpx_encoder.h:512
vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface, vpx_codec_enc_cfg_t *cfg, unsigned int reserved)
Get a default configuration.
Definition: vpx_encoder.h:277
#define vpx_codec_control(ctx, id, data)
vpx_codec_control wrapper macro
Definition: vpx_codec.h:407
#define VP8_EFLAG_NO_REF_ARF
Don't reference the alternate reference frame.
Definition: vp8cx.h:76
vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx)
Destroy a codec instance.
size_t sz
Definition: vpx_encoder.h:110
Definition: vpx_codec.h:220
vp9 svc frame flag parameters.
Definition: vp8cx.h:697
#define VPX_FRAME_IS_KEY
Definition: vpx_encoder.h:130
Definition: vpx_codec.h:221
int alt_fb_idx[5]
Definition: vp8cx.h:701
const void * vpx_codec_iter_t
Iterator.
Definition: vpx_codec.h:188
Definition: vpx_encoder.h:176
unsigned int rc_2pass_vbr_maxsection_pct
Two-pass mode per-GOP maximum bitrate.
Definition: vpx_encoder.h:652
vpx_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition: vpx_encoder.h:406
#define VP8_EFLAG_NO_UPD_ARF
Don't update the alternate reference frame.
Definition: vp8cx.h:100
unsigned int rc_2pass_vbr_minsection_pct
Two-pass mode per-GOP minimum bitrate.
Definition: vpx_encoder.h:644
int gld_fb_idx[5]
Definition: vp8cx.h:700
Codec control function to set svc layer for spatial and temporal.
Definition: vp8cx.h:449
enum vpx_rc_mode rc_end_usage
Rate control algorithm to use.
Definition: vpx_encoder.h:504
Definition: vpx_encoder.h:267
Codec context structure.
Definition: vpx_codec.h:199