vp9.c (FFmpeg 2.6.9)
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "avcodec.h"
25 #include "get_bits.h"
26 #include "internal.h"
27 #include "thread.h"
28 #include "videodsp.h"
29 #include "vp56.h"
30 #include "vp9.h"
31 #include "vp9data.h"
32 #include "vp9dsp.h"
33 #include "libavutil/avassert.h"
34 
35 #define VP9_SYNCCODE 0x498342
36 
37 enum CompPredMode {
38  PRED_SINGLEREF,
39  PRED_COMPREF,
40  PRED_SWITCHABLE,
41 };
42 
43 enum BlockLevel {
44  BL_64X64,
45  BL_32X32,
46  BL_16X16,
47  BL_8X8,
48 };
49 
50 enum BlockSize {
51  BS_64x64,
52  BS_64x32,
53  BS_32x64,
54  BS_32x32,
55  BS_32x16,
56  BS_16x32,
57  BS_16x16,
58  BS_16x8,
59  BS_8x16,
60  BS_8x8,
61  BS_8x4,
62  BS_4x8,
63  BS_4x4,
64  N_BS_SIZES,
65 };
66 
67 struct VP9mvrefPair {
68  VP56mv mv[2];
69  int8_t ref[2];
70 };
71 
72 typedef struct VP9Frame {
73  ThreadFrame tf;
74  AVBufferRef *extradata;
75  uint8_t *segmentation_map;
76  struct VP9mvrefPair *mv;
77 } VP9Frame;
78 
79 struct VP9Filter {
80  uint8_t level[8 * 8];
81  uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
82  [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
83 };
84 
85 typedef struct VP9Block {
86  uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
87  enum FilterMode filter;
88  VP56mv mv[4 /* b_idx */][2 /* ref */];
89  enum BlockSize bs;
90  enum TxfmMode tx, uvtx;
91  enum BlockLevel bl;
92  enum BlockPartition bp;
93 } VP9Block;
94 
95 typedef struct VP9Context {
96  VP9DSPContext dsp;
97  VideoDSPContext vdsp;
98  GetBitContext gb;
99  VP56RangeCoder c;
100  VP56RangeCoder *c_b;
101  unsigned c_b_size;
102  VP9Block *b_base, *b;
103  int pass, uses_2pass, last_uses_2pass;
104  int row, row7, col, col7;
105  uint8_t *dst[3];
106  ptrdiff_t y_stride, uv_stride;
107 
108  // bitstream header
109  uint8_t profile;
110  uint8_t keyframe, last_keyframe;
111  uint8_t invisible;
112  uint8_t use_last_frame_mvs;
113  uint8_t errorres;
114  uint8_t colorspace;
115  uint8_t fullrange;
116  uint8_t intraonly;
117  uint8_t resetctx;
118  uint8_t refreshrefmask;
119  uint8_t highprecisionmvs;
120  enum FilterMode filtermode;
121  uint8_t allowcompinter;
122  uint8_t fixcompref;
123  uint8_t refreshctx;
124  uint8_t parallelmode;
125  uint8_t framectxid;
126  uint8_t refidx[3];
127  uint8_t signbias[3];
128  uint8_t varcompref[2];
129  ThreadFrame refs[8], next_refs[8];
130 #define CUR_FRAME 0
131 #define LAST_FRAME 1
132  VP9Frame frames[2];
133 
134  struct {
135  uint8_t level;
136  int8_t sharpness;
137  uint8_t lim_lut[64];
138  uint8_t mblim_lut[64];
139  } filter;
140  struct {
141  uint8_t enabled;
142  int8_t mode[2];
143  int8_t ref[4];
144  } lf_delta;
145  uint8_t yac_qi;
146  int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
147  uint8_t lossless;
148 #define MAX_SEGMENT 8
149  struct {
150  uint8_t enabled;
151  uint8_t temporal;
152  uint8_t absolute_vals;
153  uint8_t update_map;
154  struct {
155  uint8_t q_enabled;
156  uint8_t lf_enabled;
157  uint8_t ref_enabled;
158  uint8_t skip_enabled;
159  uint8_t ref_val;
160  int16_t q_val;
161  int8_t lf_val;
162  int16_t qmul[2][2];
163  uint8_t lflvl[4][2];
164  } feat[MAX_SEGMENT];
165  } segmentation;
166  struct {
167  unsigned log2_tile_cols, log2_tile_rows;
168  unsigned tile_cols, tile_rows;
169  unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
170  } tiling;
171  unsigned sb_cols, sb_rows, rows, cols;
172  struct {
173  prob_context p;
174  uint8_t coef[4][2][2][6][6][3];
175  } prob_ctx[4];
176  struct {
177  prob_context p;
178  uint8_t coef[4][2][2][6][6][11];
179  uint8_t seg[7];
180  uint8_t segpred[3];
181  } prob;
182  struct {
183  unsigned y_mode[4][10];
184  unsigned uv_mode[10][10];
185  unsigned filter[4][3];
186  unsigned mv_mode[7][4];
187  unsigned intra[4][2];
188  unsigned comp[5][2];
189  unsigned single_ref[5][2][2];
190  unsigned comp_ref[5][2];
191  unsigned tx32p[2][4];
192  unsigned tx16p[2][3];
193  unsigned tx8p[2][2];
194  unsigned skip[3][2];
195  unsigned mv_joint[4];
196  struct {
197  unsigned sign[2];
198  unsigned classes[11];
199  unsigned class0[2];
200  unsigned bits[10][2];
201  unsigned class0_fp[2][4];
202  unsigned fp[4];
203  unsigned class0_hp[2];
204  unsigned hp[2];
205  } mv_comp[2];
206  unsigned partition[4][4][4];
207  unsigned coef[4][2][2][6][6][3];
208  unsigned eob[4][2][2][6][6][2];
209  } counts;
210  enum TxfmMode txfmmode;
211  enum CompPredMode comppredmode;
212 
213  // contextual (left/above) cache
214  DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
215  DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
216  DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
217  DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][8];
218  DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
219  DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
220  DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
221  DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
222  DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
223  DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
224  DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
225  DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
226  uint8_t *above_partition_ctx;
227  uint8_t *above_mode_ctx;
228  // FIXME maybe merge some of the below in a flags field?
229  uint8_t *above_y_nnz_ctx;
230  uint8_t *above_uv_nnz_ctx[2];
231  uint8_t *above_skip_ctx; // 1bit
232  uint8_t *above_txfm_ctx; // 2bit
233  uint8_t *above_segpred_ctx; // 1bit
234  uint8_t *above_intra_ctx; // 1bit
235  uint8_t *above_comp_ctx; // 1bit
236  uint8_t *above_ref_ctx; // 2bit
237  uint8_t *above_filter_ctx;
238  VP56mv (*above_mv_ctx)[2];
239 
240  // whole-frame cache
241  uint8_t *intra_pred_data[3];
242  struct VP9Filter *lflvl;
243  DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];
244 
245  // block reconstruction intermediates
246  int block_alloc_using_2pass;
247  int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
248  uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
249  struct { int x, y; } min_mv, max_mv;
250  DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
251  DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
252  uint16_t mvscale[3][2];
253  uint8_t mvstep[3][2];
254 } VP9Context;
255 
256 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
257  {
258  { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
259  { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
260  }, {
261  { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
262  { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
263  }
264 };
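/* Editorial example (not part of the upstream file): bwh_tab[0] stores block
 * width/height in 4-pixel units and bwh_tab[1] in 8-pixel units, indexed by
 * enum BlockSize. A minimal sanity check of that reading:
 */
static av_unused void bwh_tab_example(void)
{
    /* BS_64x32: 64/4 == 16 wide, 32/4 == 8 high in 4-pixel units ... */
    av_assert0(bwh_tab[0][BS_64x32][0] == 16 && bwh_tab[0][BS_64x32][1] == 8);
    /* ... and 64/8 == 8 wide, 32/8 == 4 high in 8-pixel units */
    av_assert0(bwh_tab[1][BS_64x32][0] == 8 && bwh_tab[1][BS_64x32][1] == 4);
    /* sub-8x8 sizes all clamp to one 8x8 unit in bwh_tab[1] */
    av_assert0(bwh_tab[1][BS_4x4][0] == 1 && bwh_tab[1][BS_4x4][1] == 1);
}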
265 
266 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
267 {
268  VP9Context *s = ctx->priv_data;
269  int ret, sz;
270 
271  if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
272  return ret;
273  sz = 64 * s->sb_cols * s->sb_rows;
274  if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
275  ff_thread_release_buffer(ctx, &f->tf);
276  return AVERROR(ENOMEM);
277  }
278 
279  f->segmentation_map = f->extradata->data;
280  f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
281 
282  // retain segmentation map if it doesn't update
283  if (s->segmentation.enabled && !s->segmentation.update_map &&
284  !s->intraonly && !s->keyframe && !s->errorres &&
285  ctx->active_thread_type != FF_THREAD_FRAME) {
286  memcpy(f->segmentation_map, s->frames[CUR_FRAME].segmentation_map, sz);
287  }
288 
289  return 0;
290 }
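/* Editorial sketch (not upstream code) of the extradata layout allocated in
 * vp9_alloc_frame() above: sz counts 8x8 blocks (64 per 64x64 superblock),
 * and one buffer packs the segmentation map and the mv/ref pairs back to
 * back. For a hypothetical 1920x1080 frame:
 */
static av_unused void frame_extradata_layout_example(void)
{
    int sb_cols = (1920 + 63) >> 6, sb_rows = (1080 + 63) >> 6; /* 30 x 17 */
    int sz = 64 * sb_cols * sb_rows;                /* 32640 8x8 blocks */
    /* bytes [0, sz)     -> f->segmentation_map, one uint8_t per 8x8 block
     * bytes [sz, total) -> f->mv, one struct VP9mvrefPair per 8x8 block */
    size_t total = sz * (1 + sizeof(struct VP9mvrefPair));
    av_assert0(total == (size_t)sz + sz * sizeof(struct VP9mvrefPair));
}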
291 
292 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
293 {
294  ff_thread_release_buffer(ctx, &f->tf);
295  av_buffer_unref(&f->extradata);
296 }
297 
298 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
299 {
300  int res;
301 
302  if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
303  return res;
304  } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
305  vp9_unref_frame(ctx, dst);
306  return AVERROR(ENOMEM);
307  }
308 
309  dst->segmentation_map = src->segmentation_map;
310  dst->mv = src->mv;
311 
312  return 0;
313 }
314 
315 static int update_size(AVCodecContext *ctx, int w, int h)
316 {
317  VP9Context *s = ctx->priv_data;
318  uint8_t *p;
319 
320  av_assert0(w > 0 && h > 0);
321 
322  if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
323  return 0;
324 
325  ctx->width = w;
326  ctx->height = h;
327  s->sb_cols = (w + 63) >> 6;
328  s->sb_rows = (h + 63) >> 6;
329  s->cols = (w + 7) >> 3;
330  s->rows = (h + 7) >> 3;
331 
332 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
333  av_freep(&s->intra_pred_data[0]);
334  p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
335  if (!p)
336  return AVERROR(ENOMEM);
337  assign(s->intra_pred_data[0], uint8_t *, 64);
338  assign(s->intra_pred_data[1], uint8_t *, 32);
339  assign(s->intra_pred_data[2], uint8_t *, 32);
340  assign(s->above_y_nnz_ctx, uint8_t *, 16);
341  assign(s->above_mode_ctx, uint8_t *, 16);
342  assign(s->above_mv_ctx, VP56mv(*)[2], 16);
343  assign(s->above_partition_ctx, uint8_t *, 8);
344  assign(s->above_skip_ctx, uint8_t *, 8);
345  assign(s->above_txfm_ctx, uint8_t *, 8);
346  assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
347  assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
348  assign(s->above_segpred_ctx, uint8_t *, 8);
349  assign(s->above_intra_ctx, uint8_t *, 8);
350  assign(s->above_comp_ctx, uint8_t *, 8);
351  assign(s->above_ref_ctx, uint8_t *, 8);
352  assign(s->above_filter_ctx, uint8_t *, 8);
353  assign(s->lflvl, struct VP9Filter *, 1);
354 #undef assign
355 
356  // these will be re-allocated a little later
357  av_freep(&s->b_base);
358  av_freep(&s->block_base);
359 
360  return 0;
361 }
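/* Editorial sketch: update_size() carves all per-column "above" context
 * arrays out of one av_malloc() slab via the assign() macro, so a single
 * av_freep(&s->intra_pred_data[0]) releases everything at once. A
 * simplified, standalone version of the pattern (names here are
 * hypothetical):
 */
static av_unused int carve_slab_example(void)
{
    int sb_cols = 30;
    uint8_t *ctx_a, *ctx_b;
    uint8_t *p = av_malloc(sb_cols * (16 + 8));

    if (!p)
        return AVERROR(ENOMEM);
    ctx_a = p;                    /* 16 bytes per superblock column */
    ctx_b = ctx_a + sb_cols * 16; /*  8 bytes per superblock column */
    memset(ctx_b, 0, sb_cols * 8);
    /* ... use ctx_a / ctx_b, then free the whole slab with one call ... */
    av_free(ctx_a);
    return 0;
}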
362 
363 static int update_block_buffers(AVCodecContext *ctx)
364 {
365  VP9Context *s = ctx->priv_data;
366 
367  if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->uses_2pass)
368  return 0;
369 
370  av_free(s->b_base);
371  av_free(s->block_base);
372  if (s->uses_2pass) {
373  int sbs = s->sb_cols * s->sb_rows;
374 
375  s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
376  s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
377  if (!s->b_base || !s->block_base)
378  return AVERROR(ENOMEM);
379  s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
380  s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
381  s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
382  s->uveob_base[0] = s->eob_base + 256 * sbs;
383  s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
384  } else {
385  s->b_base = av_malloc(sizeof(VP9Block));
386  s->block_base = av_mallocz((64 * 64 + 128) * 3);
387  if (!s->b_base || !s->block_base)
388  return AVERROR(ENOMEM);
389  s->uvblock_base[0] = s->block_base + 64 * 64;
390  s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
391  s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
392  s->uveob_base[0] = s->eob_base + 256;
393  s->uveob_base[1] = s->uveob_base[0] + 64;
394  }
395  s->block_alloc_using_2pass = s->uses_2pass;
396 
397  return 0;
398 }
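/* Editorial arithmetic check for the shared allocations above: per 64x64
 * superblock, the int16_t coefficient area (one 64x64 luma plane plus two
 * 32x32 chroma planes) and the per-4x4 eob bytes pack exactly into
 * (64 * 64 + 128) * 3 bytes, which is why one av_mallocz() covers both:
 */
static av_unused void block_buffer_size_example(void)
{
    size_t coef_bytes = (64 * 64 + 2 * 32 * 32) * sizeof(int16_t); /* 12288 */
    size_t eob_bytes  = 256 + 2 * 64; /* one byte per 4x4: luma + 2 chroma */
    av_assert0(coef_bytes + eob_bytes == (64 * 64 + 128) * 3);     /* 12672 */
}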
399 
400 // for some reason the sign bit is at the end, not the start, of a bit sequence
401 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
402 {
403  int v = get_bits(gb, n);
404  return get_bits1(gb) ? -v : v;
405 }
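/* Editorial example: a magnitude-first, sign-last field. The 7 bits
 * 0001011 (reading left to right: |v| = 5, then sign = 1) decode to -5:
 */
static av_unused int get_sbits_inv_example(void)
{
    GetBitContext gb;
    static const uint8_t buf[2] = { 0x16 }; /* 0b00010110: 000101, 1, pad */
    init_get_bits8(&gb, buf, sizeof(buf));
    return get_sbits_inv(&gb, 6); /* -5 */
}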
406 
407 static av_always_inline int inv_recenter_nonneg(int v, int m)
408 {
409  return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
410 }
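/* Editorial worked example: inv_recenter_nonneg() fans the distance code v
 * out around the centre m, alternating below/above until the smaller side
 * is exhausted (v > 2 * m), after which v simply maps to itself:
 */
static av_unused void inv_recenter_example(void)
{
    /* m = 5: v = 0, 1, 2, 3, 4, ... -> 5, 4, 6, 3, 7, ... */
    av_assert0(inv_recenter_nonneg(0, 5) == 5);
    av_assert0(inv_recenter_nonneg(1, 5) == 4);
    av_assert0(inv_recenter_nonneg(2, 5) == 6);
    av_assert0(inv_recenter_nonneg(3, 5) == 3);
    /* v > 2 * m: one-sided region, identity mapping */
    av_assert0(inv_recenter_nonneg(11, 5) == 11);
}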
411 
412 // differential forward probability updates
413 static int update_prob(VP56RangeCoder *c, int p)
414 {
415  static const int inv_map_table[255] = {
416  7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
417  189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
418  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
419  25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
420  40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
421  55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
422  70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
423  86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
424  101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
425  116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
426  131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
427  146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
428  161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
429  177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
430  192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
431  207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
432  222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
433  237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
434  252, 253, 253,
435  };
436  int d;
437 
438  /* This code performs a differential probability update. For a current
439  * probability A in the range [1, 255], the difference to any new
440  * probability can be expressed as one of the values 1-A ... 255-A.
441  * Part of that absolute range exists on both the positive and the
442  * negative side, while the rest exists on one side only. The shared
443  * part is coded folded, i.e. doubled, with the lowest bit carrying the
444  * sign, and the one-sided part is then stacked on top of it. The
445  * resulting absolute difference again lies in [0, 254], and since a
446  * bigger value in this range means we are further away from the
447  * original value A, it can be coded as a VLC, higher values being
448  * increasingly unlikely. The first 20 values in inv_map_table[] allow
449  * 'cheap, rough' updates, as opposed to the 'fine, exact' updates
450  * further down the range, which adds one extra dimension to this
451  * differential update model. */
452 
453  if (!vp8_rac_get(c)) {
454  d = vp8_rac_get_uint(c, 4) + 0;
455  } else if (!vp8_rac_get(c)) {
456  d = vp8_rac_get_uint(c, 4) + 16;
457  } else if (!vp8_rac_get(c)) {
458  d = vp8_rac_get_uint(c, 5) + 32;
459  } else {
460  d = vp8_rac_get_uint(c, 7);
461  if (d >= 65)
462  d = (d << 1) - 65 + vp8_rac_get(c);
463  d += 64;
464  av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
465  }
466 
467  return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
468  255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
469 }
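/* Editorial worked example for the tiers above: d is coded in four VLC
 * ranges, [0,16) and [16,32) with 4 bits each, [32,64) with 5 bits, and
 * [64,254] with 7 bits plus an escape doubling for d >= 65. With current
 * p = 128 and the cheapest symbol d = 0, inv_map_table[0] = 7 and the new
 * probability is 1 + inv_recenter_nonneg(7, 127) = 1 + (127 - 4) = 124,
 * i.e. one 4-bit symbol buys a small downward nudge. */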
470 
471 static int decode_frame_header(AVCodecContext *ctx,
472  const uint8_t *data, int size, int *ref)
473 {
474  VP9Context *s = ctx->priv_data;
475  int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
476  int last_invisible;
477  const uint8_t *data2;
478 
479  /* general header */
480  if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
481  av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
482  return res;
483  }
484  if (get_bits(&s->gb, 2) != 0x2) { // frame marker
485  av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
486  return AVERROR_INVALIDDATA;
487  }
488  s->profile = get_bits1(&s->gb);
489  if (get_bits1(&s->gb)) { // reserved bit
490  av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
491  return AVERROR_INVALIDDATA;
492  }
493  if (get_bits1(&s->gb)) {
494  *ref = get_bits(&s->gb, 3);
495  return 0;
496  }
497  s->last_uses_2pass = s->uses_2pass;
498  s->last_keyframe = s->keyframe;
499  s->keyframe = !get_bits1(&s->gb);
500  last_invisible = s->invisible;
501  s->invisible = !get_bits1(&s->gb);
502  s->errorres = get_bits1(&s->gb);
503  s->use_last_frame_mvs = !s->errorres && !last_invisible;
504  if (s->keyframe) {
505  if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
506  av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
507  return AVERROR_INVALIDDATA;
508  }
509  s->colorspace = get_bits(&s->gb, 3);
510  if (s->colorspace == 7) { // RGB = profile 1
511  av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
512  return AVERROR_INVALIDDATA;
513  }
514  s->fullrange = get_bits1(&s->gb);
515  // for profile 1, here follows the subsampling bits
516  s->refreshrefmask = 0xff;
517  w = get_bits(&s->gb, 16) + 1;
518  h = get_bits(&s->gb, 16) + 1;
519  if (get_bits1(&s->gb)) // display size
520  skip_bits(&s->gb, 32);
521  } else {
522  s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
523  s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
524  if (s->intraonly) {
525  if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
526  av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
527  return AVERROR_INVALIDDATA;
528  }
529  s->refreshrefmask = get_bits(&s->gb, 8);
530  w = get_bits(&s->gb, 16) + 1;
531  h = get_bits(&s->gb, 16) + 1;
532  if (get_bits1(&s->gb)) // display size
533  skip_bits(&s->gb, 32);
534  } else {
535  s->refreshrefmask = get_bits(&s->gb, 8);
536  s->refidx[0] = get_bits(&s->gb, 3);
537  s->signbias[0] = get_bits1(&s->gb);
538  s->refidx[1] = get_bits(&s->gb, 3);
539  s->signbias[1] = get_bits1(&s->gb);
540  s->refidx[2] = get_bits(&s->gb, 3);
541  s->signbias[2] = get_bits1(&s->gb);
542  if (!s->refs[s->refidx[0]].f->data[0] ||
543  !s->refs[s->refidx[1]].f->data[0] ||
544  !s->refs[s->refidx[2]].f->data[0]) {
545  av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
546  return AVERROR_INVALIDDATA;
547  }
548  if (get_bits1(&s->gb)) {
549  w = s->refs[s->refidx[0]].f->width;
550  h = s->refs[s->refidx[0]].f->height;
551  } else if (get_bits1(&s->gb)) {
552  w = s->refs[s->refidx[1]].f->width;
553  h = s->refs[s->refidx[1]].f->height;
554  } else if (get_bits1(&s->gb)) {
555  w = s->refs[s->refidx[2]].f->width;
556  h = s->refs[s->refidx[2]].f->height;
557  } else {
558  w = get_bits(&s->gb, 16) + 1;
559  h = get_bits(&s->gb, 16) + 1;
560  }
561  // Note that in this code, "CUR_FRAME" is actually before we
562  // have formally allocated a frame, and thus actually represents
563  // the _last_ frame
564  s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
565  s->frames[CUR_FRAME].tf.f->height == h;
566  if (get_bits1(&s->gb)) // display size
567  skip_bits(&s->gb, 32);
568  s->highprecisionmvs = get_bits1(&s->gb);
569  s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
570  get_bits(&s->gb, 2);
571  s->allowcompinter = s->signbias[0] != s->signbias[1] ||
572  s->signbias[0] != s->signbias[2];
573  if (s->allowcompinter) {
574  if (s->signbias[0] == s->signbias[1]) {
575  s->fixcompref = 2;
576  s->varcompref[0] = 0;
577  s->varcompref[1] = 1;
578  } else if (s->signbias[0] == s->signbias[2]) {
579  s->fixcompref = 1;
580  s->varcompref[0] = 0;
581  s->varcompref[1] = 2;
582  } else {
583  s->fixcompref = 0;
584  s->varcompref[0] = 1;
585  s->varcompref[1] = 2;
586  }
587  }
588 
589  for (i = 0; i < 3; i++) {
590  AVFrame *ref = s->refs[s->refidx[i]].f;
591  int refw = ref->width, refh = ref->height;
592 
593  if (refw == w && refh == h) {
594  s->mvscale[i][0] = s->mvscale[i][1] = 0;
595  } else {
596  if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
597  av_log(ctx, AV_LOG_ERROR,
598  "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
599  refw, refh, w, h);
600  return AVERROR_INVALIDDATA;
601  }
602  s->mvscale[i][0] = (refw << 14) / w;
603  s->mvscale[i][1] = (refh << 14) / h;
604  s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
605  s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
606  }
607  }
608  }
609  }
610  s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
611  s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
612  s->framectxid = c = get_bits(&s->gb, 2);
613 
614  /* loopfilter header data */
615  s->filter.level = get_bits(&s->gb, 6);
616  sharp = get_bits(&s->gb, 3);
617  // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
618  // the old cache values since they are still valid
619  if (s->filter.sharpness != sharp)
620  memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
621  s->filter.sharpness = sharp;
622  if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
623  if (get_bits1(&s->gb)) {
624  for (i = 0; i < 4; i++)
625  if (get_bits1(&s->gb))
626  s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
627  for (i = 0; i < 2; i++)
628  if (get_bits1(&s->gb))
629  s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
630  }
631  } else {
632  memset(&s->lf_delta, 0, sizeof(s->lf_delta));
633  }
634 
635  /* quantization header data */
636  s->yac_qi = get_bits(&s->gb, 8);
637  s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
638  s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
639  s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
640  s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
641  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
642 
643  /* segmentation header info */
644  if ((s->segmentation.enabled = get_bits1(&s->gb))) {
645  if ((s->segmentation.update_map = get_bits1(&s->gb))) {
646  for (i = 0; i < 7; i++)
647  s->prob.seg[i] = get_bits1(&s->gb) ?
648  get_bits(&s->gb, 8) : 255;
649  if ((s->segmentation.temporal = get_bits1(&s->gb))) {
650  for (i = 0; i < 3; i++)
651  s->prob.segpred[i] = get_bits1(&s->gb) ?
652  get_bits(&s->gb, 8) : 255;
653  }
654  }
655  if ((!s->segmentation.update_map || s->segmentation.temporal) &&
656  (w != s->frames[CUR_FRAME].tf.f->width ||
657  h != s->frames[CUR_FRAME].tf.f->height)) {
658  av_log(ctx, AV_LOG_ERROR,
659  "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
660  s->segmentation.temporal, s->segmentation.update_map);
661  return AVERROR_INVALIDDATA;
662  }
663 
664  if (get_bits1(&s->gb)) {
665  s->segmentation.absolute_vals = get_bits1(&s->gb);
666  for (i = 0; i < 8; i++) {
667  if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
668  s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
669  if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
670  s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
671  if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
672  s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
673  s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
674  }
675  }
676  } else {
677  s->segmentation.feat[0].q_enabled = 0;
678  s->segmentation.feat[0].lf_enabled = 0;
679  s->segmentation.feat[0].skip_enabled = 0;
680  s->segmentation.feat[0].ref_enabled = 0;
681  }
682 
683  // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
684  for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
685  int qyac, qydc, quvac, quvdc, lflvl, sh;
686 
687  if (s->segmentation.feat[i].q_enabled) {
688  if (s->segmentation.absolute_vals)
689  qyac = s->segmentation.feat[i].q_val;
690  else
691  qyac = s->yac_qi + s->segmentation.feat[i].q_val;
692  } else {
693  qyac = s->yac_qi;
694  }
695  qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
696  quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
697  quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
698  qyac = av_clip_uintp2(qyac, 8);
699 
700  s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
701  s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
702  s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
703  s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
704 
705  sh = s->filter.level >= 32;
706  if (s->segmentation.feat[i].lf_enabled) {
707  if (s->segmentation.absolute_vals)
708  lflvl = s->segmentation.feat[i].lf_val;
709  else
710  lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
711  } else {
712  lflvl = s->filter.level;
713  }
714  s->segmentation.feat[i].lflvl[0][0] =
715  s->segmentation.feat[i].lflvl[0][1] =
716  av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
717  for (j = 1; j < 4; j++) {
718  s->segmentation.feat[i].lflvl[j][0] =
719  av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
720  s->lf_delta.mode[0]) << sh), 6);
721  s->segmentation.feat[i].lflvl[j][1] =
722  av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
723  s->lf_delta.mode[1]) << sh), 6);
724  }
725  }
726 
727  /* tiling info */
728  if ((res = update_size(ctx, w, h)) < 0) {
729  av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
730  return res;
731  }
732  for (s->tiling.log2_tile_cols = 0;
733  (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
734  s->tiling.log2_tile_cols++) ;
735  for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
736  max = FFMAX(0, max - 1);
737  while (max > s->tiling.log2_tile_cols) {
738  if (get_bits1(&s->gb))
739  s->tiling.log2_tile_cols++;
740  else
741  break;
742  }
743  s->tiling.log2_tile_rows = decode012(&s->gb);
744  s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
745  if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
746  s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
747  s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
748  sizeof(VP56RangeCoder) * s->tiling.tile_cols);
749  if (!s->c_b) {
750  av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
751  return AVERROR(ENOMEM);
752  }
753  }
754 
755  if (s->keyframe || s->errorres || s->intraonly) {
756  s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
757  s->prob_ctx[3].p = vp9_default_probs;
758  memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
759  sizeof(vp9_default_coef_probs));
760  memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
761  sizeof(vp9_default_coef_probs));
762  memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
763  sizeof(vp9_default_coef_probs));
764  memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
765  sizeof(vp9_default_coef_probs));
766  }
767 
768  // the next 16 bits are the size of the rest of the header (arith-coded)
769  size2 = get_bits(&s->gb, 16);
770  data2 = align_get_bits(&s->gb);
771  if (size2 > size - (data2 - data)) {
772  av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
773  return AVERROR_INVALIDDATA;
774  }
775  ff_vp56_init_range_decoder(&s->c, data2, size2);
776  if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
777  av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
778  return AVERROR_INVALIDDATA;
779  }
780 
781  if (s->keyframe || s->intraonly) {
782  memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
783  } else {
784  memset(&s->counts, 0, sizeof(s->counts));
785  }
786  // FIXME is it faster to not copy here, but do it down in the fw updates
787  // as explicit copies if the fw update is missing (and skip the copy upon
788  // fw update)?
789  s->prob.p = s->prob_ctx[c].p;
790 
791  // txfm updates
792  if (s->lossless) {
793  s->txfmmode = TX_4X4;
794  } else {
795  s->txfmmode = vp8_rac_get_uint(&s->c, 2);
796  if (s->txfmmode == 3)
797  s->txfmmode += vp8_rac_get(&s->c);
798 
799  if (s->txfmmode == TX_SWITCHABLE) {
800  for (i = 0; i < 2; i++)
801  if (vp56_rac_get_prob_branchy(&s->c, 252))
802  s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
803  for (i = 0; i < 2; i++)
804  for (j = 0; j < 2; j++)
805  if (vp56_rac_get_prob_branchy(&s->c, 252))
806  s->prob.p.tx16p[i][j] =
807  update_prob(&s->c, s->prob.p.tx16p[i][j]);
808  for (i = 0; i < 2; i++)
809  for (j = 0; j < 3; j++)
810  if (vp56_rac_get_prob_branchy(&s->c, 252))
811  s->prob.p.tx32p[i][j] =
812  update_prob(&s->c, s->prob.p.tx32p[i][j]);
813  }
814  }
815 
816  // coef updates
817  for (i = 0; i < 4; i++) {
818  uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
819  if (vp8_rac_get(&s->c)) {
820  for (j = 0; j < 2; j++)
821  for (k = 0; k < 2; k++)
822  for (l = 0; l < 6; l++)
823  for (m = 0; m < 6; m++) {
824  uint8_t *p = s->prob.coef[i][j][k][l][m];
825  uint8_t *r = ref[j][k][l][m];
826  if (m >= 3 && l == 0) // dc only has 3 pt
827  break;
828  for (n = 0; n < 3; n++) {
829  if (vp56_rac_get_prob_branchy(&s->c, 252)) {
830  p[n] = update_prob(&s->c, r[n]);
831  } else {
832  p[n] = r[n];
833  }
834  }
835  p[3] = 0;
836  }
837  } else {
838  for (j = 0; j < 2; j++)
839  for (k = 0; k < 2; k++)
840  for (l = 0; l < 6; l++)
841  for (m = 0; m < 6; m++) {
842  uint8_t *p = s->prob.coef[i][j][k][l][m];
843  uint8_t *r = ref[j][k][l][m];
844  if (m > 3 && l == 0) // dc only has 3 pt
845  break;
846  memcpy(p, r, 3);
847  p[3] = 0;
848  }
849  }
850  if (s->txfmmode == i)
851  break;
852  }
853 
854  // mode updates
855  for (i = 0; i < 3; i++)
856  if (vp56_rac_get_prob_branchy(&s->c, 252))
857  s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
858  if (!s->keyframe && !s->intraonly) {
859  for (i = 0; i < 7; i++)
860  for (j = 0; j < 3; j++)
861  if (vp56_rac_get_prob_branchy(&s->c, 252))
862  s->prob.p.mv_mode[i][j] =
863  update_prob(&s->c, s->prob.p.mv_mode[i][j]);
864 
865  if (s->filtermode == FILTER_SWITCHABLE)
866  for (i = 0; i < 4; i++)
867  for (j = 0; j < 2; j++)
868  if (vp56_rac_get_prob_branchy(&s->c, 252))
869  s->prob.p.filter[i][j] =
870  update_prob(&s->c, s->prob.p.filter[i][j]);
871 
872  for (i = 0; i < 4; i++)
873  if (vp56_rac_get_prob_branchy(&s->c, 252))
874  s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
875 
876  if (s->allowcompinter) {
877  s->comppredmode = vp8_rac_get(&s->c);
878  if (s->comppredmode)
879  s->comppredmode += vp8_rac_get(&s->c);
880  if (s->comppredmode == PRED_SWITCHABLE)
881  for (i = 0; i < 5; i++)
882  if (vp56_rac_get_prob_branchy(&s->c, 252))
883  s->prob.p.comp[i] =
884  update_prob(&s->c, s->prob.p.comp[i]);
885  } else {
886  s->comppredmode = PRED_SINGLEREF;
887  }
888 
889  if (s->comppredmode != PRED_COMPREF) {
890  for (i = 0; i < 5; i++) {
891  if (vp56_rac_get_prob_branchy(&s->c, 252))
892  s->prob.p.single_ref[i][0] =
893  update_prob(&s->c, s->prob.p.single_ref[i][0]);
894  if (vp56_rac_get_prob_branchy(&s->c, 252))
895  s->prob.p.single_ref[i][1] =
896  update_prob(&s->c, s->prob.p.single_ref[i][1]);
897  }
898  }
899 
900  if (s->comppredmode != PRED_SINGLEREF) {
901  for (i = 0; i < 5; i++)
902  if (vp56_rac_get_prob_branchy(&s->c, 252))
903  s->prob.p.comp_ref[i] =
904  update_prob(&s->c, s->prob.p.comp_ref[i]);
905  }
906 
907  for (i = 0; i < 4; i++)
908  for (j = 0; j < 9; j++)
909  if (vp56_rac_get_prob_branchy(&s->c, 252))
910  s->prob.p.y_mode[i][j] =
911  update_prob(&s->c, s->prob.p.y_mode[i][j]);
912 
913  for (i = 0; i < 4; i++)
914  for (j = 0; j < 4; j++)
915  for (k = 0; k < 3; k++)
916  if (vp56_rac_get_prob_branchy(&s->c, 252))
917  s->prob.p.partition[3 - i][j][k] =
918  update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
919 
920  // mv fields don't use the update_prob subexp model for some reason
921  for (i = 0; i < 3; i++)
922  if (vp56_rac_get_prob_branchy(&s->c, 252))
923  s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
924 
925  for (i = 0; i < 2; i++) {
926  if (vp56_rac_get_prob_branchy(&s->c, 252))
927  s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
928 
929  for (j = 0; j < 10; j++)
930  if (vp56_rac_get_prob_branchy(&s->c, 252))
931  s->prob.p.mv_comp[i].classes[j] =
932  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
933 
934  if (vp56_rac_get_prob_branchy(&s->c, 252))
935  s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
936 
937  for (j = 0; j < 10; j++)
938  if (vp56_rac_get_prob_branchy(&s->c, 252))
939  s->prob.p.mv_comp[i].bits[j] =
940  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
941  }
942 
943  for (i = 0; i < 2; i++) {
944  for (j = 0; j < 2; j++)
945  for (k = 0; k < 3; k++)
946  if (vp56_rac_get_prob_branchy(&s->c, 252))
947  s->prob.p.mv_comp[i].class0_fp[j][k] =
948  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
949 
950  for (j = 0; j < 3; j++)
951  if (vp56_rac_get_prob_branchy(&s->c, 252))
952  s->prob.p.mv_comp[i].fp[j] =
953  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
954  }
955 
956  if (s->highprecisionmvs) {
957  for (i = 0; i < 2; i++) {
958  if (vp56_rac_get_prob_branchy(&s->c, 252))
959  s->prob.p.mv_comp[i].class0_hp =
960  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
961 
962  if (vp56_rac_get_prob_branchy(&s->c, 252))
963  s->prob.p.mv_comp[i].hp =
964  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
965  }
966  }
967  }
968 
969  return (data2 - data) + size2;
970 }
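/* Editorial note on the reference scaling set up above: s->mvscale[] holds
 * Q14 fixed-point ratios ref_size / cur_size; the header rejects ratios
 * above 2 (w * 2 < refw) or below 1/16 (w > 16 * refw). A worked example,
 * assuming a hypothetical 1920-wide reference behind a 1280-wide frame:
 */
static av_unused void mvscale_example(void)
{
    int refw = 1920, w = 1280;
    uint16_t scale = (refw << 14) / w; /* 1.5 in Q14 = 24576 */
    uint8_t  step  = 16 * scale >> 14; /* 24 source pixels per 16-pixel step */
    av_assert0(scale == 24576 && step == 24);
}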
971 
972 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
973  VP9Context *s)
974 {
975  dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
976  dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
977 }
978 
979 static void find_ref_mvs(VP9Context *s,
980  VP56mv *pmv, int ref, int z, int idx, int sb)
981 {
982  static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
983  [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
984  { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
985  [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
986  { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
987  [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
988  { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
989  [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
990  { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
991  [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
992  { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
993  [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
994  { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
995  [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
996  { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
997  [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
998  { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
999  [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1000  { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1001  [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1002  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1003  [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1004  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1005  [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1006  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1007  [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1008  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1009  };
1010  VP9Block *b = s->b;
1011  int row = s->row, col = s->col, row7 = s->row7;
1012  const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
1013 #define INVALID_MV 0x80008000U
1014  uint32_t mem = INVALID_MV;
1015  int i;
1016 
1017 #define RETURN_DIRECT_MV(mv) \
1018  do { \
1019  uint32_t m = AV_RN32A(&mv); \
1020  if (!idx) { \
1021  AV_WN32A(pmv, m); \
1022  return; \
1023  } else if (mem == INVALID_MV) { \
1024  mem = m; \
1025  } else if (m != mem) { \
1026  AV_WN32A(pmv, m); \
1027  return; \
1028  } \
1029  } while (0)
1030 
1031  if (sb >= 0) {
1032  if (sb == 2 || sb == 1) {
1033  RETURN_DIRECT_MV(b->mv[0][z]);
1034  } else if (sb == 3) {
1035  RETURN_DIRECT_MV(b->mv[2][z]);
1036  RETURN_DIRECT_MV(b->mv[1][z]);
1037  RETURN_DIRECT_MV(b->mv[0][z]);
1038  }
1039 
1040 #define RETURN_MV(mv) \
1041  do { \
1042  if (sb > 0) { \
1043  VP56mv tmp; \
1044  uint32_t m; \
1045  clamp_mv(&tmp, &mv, s); \
1046  m = AV_RN32A(&tmp); \
1047  if (!idx) { \
1048  AV_WN32A(pmv, m); \
1049  return; \
1050  } else if (mem == INVALID_MV) { \
1051  mem = m; \
1052  } else if (m != mem) { \
1053  AV_WN32A(pmv, m); \
1054  return; \
1055  } \
1056  } else { \
1057  uint32_t m = AV_RN32A(&mv); \
1058  if (!idx) { \
1059  clamp_mv(pmv, &mv, s); \
1060  return; \
1061  } else if (mem == INVALID_MV) { \
1062  mem = m; \
1063  } else if (m != mem) { \
1064  clamp_mv(pmv, &mv, s); \
1065  return; \
1066  } \
1067  } \
1068  } while (0)
1069 
1070  if (row > 0) {
1071  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1072  if (mv->ref[0] == ref) {
1073  RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1074  } else if (mv->ref[1] == ref) {
1075  RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1076  }
1077  }
1078  if (col > s->tiling.tile_col_start) {
1079  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1080  if (mv->ref[0] == ref) {
1081  RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1082  } else if (mv->ref[1] == ref) {
1083  RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1084  }
1085  }
1086  i = 2;
1087  } else {
1088  i = 0;
1089  }
1090 
1091  // previously coded MVs in this neighbourhood, using same reference frame
1092  for (; i < 8; i++) {
1093  int c = p[i][0] + col, r = p[i][1] + row;
1094 
1095  if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1096  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1097 
1098  if (mv->ref[0] == ref) {
1099  RETURN_MV(mv->mv[0]);
1100  } else if (mv->ref[1] == ref) {
1101  RETURN_MV(mv->mv[1]);
1102  }
1103  }
1104  }
1105 
1106  // MV at this position in previous frame, using same reference frame
1107  if (s->use_last_frame_mvs) {
1108  struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
1109 
1110  if (!s->last_uses_2pass)
1111  ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1112  if (mv->ref[0] == ref) {
1113  RETURN_MV(mv->mv[0]);
1114  } else if (mv->ref[1] == ref) {
1115  RETURN_MV(mv->mv[1]);
1116  }
1117  }
1118 
1119 #define RETURN_SCALE_MV(mv, scale) \
1120  do { \
1121  if (scale) { \
1122  VP56mv mv_temp = { -mv.x, -mv.y }; \
1123  RETURN_MV(mv_temp); \
1124  } else { \
1125  RETURN_MV(mv); \
1126  } \
1127  } while (0)
1128 
1129  // previously coded MVs in this neighbourhood, using different reference frame
1130  for (i = 0; i < 8; i++) {
1131  int c = p[i][0] + col, r = p[i][1] + row;
1132 
1133  if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1134  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1135 
1136  if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1137  RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1138  }
1139  if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1140  // BUG - libvpx has this condition regardless of whether
1141  // we used the first ref MV and pre-scaling
1142  AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1143  RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1144  }
1145  }
1146  }
1147 
1148  // MV at this position in previous frame, using different reference frame
1149  if (s->use_last_frame_mvs) {
1150  struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
1151 
1152  // no need to await_progress, because we already did that above
1153  if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1154  RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1155  }
1156  if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1157  // BUG - libvpx has this condition regardless of whether
1158  // we used the first ref MV and pre-scaling
1159  AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1160  RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1161  }
1162  }
1163 
1164  AV_ZERO32(pmv);
1165 #undef INVALID_MV
1166 #undef RETURN_MV
1167 #undef RETURN_SCALE_MV
1168 }
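/* Editorial sketch (not upstream code) of the selection contract implemented
 * by the RETURN_* macros above: candidates are visited in priority order and
 * find_ref_mvs() returns the (idx + 1)-th *distinct* vector found (idx is 0
 * or 1), falling back to (0,0). The same logic over a plain array:
 */
static av_unused VP56mv pick_nth_distinct_mv(const VP56mv *cand, int ncand,
                                             int idx)
{
    uint32_t first = 0x80008000U; /* INVALID_MV sentinel, as above */
    VP56mv zero = { 0, 0 };
    int i;

    for (i = 0; i < ncand; i++) {
        uint32_t m = AV_RN32A(&cand[i]);
        if (!idx)
            return cand[i];       /* first candidate wins immediately */
        if (first == 0x80008000U)
            first = m;            /* remember the first, keep scanning */
        else if (m != first)
            return cand[i];       /* second distinct candidate */
    }
    return zero;
}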
1169 
1170 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1171 {
1172  int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1173  int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1174  s->prob.p.mv_comp[idx].classes);
1175 
1176  s->counts.mv_comp[idx].sign[sign]++;
1177  s->counts.mv_comp[idx].classes[c]++;
1178  if (c) {
1179  int m;
1180 
1181  for (n = 0, m = 0; m < c; m++) {
1182  bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1183  n |= bit << m;
1184  s->counts.mv_comp[idx].bits[m][bit]++;
1185  }
1186  n <<= 3;
1187  bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1188  n |= bit << 1;
1189  s->counts.mv_comp[idx].fp[bit]++;
1190  if (hp) {
1191  bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1192  s->counts.mv_comp[idx].hp[bit]++;
1193  n |= bit;
1194  } else {
1195  n |= 1;
1196  // bug in libvpx - we count for bw entropy purposes even if the
1197  // bit wasn't coded
1198  s->counts.mv_comp[idx].hp[1]++;
1199  }
1200  n += 8 << c;
1201  } else {
1202  n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1203  s->counts.mv_comp[idx].class0[n]++;
1204  bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1205  s->prob.p.mv_comp[idx].class0_fp[n]);
1206  s->counts.mv_comp[idx].class0_fp[n][bit]++;
1207  n = (n << 3) | (bit << 1);
1208  if (hp) {
1209  bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1210  s->counts.mv_comp[idx].class0_hp[bit]++;
1211  n |= bit;
1212  } else {
1213  n |= 1;
1214  // bug in libvpx - we count for bw entropy purposes even if the
1215  // bit wasn't coded
1216  s->counts.mv_comp[idx].class0_hp[1]++;
1217  }
1218  }
1219 
1220  return sign ? -(n + 1) : (n + 1);
1221 }
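/* Editorial worked example for the value assembly above (units are 1/8 pel):
 * with class c = 2, integer bits n = 1, fractional fp = 2 and hp disabled,
 * n = (1 << 3) | (2 << 1) | 1 = 13, then n += 8 << 2 = 32 gives 45, and the
 * function returns sign ? -46 : 46, i.e. a 5.75-pixel component. */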
1222 
1223 static void fill_mv(VP9Context *s,
1224  VP56mv *mv, int mode, int sb)
1225 {
1226  VP9Block *b = s->b;
1227 
1228  if (mode == ZEROMV) {
1229  AV_ZERO64(mv);
1230  } else {
1231  int hp;
1232 
1233  // FIXME cache this value and reuse for other subblocks
1234  find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1235  mode == NEWMV ? -1 : sb);
1236  // FIXME maybe move this code into find_ref_mvs()
1237  if ((mode == NEWMV || sb == -1) &&
1238  !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1239  if (mv[0].y & 1) {
1240  if (mv[0].y < 0)
1241  mv[0].y++;
1242  else
1243  mv[0].y--;
1244  }
1245  if (mv[0].x & 1) {
1246  if (mv[0].x < 0)
1247  mv[0].x++;
1248  else
1249  mv[0].x--;
1250  }
1251  }
1252  if (mode == NEWMV) {
1253  enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1254  s->prob.p.mv_joint);
1255 
1256  s->counts.mv_joint[j]++;
1257  if (j >= MV_JOINT_V)
1258  mv[0].y += read_mv_component(s, 0, hp);
1259  if (j & 1)
1260  mv[0].x += read_mv_component(s, 1, hp);
1261  }
1262 
1263  if (b->comp) {
1264  // FIXME cache this value and reuse for other subblocks
1265  find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1266  mode == NEWMV ? -1 : sb);
1267  if ((mode == NEWMV || sb == -1) &&
1268  !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1269  if (mv[1].y & 1) {
1270  if (mv[1].y < 0)
1271  mv[1].y++;
1272  else
1273  mv[1].y--;
1274  }
1275  if (mv[1].x & 1) {
1276  if (mv[1].x < 0)
1277  mv[1].x++;
1278  else
1279  mv[1].x--;
1280  }
1281  }
1282  if (mode == NEWMV) {
1283  enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1284  s->prob.p.mv_joint);
1285 
1286  s->counts.mv_joint[j]++;
1287  if (j >= MV_JOINT_V)
1288  mv[1].y += read_mv_component(s, 0, hp);
1289  if (j & 1)
1290  mv[1].x += read_mv_component(s, 1, hp);
1291  }
1292  }
1293  }
1294 }
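/* Editorial note: the "& 1" rounding blocks above keep everything on the
 * quarter-pel grid when high-precision MVs are off: predictors are rounded
 * toward zero to even 1/8-pel values, and read_mv_component() always returns
 * an even magnitude in that mode (n is forced odd, then n + 1 is returned),
 * so predictor + delta stays even. */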
1295 
1296 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1297  ptrdiff_t stride, int v)
1298 {
1299  switch (w) {
1300  case 1:
1301  do {
1302  *ptr = v;
1303  ptr += stride;
1304  } while (--h);
1305  break;
1306  case 2: {
1307  int v16 = v * 0x0101;
1308  do {
1309  AV_WN16A(ptr, v16);
1310  ptr += stride;
1311  } while (--h);
1312  break;
1313  }
1314  case 4: {
1315  uint32_t v32 = v * 0x01010101;
1316  do {
1317  AV_WN32A(ptr, v32);
1318  ptr += stride;
1319  } while (--h);
1320  break;
1321  }
1322  case 8: {
1323 #if HAVE_FAST_64BIT
1324  uint64_t v64 = v * 0x0101010101010101ULL;
1325  do {
1326  AV_WN64A(ptr, v64);
1327  ptr += stride;
1328  } while (--h);
1329 #else
1330  uint32_t v32 = v * 0x01010101;
1331  do {
1332  AV_WN32A(ptr, v32);
1333  AV_WN32A(ptr + 4, v32);
1334  ptr += stride;
1335  } while (--h);
1336 #endif
1337  break;
1338  }
1339  }
1340 }
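/* Editorial note: setctx_2d() splats one byte value over a w x h region of a
 * byte map (e.g. b->seg_id into the frame-wide segmentation map below). The
 * switch exploits that block widths are powers of two, broadcasting the byte
 * with a multiply (v * 0x0101...) and one aligned store per row. */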
1341 
1342 static void decode_mode(AVCodecContext *ctx)
1343 {
1344  static const uint8_t left_ctx[N_BS_SIZES] = {
1345  0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1346  };
1347  static const uint8_t above_ctx[N_BS_SIZES] = {
1348  0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
1349  };
1350  static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1351  TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1352  TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1353  };
1354  VP9Context *s = ctx->priv_data;
1355  VP9Block *b = s->b;
1356  int row = s->row, col = s->col, row7 = s->row7;
1357  enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
1358  int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
1359  int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
1360  int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1361  int vref, filter_id;
1362 
1363  if (!s->segmentation.enabled) {
1364  b->seg_id = 0;
1365  } else if (s->keyframe || s->intraonly) {
1366  b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1367  } else if (!s->segmentation.update_map ||
1368  (s->segmentation.temporal &&
1369  vp56_rac_get_prob_branchy(&s->c,
1370  s->prob.segpred[s->above_segpred_ctx[col] +
1371  s->left_segpred_ctx[row7]]))) {
1372  if (!s->errorres) {
1373  int pred = 8, x;
1374  uint8_t *refsegmap = s->frames[LAST_FRAME].segmentation_map;
1375 
1376  if (!s->last_uses_2pass)
1377  ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1378  for (y = 0; y < h4; y++) {
1379  int idx_base = (y + row) * 8 * s->sb_cols + col;
1380  for (x = 0; x < w4; x++)
1381  pred = FFMIN(pred, refsegmap[idx_base + x]);
1382  if (!s->segmentation.update_map && ctx->active_thread_type == FF_THREAD_FRAME) {
1383  // FIXME maybe retain reference to previous frame as
1384  // segmap reference instead of copying the whole map
1385  // into a new buffer
1386  memcpy(&s->frames[CUR_FRAME].segmentation_map[idx_base],
1387  &refsegmap[idx_base], w4);
1388  }
1389  }
1390  av_assert1(pred < 8);
1391  b->seg_id = pred;
1392  } else {
1393  b->seg_id = 0;
1394  }
1395 
1396  memset(&s->above_segpred_ctx[col], 1, w4);
1397  memset(&s->left_segpred_ctx[row7], 1, h4);
1398  } else {
1400  s->prob.seg);
1401 
1402  memset(&s->above_segpred_ctx[col], 0, w4);
1403  memset(&s->left_segpred_ctx[row7], 0, h4);
1404  }
1405  if (s->segmentation.enabled &&
1406  (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1407  setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1408  w4, h4, 8 * s->sb_cols, b->seg_id);
1409  }
1410 
1411  b->skip = s->segmentation.enabled &&
1412  s->segmentation.feat[b->seg_id].skip_enabled;
1413  if (!b->skip) {
1414  int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1415  b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1416  s->counts.skip[c][b->skip]++;
1417  }
1418 
1419  if (s->keyframe || s->intraonly) {
1420  b->intra = 1;
1421  } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1422  b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1423  } else {
1424  int c, bit;
1425 
1426  if (have_a && have_l) {
1427  c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1428  c += (c == 2);
1429  } else {
1430  c = have_a ? 2 * s->above_intra_ctx[col] :
1431  have_l ? 2 * s->left_intra_ctx[row7] : 0;
1432  }
1433  bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1434  s->counts.intra[c][bit]++;
1435  b->intra = !bit;
1436  }
1437 
1438  if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1439  int c;
1440  if (have_a) {
1441  if (have_l) {
1442  c = (s->above_skip_ctx[col] ? max_tx :
1443  s->above_txfm_ctx[col]) +
1444  (s->left_skip_ctx[row7] ? max_tx :
1445  s->left_txfm_ctx[row7]) > max_tx;
1446  } else {
1447  c = s->above_skip_ctx[col] ? 1 :
1448  (s->above_txfm_ctx[col] * 2 > max_tx);
1449  }
1450  } else if (have_l) {
1451  c = s->left_skip_ctx[row7] ? 1 :
1452  (s->left_txfm_ctx[row7] * 2 > max_tx);
1453  } else {
1454  c = 1;
1455  }
1456  switch (max_tx) {
1457  case TX_32X32:
1458  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1459  if (b->tx) {
1460  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1461  if (b->tx == 2)
1462  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1463  }
1464  s->counts.tx32p[c][b->tx]++;
1465  break;
1466  case TX_16X16:
1467  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1468  if (b->tx)
1469  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1470  s->counts.tx16p[c][b->tx]++;
1471  break;
1472  case TX_8X8:
1473  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1474  s->counts.tx8p[c][b->tx]++;
1475  break;
1476  case TX_4X4:
1477  b->tx = TX_4X4;
1478  break;
1479  }
1480  } else {
1481  b->tx = FFMIN(max_tx, s->txfmmode);
1482  }
1483 
1484  if (s->keyframe || s->intraonly) {
1485  uint8_t *a = &s->above_mode_ctx[col * 2];
1486  uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1487 
1488  b->comp = 0;
1489  if (b->bs > BS_8x8) {
1490  // FIXME the memory storage intermediates here aren't really
1491  // necessary, they're just there to make the code slightly
1492  // simpler for now
1493  b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1494  vp9_default_kf_ymode_probs[a[0]][l[0]]);
1495  if (b->bs != BS_8x4) {
1496  b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1497  vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1498  l[0] = a[1] = b->mode[1];
1499  } else {
1500  l[0] = a[1] = b->mode[1] = b->mode[0];
1501  }
1502  if (b->bs != BS_4x8) {
1503  b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1504  vp9_default_kf_ymode_probs[a[0]][l[1]]);
1505  if (b->bs != BS_8x4) {
1506  b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1507  vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1508  l[1] = a[1] = b->mode[3];
1509  } else {
1510  l[1] = a[1] = b->mode[3] = b->mode[2];
1511  }
1512  } else {
1513  b->mode[2] = b->mode[0];
1514  l[1] = a[1] = b->mode[3] = b->mode[1];
1515  }
1516  } else {
1517  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1518  vp9_default_kf_ymode_probs[*a][*l]);
1519  b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1520  // FIXME this can probably be optimized
1521  memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1522  memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1523  }
1524  b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1525  vp9_default_kf_uvmode_probs[b->mode[3]]);
1526  } else if (b->intra) {
1527  b->comp = 0;
1528  if (b->bs > BS_8x8) {
1529  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1530  s->prob.p.y_mode[0]);
1531  s->counts.y_mode[0][b->mode[0]]++;
1532  if (b->bs != BS_8x4) {
1533  b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1534  s->prob.p.y_mode[0]);
1535  s->counts.y_mode[0][b->mode[1]]++;
1536  } else {
1537  b->mode[1] = b->mode[0];
1538  }
1539  if (b->bs != BS_4x8) {
1540  b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1541  s->prob.p.y_mode[0]);
1542  s->counts.y_mode[0][b->mode[2]]++;
1543  if (b->bs != BS_8x4) {
1544  b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1545  s->prob.p.y_mode[0]);
1546  s->counts.y_mode[0][b->mode[3]]++;
1547  } else {
1548  b->mode[3] = b->mode[2];
1549  }
1550  } else {
1551  b->mode[2] = b->mode[0];
1552  b->mode[3] = b->mode[1];
1553  }
1554  } else {
1555  static const uint8_t size_group[10] = {
1556  3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1557  };
1558  int sz = size_group[b->bs];
1559 
1560  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1561  s->prob.p.y_mode[sz]);
1562  b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1563  s->counts.y_mode[sz][b->mode[3]]++;
1564  }
1565  b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1566  s->prob.p.uv_mode[b->mode[3]]);
1567  s->counts.uv_mode[b->mode[3]][b->uvmode]++;
1568  } else {
1569  static const uint8_t inter_mode_ctx_lut[14][14] = {
1570  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1571  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1572  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1573  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1574  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1575  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1576  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1577  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1578  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1579  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1580  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1581  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1582  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1583  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1584  };
1585 
1586  if (s->segmentation.feat[b->seg_id].ref_enabled) {
1587  av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1588  b->comp = 0;
1589  b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1590  } else {
1591  // read comp_pred flag
1592  if (s->comppredmode != PRED_SWITCHABLE) {
1593  b->comp = s->comppredmode == PRED_COMPREF;
1594  } else {
1595  int c;
1596 
1597  // FIXME add intra as ref=0xff (or -1) to make these easier?
1598  if (have_a) {
1599  if (have_l) {
1600  if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1601  c = 4;
1602  } else if (s->above_comp_ctx[col]) {
1603  c = 2 + (s->left_intra_ctx[row7] ||
1604  s->left_ref_ctx[row7] == s->fixcompref);
1605  } else if (s->left_comp_ctx[row7]) {
1606  c = 2 + (s->above_intra_ctx[col] ||
1607  s->above_ref_ctx[col] == s->fixcompref);
1608  } else {
1609  c = (!s->above_intra_ctx[col] &&
1610  s->above_ref_ctx[col] == s->fixcompref) ^
1611  (!s->left_intra_ctx[row7] &&
1612  s->left_ref_ctx[row & 7] == s->fixcompref);
1613  }
1614  } else {
1615  c = s->above_comp_ctx[col] ? 3 :
1616  (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1617  }
1618  } else if (have_l) {
1619  c = s->left_comp_ctx[row7] ? 3 :
1620  (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1621  } else {
1622  c = 1;
1623  }
1624  b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1625  s->counts.comp[c][b->comp]++;
1626  }
1627 
1628  // read actual references
1629  // FIXME probably cache a few variables here to prevent repetitive
1630  // memory accesses below
1631  if (b->comp) /* two references */ {
1632  int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1633 
1634  b->ref[fix_idx] = s->fixcompref;
1635  // FIXME can this codeblob be replaced by some sort of LUT?
1636  if (have_a) {
1637  if (have_l) {
1638  if (s->above_intra_ctx[col]) {
1639  if (s->left_intra_ctx[row7]) {
1640  c = 2;
1641  } else {
1642  c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1643  }
1644  } else if (s->left_intra_ctx[row7]) {
1645  c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1646  } else {
1647  int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1648 
1649  if (refl == refa && refa == s->varcompref[1]) {
1650  c = 0;
1651  } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1652  if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1653  (refl == s->fixcompref && refa == s->varcompref[0])) {
1654  c = 4;
1655  } else {
1656  c = (refa == refl) ? 3 : 1;
1657  }
1658  } else if (!s->left_comp_ctx[row7]) {
1659  if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1660  c = 1;
1661  } else {
1662  c = (refl == s->varcompref[1] &&
1663  refa != s->varcompref[1]) ? 2 : 4;
1664  }
1665  } else if (!s->above_comp_ctx[col]) {
1666  if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1667  c = 1;
1668  } else {
1669  c = (refa == s->varcompref[1] &&
1670  refl != s->varcompref[1]) ? 2 : 4;
1671  }
1672  } else {
1673  c = (refl == refa) ? 4 : 2;
1674  }
1675  }
1676  } else {
1677  if (s->above_intra_ctx[col]) {
1678  c = 2;
1679  } else if (s->above_comp_ctx[col]) {
1680  c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1681  } else {
1682  c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1683  }
1684  }
1685  } else if (have_l) {
1686  if (s->left_intra_ctx[row7]) {
1687  c = 2;
1688  } else if (s->left_comp_ctx[row7]) {
1689  c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1690  } else {
1691  c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1692  }
1693  } else {
1694  c = 2;
1695  }
1696  bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1697  b->ref[var_idx] = s->varcompref[bit];
1698  s->counts.comp_ref[c][bit]++;
1699  } else /* single reference */ {
1700  int bit, c;
1701 
1702  if (have_a && !s->above_intra_ctx[col]) {
1703  if (have_l && !s->left_intra_ctx[row7]) {
1704  if (s->left_comp_ctx[row7]) {
1705  if (s->above_comp_ctx[col]) {
1706  c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1707  !s->above_ref_ctx[col]);
1708  } else {
1709  c = (3 * !s->above_ref_ctx[col]) +
1710  (!s->fixcompref || !s->left_ref_ctx[row7]);
1711  }
1712  } else if (s->above_comp_ctx[col]) {
1713  c = (3 * !s->left_ref_ctx[row7]) +
1714  (!s->fixcompref || !s->above_ref_ctx[col]);
1715  } else {
1716  c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1717  }
1718  } else if (s->above_intra_ctx[col]) {
1719  c = 2;
1720  } else if (s->above_comp_ctx[col]) {
1721  c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1722  } else {
1723  c = 4 * (!s->above_ref_ctx[col]);
1724  }
1725  } else if (have_l && !s->left_intra_ctx[row7]) {
1726  if (s->left_intra_ctx[row7]) {
1727  c = 2;
1728  } else if (s->left_comp_ctx[row7]) {
1729  c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1730  } else {
1731  c = 4 * (!s->left_ref_ctx[row7]);
1732  }
1733  } else {
1734  c = 2;
1735  }
1736  bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1737  s->counts.single_ref[c][0][bit]++;
1738  if (!bit) {
1739  b->ref[0] = 0;
1740  } else {
1741  // FIXME can this codeblob be replaced by some sort of LUT?
1742  if (have_a) {
1743  if (have_l) {
1744  if (s->left_intra_ctx[row7]) {
1745  if (s->above_intra_ctx[col]) {
1746  c = 2;
1747  } else if (s->above_comp_ctx[col]) {
1748  c = 1 + 2 * (s->fixcompref == 1 ||
1749  s->above_ref_ctx[col] == 1);
1750  } else if (!s->above_ref_ctx[col]) {
1751  c = 3;
1752  } else {
1753  c = 4 * (s->above_ref_ctx[col] == 1);
1754  }
1755  } else if (s->above_intra_ctx[col]) {
1756  if (s->left_intra_ctx[row7]) {
1757  c = 2;
1758  } else if (s->left_comp_ctx[row7]) {
1759  c = 1 + 2 * (s->fixcompref == 1 ||
1760  s->left_ref_ctx[row7] == 1);
1761  } else if (!s->left_ref_ctx[row7]) {
1762  c = 3;
1763  } else {
1764  c = 4 * (s->left_ref_ctx[row7] == 1);
1765  }
1766  } else if (s->above_comp_ctx[col]) {
1767  if (s->left_comp_ctx[row7]) {
1768  if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1769  c = 3 * (s->fixcompref == 1 ||
1770  s->left_ref_ctx[row7] == 1);
1771  } else {
1772  c = 2;
1773  }
1774  } else if (!s->left_ref_ctx[row7]) {
1775  c = 1 + 2 * (s->fixcompref == 1 ||
1776  s->above_ref_ctx[col] == 1);
1777  } else {
1778  c = 3 * (s->left_ref_ctx[row7] == 1) +
1779  (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1780  }
1781  } else if (s->left_comp_ctx[row7]) {
1782  if (!s->above_ref_ctx[col]) {
1783  c = 1 + 2 * (s->fixcompref == 1 ||
1784  s->left_ref_ctx[row7] == 1);
1785  } else {
1786  c = 3 * (s->above_ref_ctx[col] == 1) +
1787  (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1788  }
1789  } else if (!s->above_ref_ctx[col]) {
1790  if (!s->left_ref_ctx[row7]) {
1791  c = 3;
1792  } else {
1793  c = 4 * (s->left_ref_ctx[row7] == 1);
1794  }
1795  } else if (!s->left_ref_ctx[row7]) {
1796  c = 4 * (s->above_ref_ctx[col] == 1);
1797  } else {
1798  c = 2 * (s->left_ref_ctx[row7] == 1) +
1799  2 * (s->above_ref_ctx[col] == 1);
1800  }
1801  } else {
1802  if (s->above_intra_ctx[col] ||
1803  (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1804  c = 2;
1805  } else if (s->above_comp_ctx[col]) {
1806  c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1807  } else {
1808  c = 4 * (s->above_ref_ctx[col] == 1);
1809  }
1810  }
1811  } else if (have_l) {
1812  if (s->left_intra_ctx[row7] ||
1813  (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1814  c = 2;
1815  } else if (s->left_comp_ctx[row7]) {
1816  c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1817  } else {
1818  c = 4 * (s->left_ref_ctx[row7] == 1);
1819  }
1820  } else {
1821  c = 2;
1822  }
1823  bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1824  s->counts.single_ref[c][1][bit]++;
1825  b->ref[0] = 1 + bit;
1826  }
1827  }
1828  }
1829 
1830  if (b->bs <= BS_8x8) {
1831  if (s->segmentation.feat[b->seg_id].skip_enabled) {
1832  b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1833  } else {
1834  static const uint8_t off[10] = {
1835  3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1836  };
1837 
1838  // FIXME this needs to use the LUT tables from find_ref_mvs
1839  // because not all are -1,0/0,-1
1840  int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1841  [s->left_mode_ctx[row7 + off[b->bs]]];
1842 
1842 
1843  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1844  s->prob.p.mv_mode[c]);
1845  b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1846  s->counts.mv_mode[c][b->mode[0] - 10]++;
1847  }
1848  }
1849 
1850  if (s->filtermode == FILTER_SWITCHABLE) {
1851  int c;
1852 
1853  if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1854  if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1855  c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1856  s->left_filter_ctx[row7] : 3;
1857  } else {
1858  c = s->above_filter_ctx[col];
1859  }
1860  } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1861  c = s->left_filter_ctx[row7];
1862  } else {
1863  c = 3;
1864  }
1865 
1866  filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1867  s->prob.p.filter[c]);
1868  s->counts.filter[c][filter_id]++;
1869  b->filter = vp9_filter_lut[filter_id];
1870  } else {
1871  b->filter = s->filtermode;
1872  }
1873 
1874  if (b->bs > BS_8x8) {
1875  int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1876 
1877  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1878  s->prob.p.mv_mode[c]);
1879  s->counts.mv_mode[c][b->mode[0] - 10]++;
1880  fill_mv(s, b->mv[0], b->mode[0], 0);
1881 
1882  if (b->bs != BS_8x4) {
1883  b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1884  s->prob.p.mv_mode[c]);
1885  s->counts.mv_mode[c][b->mode[1] - 10]++;
1886  fill_mv(s, b->mv[1], b->mode[1], 1);
1887  } else {
1888  b->mode[1] = b->mode[0];
1889  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1890  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1891  }
1892 
1893  if (b->bs != BS_4x8) {
1894  b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1895  s->prob.p.mv_mode[c]);
1896  s->counts.mv_mode[c][b->mode[2] - 10]++;
1897  fill_mv(s, b->mv[2], b->mode[2], 2);
1898 
1899  if (b->bs != BS_8x4) {
1900  b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1901  s->prob.p.mv_mode[c]);
1902  s->counts.mv_mode[c][b->mode[3] - 10]++;
1903  fill_mv(s, b->mv[3], b->mode[3], 3);
1904  } else {
1905  b->mode[3] = b->mode[2];
1906  AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1907  AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1908  }
1909  } else {
1910  b->mode[2] = b->mode[0];
1911  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1912  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1913  b->mode[3] = b->mode[1];
1914  AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1915  AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
1916  }
1917  } else {
1918  fill_mv(s, b->mv[0], b->mode[0], -1);
1919  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1920  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1921  AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1922  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1923  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1924  AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
1925  }
1926 
1927  vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
1928  }
1929 
1930 #if HAVE_FAST_64BIT
1931 #define SPLAT_CTX(var, val, n) \
1932  switch (n) { \
1933  case 1: var = val; break; \
1934  case 2: AV_WN16A(&var, val * 0x0101); break; \
1935  case 4: AV_WN32A(&var, val * 0x01010101); break; \
1936  case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
1937  case 16: { \
1938  uint64_t v64 = val * 0x0101010101010101ULL; \
1939  AV_WN64A( &var, v64); \
1940  AV_WN64A(&((uint8_t *) &var)[8], v64); \
1941  break; \
1942  } \
1943  }
1944 #else
1945 #define SPLAT_CTX(var, val, n) \
1946  switch (n) { \
1947  case 1: var = val; break; \
1948  case 2: AV_WN16A(&var, val * 0x0101); break; \
1949  case 4: AV_WN32A(&var, val * 0x01010101); break; \
1950  case 8: { \
1951  uint32_t v32 = val * 0x01010101; \
1952  AV_WN32A( &var, v32); \
1953  AV_WN32A(&((uint8_t *) &var)[4], v32); \
1954  break; \
1955  } \
1956  case 16: { \
1957  uint32_t v32 = val * 0x01010101; \
1958  AV_WN32A( &var, v32); \
1959  AV_WN32A(&((uint8_t *) &var)[4], v32); \
1960  AV_WN32A(&((uint8_t *) &var)[8], v32); \
1961  AV_WN32A(&((uint8_t *) &var)[12], v32); \
1962  break; \
1963  } \
1964  }
1965 #endif
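 /* A worked example of the splat trick above (illustrative values, not
  * from the source): val = 3, n = 4 gives 3 * 0x01010101 == 0x03030303,
  * so a single aligned 32-bit store writes the same context byte into 4
  * consecutive entries. The HAVE_FAST_64BIT variant does the same with
  * 64-bit stores; the fallback splits 8/16-byte splats into 32-bit
  * stores. */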
1966 
1967  switch (bwh_tab[1][b->bs][0]) {
1968 #define SET_CTXS(dir, off, n) \
1969  do { \
1970  SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
1971  SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
1972  SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
1973  if (!s->keyframe && !s->intraonly) { \
1974  SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
1975  SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
1976  SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
1977  if (!b->intra) { \
1978  SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
1979  if (s->filtermode == FILTER_SWITCHABLE) { \
1980  SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
1981  } \
1982  } \
1983  } \
1984  } while (0)
1985  case 1: SET_CTXS(above, col, 1); break;
1986  case 2: SET_CTXS(above, col, 2); break;
1987  case 4: SET_CTXS(above, col, 4); break;
1988  case 8: SET_CTXS(above, col, 8); break;
1989  }
1990  switch (bwh_tab[1][b->bs][1]) {
1991  case 1: SET_CTXS(left, row7, 1); break;
1992  case 2: SET_CTXS(left, row7, 2); break;
1993  case 4: SET_CTXS(left, row7, 4); break;
1994  case 8: SET_CTXS(left, row7, 8); break;
1995  }
1996 #undef SPLAT_CTX
1997 #undef SET_CTXS
1998 
1999  if (!s->keyframe && !s->intraonly) {
2000  if (b->bs > BS_8x8) {
2001  int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2002 
2003  AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2004  AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2005  AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2006  AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2007  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2008  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2009  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2010  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2011  } else {
2012  int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2013 
2014  for (n = 0; n < w4 * 2; n++) {
2015  AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2016  AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2017  }
2018  for (n = 0; n < h4 * 2; n++) {
2019  AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2020  AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
2021  }
2022  }
2023  }
2024 
2025  // FIXME kinda ugly
2026  for (y = 0; y < h4; y++) {
2027  int x, o = (row + y) * s->sb_cols * 8 + col;
2028  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2029 
2030  if (b->intra) {
2031  for (x = 0; x < w4; x++) {
2032  mv[x].ref[0] =
2033  mv[x].ref[1] = -1;
2034  }
2035  } else if (b->comp) {
2036  for (x = 0; x < w4; x++) {
2037  mv[x].ref[0] = b->ref[0];
2038  mv[x].ref[1] = b->ref[1];
2039  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2040  AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2041  }
2042  } else {
2043  for (x = 0; x < w4; x++) {
2044  mv[x].ref[0] = b->ref[0];
2045  mv[x].ref[1] = -1;
2046  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2047  }
2048  }
2049  }
2050 }
2051 
2052 // FIXME merge cnt/eob arguments?
2053 static av_always_inline int
2054 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2055  int is_tx32x32, unsigned (*cnt)[6][3],
2056  unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2057  int nnz, const int16_t *scan, const int16_t (*nb)[2],
2058  const int16_t *band_counts, const int16_t *qmul)
2059 {
2060  int i = 0, band = 0, band_left = band_counts[band];
2061  uint8_t *tp = p[0][nnz];
2062  uint8_t cache[1024];
2063 
2064  do {
2065  int val, rc;
2066 
2067  val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2068  eob[band][nnz][val]++;
2069  if (!val)
2070  break;
2071 
2072  skip_eob:
2073  if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2074  cnt[band][nnz][0]++;
2075  if (!--band_left)
2076  band_left = band_counts[++band];
2077  cache[scan[i]] = 0;
2078  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2079  tp = p[band][nnz];
2080  if (++i == n_coeffs)
2081  break; // invalid input; blocks should end with EOB
2082  goto skip_eob;
2083  }
2084 
2085  rc = scan[i];
2086  if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2087  cnt[band][nnz][1]++;
2088  val = 1;
2089  cache[rc] = 1;
2090  } else {
2091  // fill in p[3-10] (model fill) - only once per frame for each pos
2092  if (!tp[3])
2093  memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2094 
2095  cnt[band][nnz][2]++;
2096  if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2097  if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2098  cache[rc] = val = 2;
2099  } else {
2100  val = 3 + vp56_rac_get_prob(c, tp[5]);
2101  cache[rc] = 3;
2102  }
2103  } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2104  cache[rc] = 4;
2105  if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2106  val = 5 + vp56_rac_get_prob(c, 159);
2107  } else {
2108  val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2109  val += vp56_rac_get_prob(c, 145);
2110  }
2111  } else { // cat 3-6
2112  cache[rc] = 5;
2113  if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2114  if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2115  val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2116  val += (vp56_rac_get_prob(c, 148) << 1);
2117  val += vp56_rac_get_prob(c, 140);
2118  } else {
2119  val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2120  val += (vp56_rac_get_prob(c, 155) << 2);
2121  val += (vp56_rac_get_prob(c, 140) << 1);
2122  val += vp56_rac_get_prob(c, 135);
2123  }
2124  } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2125  val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2126  val += (vp56_rac_get_prob(c, 157) << 3);
2127  val += (vp56_rac_get_prob(c, 141) << 2);
2128  val += (vp56_rac_get_prob(c, 134) << 1);
2129  val += vp56_rac_get_prob(c, 130);
2130  } else {
2131  val = 67 + (vp56_rac_get_prob(c, 254) << 13);
2132  val += (vp56_rac_get_prob(c, 254) << 12);
2133  val += (vp56_rac_get_prob(c, 254) << 11);
2134  val += (vp56_rac_get_prob(c, 252) << 10);
2135  val += (vp56_rac_get_prob(c, 249) << 9);
2136  val += (vp56_rac_get_prob(c, 243) << 8);
2137  val += (vp56_rac_get_prob(c, 230) << 7);
2138  val += (vp56_rac_get_prob(c, 196) << 6);
2139  val += (vp56_rac_get_prob(c, 177) << 5);
2140  val += (vp56_rac_get_prob(c, 153) << 4);
2141  val += (vp56_rac_get_prob(c, 140) << 3);
2142  val += (vp56_rac_get_prob(c, 133) << 2);
2143  val += (vp56_rac_get_prob(c, 130) << 1);
2144  val += vp56_rac_get_prob(c, 129);
2145  }
2146  }
2147  }
2148  if (!--band_left)
2149  band_left = band_counts[++band];
2150  if (is_tx32x32)
2151  coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
2152  else
2153  coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
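  // qmul[0] is the DC quantizer (used only for the first scan position,
  // i.e. !!i == 0), qmul[1] the AC quantizer; tx32x32 output is scaled
  // up by a factor of 2 relative to the smaller transforms, hence the
  // extra / 2 above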
2154  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2155  tp = p[band][nnz];
2156  } while (++i < n_coeffs);
2157 
2158  return i;
2159 }
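 /* Token value ranges implied by the branches above (derived from the
  * arithmetic, for reference): one -> 1, the tp[4]/tp[5] branch -> 2..4,
  * cat1 -> 5..6, cat2 -> 7..10, cat3 -> 11..18, cat4 -> 19..34,
  * cat5 -> 35..66, cat6 -> 67..16450 (67 plus 14 extra bits). The fixed
  * probabilities (159, 165, 145, ...) are the per-bit extra-bit
  * probabilities of the VP9 coefficient token scheme. */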
2160 
2161 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2162  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2163  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2164  const int16_t (*nb)[2], const int16_t *band_counts,
2165  const int16_t *qmul)
2166 {
2167  return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
2168  nnz, scan, nb, band_counts, qmul);
2169 }
2170 
2171 static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2172  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2173  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2174  const int16_t (*nb)[2], const int16_t *band_counts,
2175  const int16_t *qmul)
2176 {
2177  return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
2178  nnz, scan, nb, band_counts, qmul);
2179 }
2180 
2181 static void decode_coeffs(AVCodecContext *ctx)
2182 {
2183  VP9Context *s = ctx->priv_data;
2184  VP9Block *b = s->b;
2185  int row = s->row, col = s->col;
2186  uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2187  unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2188  unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2189  int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2190  int end_x = FFMIN(2 * (s->cols - col), w4);
2191  int end_y = FFMIN(2 * (s->rows - row), h4);
2192  int n, pl, x, y, res;
2193  int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2194  int tx = 4 * s->lossless + b->tx;
2195  const int16_t * const *yscans = vp9_scans[tx];
2196  const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2197  const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2198  const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2199  uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2200  uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
2201  static const int16_t band_counts[4][8] = {
2202  { 1, 2, 3, 4, 3, 16 - 13 },
2203  { 1, 2, 3, 4, 11, 64 - 21 },
2204  { 1, 2, 3, 4, 11, 256 - 21 },
2205  { 1, 2, 3, 4, 11, 1024 - 21 },
2206  };
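  /* (each row sums to the coefficient count of its tx size:
   * 1+2+3+4+3+3 = 16, 1+2+3+4+11+43 = 64, 1+2+3+4+11+235 = 256 and
   * 1+2+3+4+11+1003 = 1024, so band_left in decode_coeffs_b_generic()
   * runs out exactly at n_coeffs) */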
2207  const int16_t *y_band_counts = band_counts[b->tx];
2208  const int16_t *uv_band_counts = band_counts[b->uvtx];
2209 
2210 #define MERGE(la, end, step, rd) \
2211  for (n = 0; n < end; n += step) \
2212  la[n] = !!rd(&la[n])
2213 #define MERGE_CTX(step, rd) \
2214  do { \
2215  MERGE(l, end_y, step, rd); \
2216  MERGE(a, end_x, step, rd); \
2217  } while (0)
2218 
2219 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2220  for (n = 0, y = 0; y < end_y; y += step) { \
2221  for (x = 0; x < end_x; x += step, n += step * step) { \
2222  enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2223  res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
2224  c, e, p, a[x] + l[y], yscans[txtp], \
2225  ynbs[txtp], y_band_counts, qmul[0]); \
2226  a[x] = l[y] = !!res; \
2227  if (step >= 4) { \
2228  AV_WN16A(&s->eob[n], res); \
2229  } else { \
2230  s->eob[n] = res; \
2231  } \
2232  } \
2233  }
2234 
2235 #define SPLAT(la, end, step, cond) \
2236  if (step == 2) { \
2237  for (n = 1; n < end; n += step) \
2238  la[n] = la[n - 1]; \
2239  } else if (step == 4) { \
2240  if (cond) { \
2241  for (n = 0; n < end; n += step) \
2242  AV_WN32A(&la[n], la[n] * 0x01010101); \
2243  } else { \
2244  for (n = 0; n < end; n += step) \
2245  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2246  } \
2247  } else /* step == 8 */ { \
2248  if (cond) { \
2249  if (HAVE_FAST_64BIT) { \
2250  for (n = 0; n < end; n += step) \
2251  AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2252  } else { \
2253  for (n = 0; n < end; n += step) { \
2254  uint32_t v32 = la[n] * 0x01010101; \
2255  AV_WN32A(&la[n], v32); \
2256  AV_WN32A(&la[n + 4], v32); \
2257  } \
2258  } \
2259  } else { \
2260  for (n = 0; n < end; n += step) \
2261  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2262  } \
2263  }
2264 #define SPLAT_CTX(step) \
2265  do { \
2266  SPLAT(a, end_x, step, end_x == w4); \
2267  SPLAT(l, end_y, step, end_y == h4); \
2268  } while (0)
2269 
2270  /* y tokens */
2271  switch (b->tx) {
2272  case TX_4X4:
2273  DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2274  break;
2275  case TX_8X8:
2276  MERGE_CTX(2, AV_RN16A);
2277  DECODE_Y_COEF_LOOP(2, 0,);
2278  SPLAT_CTX(2);
2279  break;
2280  case TX_16X16:
2281  MERGE_CTX(4, AV_RN32A);
2282  DECODE_Y_COEF_LOOP(4, 0,);
2283  SPLAT_CTX(4);
2284  break;
2285  case TX_32X32:
2286  MERGE_CTX(8, AV_RN64A);
2287  DECODE_Y_COEF_LOOP(8, 0, 32);
2288  SPLAT_CTX(8);
2289  break;
2290  }
2291 
2292 #define DECODE_UV_COEF_LOOP(step) \
2293  for (n = 0, y = 0; y < end_y; y += step) { \
2294  for (x = 0; x < end_x; x += step, n += step * step) { \
2295  res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
2296  16 * step * step, c, e, p, a[x] + l[y], \
2297  uvscan, uvnb, uv_band_counts, qmul[1]); \
2298  a[x] = l[y] = !!res; \
2299  if (step >= 4) { \
2300  AV_WN16A(&s->uveob[pl][n], res); \
2301  } else { \
2302  s->uveob[pl][n] = res; \
2303  } \
2304  } \
2305  }
2306 
2307  p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2308  c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2309  e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2310  w4 >>= 1;
2311  h4 >>= 1;
2312  end_x >>= 1;
2313  end_y >>= 1;
2314  for (pl = 0; pl < 2; pl++) {
2315  a = &s->above_uv_nnz_ctx[pl][col];
2316  l = &s->left_uv_nnz_ctx[pl][row & 7];
2317  switch (b->uvtx) {
2318  case TX_4X4:
2319  DECODE_UV_COEF_LOOP(1);
2320  break;
2321  case TX_8X8:
2322  MERGE_CTX(2, AV_RN16A);
2323  DECODE_UV_COEF_LOOP(2);
2324  SPLAT_CTX(2);
2325  break;
2326  case TX_16X16:
2327  MERGE_CTX(4, AV_RN32A);
2328  DECODE_UV_COEF_LOOP(4);
2329  SPLAT_CTX(4);
2330  break;
2331  case TX_32X32:
2332  MERGE_CTX(8, AV_RN64A);
2333  // a 64x64 (max) uv block can only ever contain 1 tx32x32 block,
2334  // so there is no need to loop
2335  res = decode_coeffs_b32(&s->c, s->uvblock[pl],
2336  1024, c, e, p, a[0] + l[0],
2337  uvscan, uvnb, uv_band_counts, qmul[1]);
2338  a[0] = l[0] = !!res;
2339  AV_WN16A(&s->uveob[pl][0], res);
2340  SPLAT_CTX(8);
2341  break;
2342  }
2343  }
2344 }
2345 
2346 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2347  uint8_t *dst_edge, ptrdiff_t stride_edge,
2348  uint8_t *dst_inner, ptrdiff_t stride_inner,
2349  uint8_t *l, int col, int x, int w,
2350  int row, int y, enum TxfmMode tx,
2351  int p)
2352 {
2353  int have_top = row > 0 || y > 0;
2354  int have_left = col > s->tiling.tile_col_start || x > 0;
2355  int have_right = x < w - 1;
2356  static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2357  [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2358  { DC_127_PRED, VERT_PRED } },
2359  [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2360  { HOR_PRED, HOR_PRED } },
2361  [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2362  { LEFT_DC_PRED, DC_PRED } },
2363  [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2364  { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2365  [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2366  { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2367  [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2368  { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2369  [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2370  { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2371  [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2372  { DC_127_PRED, VERT_LEFT_PRED } },
2373  [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2374  { HOR_UP_PRED, HOR_UP_PRED } },
2375  [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2376  { HOR_PRED, TM_VP8_PRED } },
2377  };
2378  static const struct {
2379  uint8_t needs_left:1;
2380  uint8_t needs_top:1;
2381  uint8_t needs_topleft:1;
2382  uint8_t needs_topright:1;
2383  uint8_t invert_left:1;
2384  } edges[N_INTRA_PRED_MODES] = {
2385  [VERT_PRED] = { .needs_top = 1 },
2386  [HOR_PRED] = { .needs_left = 1 },
2387  [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2388  [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2389  [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2390  [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2391  [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2392  [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2393  [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2394  [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2395  [LEFT_DC_PRED] = { .needs_left = 1 },
2396  [TOP_DC_PRED] = { .needs_top = 1 },
2397  [DC_128_PRED] = { 0 },
2398  [DC_127_PRED] = { 0 },
2399  [DC_129_PRED] = { 0 }
2400  };
2401 
2402  av_assert2(mode >= 0 && mode < 10);
2403  mode = mode_conv[mode][have_left][have_top];
2404  if (edges[mode].needs_top) {
2405  uint8_t *top, *topleft;
2406  int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2407  int n_px_need_tr = 0;
2408 
2409  if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2410  n_px_need_tr = 4;
2411 
2412  // if top of sb64-row, use s->intra_pred_data[] instead of
2413  // dst[-stride] for intra prediction (it contains pre- instead of
2414  // post-loopfilter data)
2415  if (have_top) {
2416  top = !(row & 7) && !y ?
2417  s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2418  y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2419  if (have_left)
2420  topleft = !(row & 7) && !y ?
2421  s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2422  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2423  &dst_inner[-stride_inner];
2424  }
2425 
2426  if (have_top &&
2427  (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2428  (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2429  n_px_need + n_px_need_tr <= n_px_have) {
2430  *a = top;
2431  } else {
2432  if (have_top) {
2433  if (n_px_need <= n_px_have) {
2434  memcpy(*a, top, n_px_need);
2435  } else {
2436  memcpy(*a, top, n_px_have);
2437  memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2438  n_px_need - n_px_have);
2439  }
2440  } else {
2441  memset(*a, 127, n_px_need);
2442  }
2443  if (edges[mode].needs_topleft) {
2444  if (have_left && have_top) {
2445  (*a)[-1] = topleft[-1];
2446  } else {
2447  (*a)[-1] = have_top ? 129 : 127;
2448  }
2449  }
2450  if (tx == TX_4X4 && edges[mode].needs_topright) {
2451  if (have_top && have_right &&
2452  n_px_need + n_px_need_tr <= n_px_have) {
2453  memcpy(&(*a)[4], &top[4], 4);
2454  } else {
2455  memset(&(*a)[4], (*a)[3], 4);
2456  }
2457  }
2458  }
2459  }
2460  if (edges[mode].needs_left) {
2461  if (have_left) {
2462  int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2463  uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2464  ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2465 
2466  if (edges[mode].invert_left) {
2467  if (n_px_need <= n_px_have) {
2468  for (i = 0; i < n_px_need; i++)
2469  l[i] = dst[i * stride - 1];
2470  } else {
2471  for (i = 0; i < n_px_have; i++)
2472  l[i] = dst[i * stride - 1];
2473  memset(&l[n_px_have], l[n_px_have - 1], n_px_need - n_px_have);
2474  }
2475  } else {
2476  if (n_px_need <= n_px_have) {
2477  for (i = 0; i < n_px_need; i++)
2478  l[n_px_need - 1 - i] = dst[i * stride - 1];
2479  } else {
2480  for (i = 0; i < n_px_have; i++)
2481  l[n_px_need - 1 - i] = dst[i * stride - 1];
2482  memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
2483  }
2484  }
2485  } else {
2486  memset(l, 129, 4 << tx);
2487  }
2488  }
2489 
2490  return mode;
2491 }
2492 
2493 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2494 {
2495  VP9Context *s = ctx->priv_data;
2496  VP9Block *b = s->b;
2497  int row = s->row, col = s->col;
2498  int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2499  int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2500  int end_x = FFMIN(2 * (s->cols - col), w4);
2501  int end_y = FFMIN(2 * (s->rows - row), h4);
2502  int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2503  int uvstep1d = 1 << b->uvtx, p;
2504  uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2505  LOCAL_ALIGNED_32(uint8_t, a_buf, [64]);
2506  LOCAL_ALIGNED_32(uint8_t, l, [32]);
2507 
2508  for (n = 0, y = 0; y < end_y; y += step1d) {
2509  uint8_t *ptr = dst, *ptr_r = dst_r;
2510  for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2511  ptr_r += 4 * step1d, n += step) {
2512  int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2513  y * 2 + x : 0];
2514  uint8_t *a = &a_buf[32];
2515  enum TxfmType txtp = vp9_intra_txfm_type[mode];
2516  int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2517 
2518  mode = check_intra_mode(s, mode, &a, ptr_r,
2519  s->frames[CUR_FRAME].tf.f->linesize[0],
2520  ptr, s->y_stride, l,
2521  col, x, w4, row, y, b->tx, 0);
2522  s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2523  if (eob)
2524  s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2525  s->block + 16 * n, eob);
2526  }
2527  dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2528  dst += 4 * step1d * s->y_stride;
2529  }
2530 
2531  // U/V
2532  w4 >>= 1;
2533  end_x >>= 1;
2534  end_y >>= 1;
2535  step = 1 << (b->uvtx * 2);
2536  for (p = 0; p < 2; p++) {
2537  dst = s->dst[1 + p];
2538  dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2539  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2540  uint8_t *ptr = dst, *ptr_r = dst_r;
2541  for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2542  ptr_r += 4 * uvstep1d, n += step) {
2543  int mode = b->uvmode;
2544  uint8_t *a = &a_buf[32];
2545  int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2546 
2547  mode = check_intra_mode(s, mode, &a, ptr_r,
2548  s->frames[CUR_FRAME].tf.f->linesize[1],
2549  ptr, s->uv_stride, l,
2550  col, x, w4, row, y, b->uvtx, p + 1);
2551  s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2552  if (eob)
2553  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2554  s->uvblock[p] + 16 * n, eob);
2555  }
2556  dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2557  dst += 4 * uvstep1d * s->uv_stride;
2558  }
2559  }
2560 }
2561 
2562 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2563  uint8_t *dst, ptrdiff_t dst_stride,
2564  const uint8_t *ref, ptrdiff_t ref_stride,
2565  ThreadFrame *ref_frame,
2566  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2567  int bw, int bh, int w, int h,
2568  const uint16_t *scale, const uint8_t *step)
2569 {
2570 #define scale_mv(n, dim) (((int64_t)n * scale[dim]) >> 14)
2571  // BUG libvpx seems to scale the two components separately. This introduces
2572  // rounding errors but we have to reproduce them to be exactly compatible
2573  // with the output from libvpx...
2574  int mx = scale_mv(mv->x * 2, 0) + scale_mv(x * 16, 0);
2575  int my = scale_mv(mv->y * 2, 1) + scale_mv(y * 16, 1);
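  // mv is in 1/8-pel units, so the * 2 above brings mx/my to the same
  // 1/16-pel precision as the scaled step[] values and the & 15 below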
2576  int refbw_m1, refbh_m1;
2577  int th;
2578 
2579  y = my >> 4;
2580  x = mx >> 4;
2581  ref += y * ref_stride + x;
2582  mx &= 15;
2583  my &= 15;
2584  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2585  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2586  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2587  // we use +7 because the last 7 pixels of each sbrow can be changed in
2588  // the longest loopfilter of the next sbrow
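  // the >> 6 puts th in 64-pixel sbrow units, the granularity at which
  // the thread decoding the reference frame reports its progress (the
  // chroma variant shifts by 5 since its y is in subsampled pixels)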
2589  th = (y + refbh_m1 + 4 + 7) >> 6;
2590  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2591  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2592  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2593  ref - 3 * ref_stride - 3,
2594  144, ref_stride,
2595  refbw_m1 + 8, refbh_m1 + 8,
2596  x - 3, y - 3, w, h);
2597  ref = s->edge_emu_buffer + 3 * 144 + 3;
2598  ref_stride = 144;
2599  }
2600  smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
2601 }
2602 
2603 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2604  uint8_t *dst_u, uint8_t *dst_v,
2605  ptrdiff_t dst_stride,
2606  const uint8_t *ref_u, ptrdiff_t src_stride_u,
2607  const uint8_t *ref_v, ptrdiff_t src_stride_v,
2608  ThreadFrame *ref_frame,
2609  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2610  int bw, int bh, int w, int h,
2611  const uint16_t *scale, const uint8_t *step)
2612 {
2613  // BUG https://code.google.com/p/webm/issues/detail?id=820
2614  int mx = scale_mv(mv->x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2615  int my = scale_mv(mv->y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2616 #undef scale_mv
2617  int refbw_m1, refbh_m1;
2618  int th;
2619 
2620  y = my >> 4;
2621  x = mx >> 4;
2622  ref_u += y * src_stride_u + x;
2623  ref_v += y * src_stride_v + x;
2624  mx &= 15;
2625  my &= 15;
2626  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2627  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2628  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2629  // we use +7 because the last 7 pixels of each sbrow can be changed in
2630  // the longest loopfilter of the next sbrow
2631  th = (y + refbh_m1 + 4 + 7) >> 5;
2632  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2633  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2634  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2635  ref_u - 3 * src_stride_u - 3,
2636  144, src_stride_u,
2637  refbw_m1 + 8, refbh_m1 + 8,
2638  x - 3, y - 3, w, h);
2639  ref_u = s->edge_emu_buffer + 3 * 144 + 3;
2640  smc(dst_u, dst_stride, ref_u, 144, bh, mx, my, step[0], step[1]);
2641 
2642  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2643  ref_v - 3 * src_stride_v - 3,
2644  144, src_stride_v,
2645  refbw_m1 + 8, refbh_m1 + 8,
2646  x - 3, y - 3, w, h);
2647  ref_v = s->edge_emu_buffer + 3 * 144 + 3;
2648  smc(dst_v, dst_stride, ref_v, 144, bh, mx, my, step[0], step[1]);
2649  } else {
2650  smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2651  smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
2652  }
2653 }
2654 
2655 #define FN(x) x##_scaled
2656 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2657  mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2658  mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2659 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2660  row, col, mv, bw, bh, w, h, i) \
2661  mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2662  row, col, mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2663 #include "vp9_mc_template.c"
2664 #undef mc_luma_dir
2665 #undef mc_chroma_dir
2666 #undef FN
2667 
2668 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2669  uint8_t *dst, ptrdiff_t dst_stride,
2670  const uint8_t *ref, ptrdiff_t ref_stride,
2671  ThreadFrame *ref_frame,
2672  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2673  int bw, int bh, int w, int h)
2674 {
2675  int mx = mv->x, my = mv->y, th;
2676 
2677  y += my >> 3;
2678  x += mx >> 3;
2679  ref += y * ref_stride + x;
2680  mx &= 7;
2681  my &= 7;
2682  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2683  // we use +7 because the last 7 pixels of each sbrow can be changed in
2684  // the longest loopfilter of the next sbrow
2685  th = (y + bh + 4 * !!my + 7) >> 6;
2686  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2687  if (x < !!mx * 3 || y < !!my * 3 ||
2688  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2689  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2690  ref - !!my * 3 * ref_stride - !!mx * 3,
2691  80, ref_stride,
2692  bw + !!mx * 7, bh + !!my * 7,
2693  x - !!mx * 3, y - !!my * 3, w, h);
2694  ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2695  ref_stride = 80;
2696  }
2697  mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
2698 }
2699 
2700 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2701  uint8_t *dst_u, uint8_t *dst_v,
2702  ptrdiff_t dst_stride,
2703  const uint8_t *ref_u, ptrdiff_t src_stride_u,
2704  const uint8_t *ref_v, ptrdiff_t src_stride_v,
2705  ThreadFrame *ref_frame,
2706  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2707  int bw, int bh, int w, int h)
2708 {
2709  int mx = mv->x, my = mv->y, th;
2710 
2711  y += my >> 4;
2712  x += mx >> 4;
2713  ref_u += y * src_stride_u + x;
2714  ref_v += y * src_stride_v + x;
2715  mx &= 15;
2716  my &= 15;
2717  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2718  // we use +7 because the last 7 pixels of each sbrow can be changed in
2719  // the longest loopfilter of the next sbrow
2720  th = (y + bh + 4 * !!my + 7) >> 5;
2721  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2722  if (x < !!mx * 3 || y < !!my * 3 ||
2723  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2724  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2725  ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2726  80, src_stride_u,
2727  bw + !!mx * 7, bh + !!my * 7,
2728  x - !!mx * 3, y - !!my * 3, w, h);
2729  ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2730  mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2731 
2732  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2733  ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2734  80, src_stride_v,
2735  bw + !!mx * 7, bh + !!my * 7,
2736  x - !!mx * 3, y - !!my * 3, w, h);
2737  ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2738  mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
2739  } else {
2740  mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2741  mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2742  }
2743 }
2744 
2745 #define FN(x) x
2746 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2747  mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2748  mv, bw, bh, w, h)
2749 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2750  row, col, mv, bw, bh, w, h, i) \
2751  mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2752  row, col, mv, bw, bh, w, h)
2753 #include "vp9_mc_template.c"
2754 #undef mc_luma_dir
2755 #undef mc_chroma_dir
2756 #undef FN
2757 
2758 static void inter_recon(AVCodecContext *ctx)
2759 {
2760  VP9Context *s = ctx->priv_data;
2761  VP9Block *b = s->b;
2762  int row = s->row, col = s->col;
2763 
2764  if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
2765  inter_pred_scaled(ctx);
2766  } else {
2767  inter_pred(ctx);
2768  }
2769  if (!b->skip) {
2770  /* mostly copied intra_recon() */
2771 
2772  int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2773  int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2774  int end_x = FFMIN(2 * (s->cols - col), w4);
2775  int end_y = FFMIN(2 * (s->rows - row), h4);
2776  int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2777  int uvstep1d = 1 << b->uvtx, p;
2778  uint8_t *dst = s->dst[0];
2779 
2780  // y itxfm add
2781  for (n = 0, y = 0; y < end_y; y += step1d) {
2782  uint8_t *ptr = dst;
2783  for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2784  int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2785 
2786  if (eob)
2787  s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2788  s->block + 16 * n, eob);
2789  }
2790  dst += 4 * s->y_stride * step1d;
2791  }
2792 
2793  // uv itxfm add
2794  end_x >>= 1;
2795  end_y >>= 1;
2796  step = 1 << (b->uvtx * 2);
2797  for (p = 0; p < 2; p++) {
2798  dst = s->dst[p + 1];
2799  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2800  uint8_t *ptr = dst;
2801  for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2802  int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2803 
2804  if (eob)
2805  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2806  s->uvblock[p] + 16 * n, eob);
2807  }
2808  dst += 4 * uvstep1d * s->uv_stride;
2809  }
2810  }
2811  }
2812 }
2813 
2814 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2815  int row_and_7, int col_and_7,
2816  int w, int h, int col_end, int row_end,
2817  enum TxfmMode tx, int skip_inter)
2818 {
2819  // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2820  // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2821  // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2822  // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2823 
2824  // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2825  // edges. This means that for UV, we work on two subsampled blocks at
2826  // a time, and we only use the topleft block's mode information to set
2827  // things like block strength. Thus, for any block size smaller than
2828  // 16x16, ignore the odd portion of the block.
2829  if (tx == TX_4X4 && is_uv) {
2830  if (h == 1) {
2831  if (row_and_7 & 1)
2832  return;
2833  if (!row_end)
2834  h += 1;
2835  }
2836  if (w == 1) {
2837  if (col_and_7 & 1)
2838  return;
2839  if (!col_end)
2840  w += 1;
2841  }
2842  }
2843 
2844  if (tx == TX_4X4 && !skip_inter) {
2845  int t = 1 << col_and_7, m_col = (t << w) - t, y;
2846  int m_col_odd = (t << (w - 1)) - t;
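  // e.g. col_and_7 = 3, w = 2: t = 0x08 and m_col = (0x08 << 2) - 0x08 =
  // 0x18 (bits 3..4), i.e. one mask bit per 8px column covered by the
  // block; m_col_odd = (0x08 << 1) - 0x08 = 0x08 drops the last (odd) column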
2847 
2848  // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2849  if (is_uv) {
2850  int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2851 
2852  for (y = row_and_7; y < h + row_and_7; y++) {
2853  int col_mask_id = 2 - !(y & 7);
2854 
2855  lflvl->mask[is_uv][0][y][1] |= m_row_8;
2856  lflvl->mask[is_uv][0][y][2] |= m_row_4;
2857  // for odd lines, if the odd col is not being filtered,
2858  // skip odd row also:
2859  // .---. <-- a
2860  // | |
2861  // |___| <-- b
2862  // ^ ^
2863  // c d
2864  //
2865  // if a/c are even row/col and b/d are odd, and d is skipped,
2866  // e.g. right edge of size-66x66.webm, then skip b also (bug)
2867  if ((col_end & 1) && (y & 1)) {
2868  lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2869  } else {
2870  lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
2871  }
2872  }
2873  } else {
2874  int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2875 
2876  for (y = row_and_7; y < h + row_and_7; y++) {
2877  int col_mask_id = 2 - !(y & 3);
2878 
2879  lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2880  lflvl->mask[is_uv][0][y][2] |= m_row_4;
2881  lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
2882  lflvl->mask[is_uv][0][y][3] |= m_col;
2883  lflvl->mask[is_uv][1][y][3] |= m_col;
2884  }
2885  }
2886  } else {
2887  int y, t = 1 << col_and_7, m_col = (t << w) - t;
2888 
2889  if (!skip_inter) {
2890  int mask_id = (tx == TX_8X8);
2891  int l2 = tx + is_uv - 1, step1d = 1 << l2;
2892  static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2893  int m_row = m_col & masks[l2];
2894 
2895  // at odd UV tx16/tx32 col/row loopfilter edges, force the 8-px-wide
2896  // loopfilter to prevent going off the visible edge.
2897  if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2898  int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2899  int m_row_8 = m_row - m_row_16;
2900 
2901  for (y = row_and_7; y < h + row_and_7; y++) {
2902  lflvl->mask[is_uv][0][y][0] |= m_row_16;
2903  lflvl->mask[is_uv][0][y][1] |= m_row_8;
2904  }
2905  } else {
2906  for (y = row_and_7; y < h + row_and_7; y++)
2907  lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2908  }
2909 
2910  if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2911  for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2912  lflvl->mask[is_uv][1][y][0] |= m_col;
2913  if (y - row_and_7 == h - 1)
2914  lflvl->mask[is_uv][1][y][1] |= m_col;
2915  } else {
2916  for (y = row_and_7; y < h + row_and_7; y += step1d)
2917  lflvl->mask[is_uv][1][y][mask_id] |= m_col;
2918  }
2919  } else if (tx != TX_4X4) {
2920  int mask_id;
2921 
2922  mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2923  lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2924  mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2925  for (y = row_and_7; y < h + row_and_7; y++)
2926  lflvl->mask[is_uv][0][y][mask_id] |= t;
2927  } else if (is_uv) {
2928  int t8 = t & 0x01, t4 = t - t8;
2929 
2930  for (y = row_and_7; y < h + row_and_7; y++) {
2931  lflvl->mask[is_uv][0][y][2] |= t4;
2932  lflvl->mask[is_uv][0][y][1] |= t8;
2933  }
2934  lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
2935  } else {
2936  int t8 = t & 0x11, t4 = t - t8;
2937 
2938  for (y = row_and_7; y < h + row_and_7; y++) {
2939  lflvl->mask[is_uv][0][y][2] |= t4;
2940  lflvl->mask[is_uv][0][y][1] |= t8;
2941  }
2942  lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
2943  }
2944  }
2945 }
2946 
2947 static void decode_b(AVCodecContext *ctx, int row, int col,
2948  struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2949  enum BlockLevel bl, enum BlockPartition bp)
2950 {
2951  VP9Context *s = ctx->priv_data;
2952  VP9Block *b = s->b;
2953  enum BlockSize bs = bl * 3 + bp;
2954  int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2955  int emu[2];
2956  AVFrame *f = s->frames[CUR_FRAME].tf.f;
2957 
2958  s->row = row;
2959  s->row7 = row & 7;
2960  s->col = col;
2961  s->col7 = col & 7;
2962  s->min_mv.x = -(128 + col * 64);
2963  s->min_mv.y = -(128 + row * 64);
2964  s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2965  s->max_mv.y = 128 + (s->rows - row - h4) * 64;
2966  if (s->pass < 2) {
2967  b->bs = bs;
2968  b->bl = bl;
2969  b->bp = bp;
2970  decode_mode(ctx);
2971  b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
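  // e.g. an 8x8 block (w4 == 1) coded with tx == TX_8X8: 1 << tx equals
  // w4 * 2, so the half-size chroma plane steps down to uvtx = TX_4X4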
2972 
2973  if (!b->skip) {
2974  decode_coeffs(ctx);
2975  } else {
2976  int row7 = s->row7;
2977 
2978 #define SPLAT_ZERO_CTX(v, n) \
2979  switch (n) { \
2980  case 1: v = 0; break; \
2981  case 2: AV_ZERO16(&v); break; \
2982  case 4: AV_ZERO32(&v); break; \
2983  case 8: AV_ZERO64(&v); break; \
2984  case 16: AV_ZERO128(&v); break; \
2985  }
2986 #define SPLAT_ZERO_YUV(dir, var, off, n) \
2987  do { \
2988  SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
2989  SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
2990  SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
2991  } while (0)
2992 
2993  switch (w4) {
2994  case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1); break;
2995  case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2); break;
2996  case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4); break;
2997  case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8); break;
2998  }
2999  switch (h4) {
3000  case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1); break;
3001  case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2); break;
3002  case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4); break;
3003  case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8); break;
3004  }
3005  }
3006  if (s->pass == 1) {
3007  s->b++;
3008  s->block += w4 * h4 * 64;
3009  s->uvblock[0] += w4 * h4 * 16;
3010  s->uvblock[1] += w4 * h4 * 16;
3011  s->eob += 4 * w4 * h4;
3012  s->uveob[0] += w4 * h4;
3013  s->uveob[1] += w4 * h4;
3014 
3015  return;
3016  }
3017  }
3018 
3019  // emulate overhangs if the stride of the target buffer can't hold them.
3020  // This allows us to support emu-edge and so on even if we have large
3021  // block overhangs
3022  emu[0] = (col + w4) * 8 > f->linesize[0] ||
3023  (row + h4) > s->rows;
3024  emu[1] = (col + w4) * 4 > f->linesize[1] ||
3025  (row + h4) > s->rows;
3026  if (emu[0]) {
3027  s->dst[0] = s->tmp_y;
3028  s->y_stride = 64;
3029  } else {
3030  s->dst[0] = f->data[0] + yoff;
3031  s->y_stride = f->linesize[0];
3032  }
3033  if (emu[1]) {
3034  s->dst[1] = s->tmp_uv[0];
3035  s->dst[2] = s->tmp_uv[1];
3036  s->uv_stride = 32;
3037  } else {
3038  s->dst[1] = f->data[1] + uvoff;
3039  s->dst[2] = f->data[2] + uvoff;
3040  s->uv_stride = f->linesize[1];
3041  }
3042  if (b->intra) {
3043  intra_recon(ctx, yoff, uvoff);
3044  } else {
3045  inter_recon(ctx);
3046  }
3047  if (emu[0]) {
3048  int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3049 
3050  for (n = 0; o < w; n++) {
3051  int bw = 64 >> n;
3052 
3053  av_assert2(n <= 4);
3054  if (w & bw) {
3055  s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3056  s->tmp_y + o, 64, h, 0, 0);
3057  o += bw;
3058  }
3059  }
3060  }
3061  if (emu[1]) {
3062  int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
3063 
3064  for (n = 1; o < w; n++) {
3065  int bw = 64 >> n;
3066 
3067  av_assert2(n <= 4);
3068  if (w & bw) {
3069  s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3070  s->tmp_uv[0] + o, 32, h, 0, 0);
3071  s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3072  s->tmp_uv[1] + o, 32, h, 0, 0);
3073  o += bw;
3074  }
3075  }
3076  }
3077 
3078  // pick filter level and find edges to apply filter to
3079  if (s->filter.level &&
3080  (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3081  [b->mode[3] != ZEROMV]) > 0) {
3082  int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3083  int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3084 
3085  setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3086  mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3087  mask_edges(lflvl, 1, row7, col7, x_end, y_end,
3088  s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3089  s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3090  b->uvtx, skip_inter);
3091 
3092  if (!s->filter.lim_lut[lvl]) {
3093  int sharp = s->filter.sharpness;
3094  int limit = lvl;
3095 
3096  if (sharp > 0) {
3097  limit >>= (sharp + 3) >> 2;
3098  limit = FFMIN(limit, 9 - sharp);
3099  }
3100  limit = FFMAX(limit, 1);
3101 
3102  s->filter.lim_lut[lvl] = limit;
3103  s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3104  }
3105  }
3106 
3107  if (s->pass == 2) {
3108  s->b++;
3109  s->block += w4 * h4 * 64;
3110  s->uvblock[0] += w4 * h4 * 16;
3111  s->uvblock[1] += w4 * h4 * 16;
3112  s->eob += 4 * w4 * h4;
3113  s->uveob[0] += w4 * h4;
3114  s->uveob[1] += w4 * h4;
3115  }
3116 }
3117 
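/* decode_sb() below reads the partition tree for one block level. At
 * frame edges the syntax becomes implicit: if the block sticks out at
 * the bottom, only PARTITION_H or PARTITION_SPLIT are possible and a
 * single probability (p[1]) picks between them; if it sticks out to the
 * right, p[2] picks between PARTITION_V and PARTITION_SPLIT; if it
 * sticks out on both sides, PARTITION_SPLIT is forced and no bits are
 * read. */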
3118 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3119  ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3120 {
3121  VP9Context *s = ctx->priv_data;
3122  int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3123  (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3124  const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
3125  s->prob.p.partition[bl][c];
3126  enum BlockPartition bp;
3127  ptrdiff_t hbs = 4 >> bl;
3128  AVFrame *f = s->frames[CUR_FRAME].tf.f;
3129  ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3130 
3131  if (bl == BL_8X8) {
3132  bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3133  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3134  } else if (col + hbs < s->cols) { // FIXME why not <=?
3135  if (row + hbs < s->rows) { // FIXME why not <=?
3136  bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3137  switch (bp) {
3138  case PARTITION_NONE:
3139  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3140  break;
3141  case PARTITION_H:
3142  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3143  yoff += hbs * 8 * y_stride;
3144  uvoff += hbs * 4 * uv_stride;
3145  decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3146  break;
3147  case PARTITION_V:
3148  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3149  yoff += hbs * 8;
3150  uvoff += hbs * 4;
3151  decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3152  break;
3153  case PARTITION_SPLIT:
3154  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3155  decode_sb(ctx, row, col + hbs, lflvl,
3156  yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3157  yoff += hbs * 8 * y_stride;
3158  uvoff += hbs * 4 * uv_stride;
3159  decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3160  decode_sb(ctx, row + hbs, col + hbs, lflvl,
3161  yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3162  break;
3163  default:
3164  av_assert0(0);
3165  }
3166  } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3167  bp = PARTITION_SPLIT;
3168  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3169  decode_sb(ctx, row, col + hbs, lflvl,
3170  yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3171  } else {
3172  bp = PARTITION_H;
3173  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3174  }
3175  } else if (row + hbs < s->rows) { // FIXME why not <=?
3176  if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3177  bp = PARTITION_SPLIT;
3178  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3179  yoff += hbs * 8 * y_stride;
3180  uvoff += hbs * 4 * uv_stride;
3181  decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3182  } else {
3183  bp = PARTITION_V;
3184  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3185  }
3186  } else {
3187  bp = PARTITION_SPLIT;
3188  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3189  }
3190  s->counts.partition[bl][c][bp]++;
3191 }
3192 
3193 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3194  ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3195 {
3196  VP9Context *s = ctx->priv_data;
3197  VP9Block *b = s->b;
3198  ptrdiff_t hbs = 4 >> bl;
3199  AVFrame *f = s->frames[CUR_FRAME].tf.f;
3200  ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3201 
3202  if (bl == BL_8X8) {
3203  av_assert2(b->bl == BL_8X8);
3204  decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3205  } else if (s->b->bl == bl) {
3206  decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3207  if (b->bp == PARTITION_H && row + hbs < s->rows) {
3208  yoff += hbs * 8 * y_stride;
3209  uvoff += hbs * 4 * uv_stride;
3210  decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3211  } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3212  yoff += hbs * 8;
3213  uvoff += hbs * 4;
3214  decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
3215  }
3216  } else {
3217  decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3218  if (col + hbs < s->cols) { // FIXME why not <=?
3219  if (row + hbs < s->rows) {
3220  decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
3221  uvoff + 4 * hbs, bl + 1);
3222  yoff += hbs * 8 * y_stride;
3223  uvoff += hbs * 4 * uv_stride;
3224  decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3225  decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3226  yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3227  } else {
3228  yoff += hbs * 8;
3229  uvoff += hbs * 4;
3230  decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3231  }
3232  } else if (row + hbs < s->rows) {
3233  yoff += hbs * 8 * y_stride;
3234  uvoff += hbs * 4 * uv_stride;
3235  decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3236  }
3237  }
3238 }
3239 
3240 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3241  int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3242 {
3243  VP9Context *s = ctx->priv_data;
3244  AVFrame *f = s->frames[CUR_FRAME].tf.f;
3245  uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
3246  ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3247  int y, x, p;
3248 
3249  // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3250  // if you think of them as acting on a 8x8 block max, we can interleave
3251  // each v/h within the single x loop, but that only works if we work on
3252  // 8 pixel blocks, and we won't always do that (we want at least 16px
3253  // to use SSE2 optimizations, perhaps 32 for AVX2)
3254 
3255  // filter edges between columns, Y plane (e.g. block1 | block2)
3256  for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
3257  uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
3258  uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
3259  unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3260  unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3261  unsigned hm = hm1 | hm2 | hm13 | hm23;
3262 
3263  for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
3264  if (hm1 & x) {
3265  int L = *l, H = L >> 4;
3266  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3267 
3268  if (col || x > 1) {
3269  if (hmask1[0] & x) {
3270  if (hmask2[0] & x) {
3271  av_assert2(l[8] == L);
3272  s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
3273  } else {
3274  s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
3275  }
3276  } else if (hm2 & x) {
3277  L = l[8];
3278  H |= (L >> 4) << 8;
3279  E |= s->filter.mblim_lut[L] << 8;
3280  I |= s->filter.lim_lut[L] << 8;
3281  s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3282  [!!(hmask2[1] & x)]
3283  [0](ptr, ls_y, E, I, H);
3284  } else {
3285  s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3286  [0](ptr, ls_y, E, I, H);
3287  }
3288  }
3289  } else if (hm2 & x) {
3290  int L = l[8], H = L >> 4;
3291  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3292 
3293  if (col || x > 1) {
3294  s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3295  [0](ptr + 8 * ls_y, ls_y, E, I, H);
3296  }
3297  }
3298  if (hm13 & x) {
3299  int L = *l, H = L >> 4;
3300  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3301 
3302  if (hm23 & x) {
3303  L = l[8];
3304  H |= (L >> 4) << 8;
3305  E |= s->filter.mblim_lut[L] << 8;
3306  I |= s->filter.lim_lut[L] << 8;
3307  s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
3308  } else {
3309  s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
3310  }
3311  } else if (hm23 & x) {
3312  int L = l[8], H = L >> 4;
3313  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3314 
3315  s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
3316  }
3317  }
3318  }
3319 
3320  // block1
3321  // filter edges between rows, Y plane (e.g. ------)
3322  // block2
3323  dst = f->data[0] + yoff;
3324  lvl = lflvl->level;
3325  for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3326  uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3327  unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3328 
3329  for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3330  if (row || y) {
3331  if (vm & x) {
3332  int L = *l, H = L >> 4;
3333  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3334 
3335  if (vmask[0] & x) {
3336  if (vmask[0] & (x << 1)) {
3337  av_assert2(l[1] == L);
3338  s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
3339  } else {
3340  s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
3341  }
3342  } else if (vm & (x << 1)) {
3343  L = l[1];
3344  H |= (L >> 4) << 8;
3345  E |= s->filter.mblim_lut[L] << 8;
3346  I |= s->filter.lim_lut[L] << 8;
3347  s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3348  [!!(vmask[1] & (x << 1))]
3349  [1](ptr, ls_y, E, I, H);
3350  } else {
3351  s->dsp.loop_filter_8[!!(vmask[1] & x)]
3352  [1](ptr, ls_y, E, I, H);
3353  }
3354  } else if (vm & (x << 1)) {
3355  int L = l[1], H = L >> 4;
3356  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3357 
3358  s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
3359  [1](ptr + 8, ls_y, E, I, H);
3360  }
3361  }
3362  if (vm3 & x) {
3363  int L = *l, H = L >> 4;
3364  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3365 
3366  if (vm3 & (x << 1)) {
3367  L = l[1];
3368  H |= (L >> 4) << 8;
3369  E |= s->filter.mblim_lut[L] << 8;
3370  I |= s->filter.lim_lut[L] << 8;
3371  s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
3372  } else {
3373  s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
3374  }
3375  } else if (vm3 & (x << 1)) {
3376  int L = l[1], H = L >> 4;
3377  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3378 
3379  s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
3380  }
3381  }
3382  }
3383 
3384  // same principle but for U/V planes
3385  for (p = 0; p < 2; p++) {
3386  lvl = lflvl->level;
3387  dst = f->data[1 + p] + uvoff;
3388  for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3389  uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3390  uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
3391  unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3392  unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3393 
3394  for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3395  if (col || x > 1) {
3396  if (hm1 & x) {
3397  int L = *l, H = L >> 4;
3398  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3399 
3400  if (hmask1[0] & x) {
3401  if (hmask2[0] & x) {
3402  av_assert2(l[16] == L);
3403  s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
3404  } else {
3405  s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
3406  }
3407  } else if (hm2 & x) {
3408  L = l[16];
3409  H |= (L >> 4) << 8;
3410  E |= s->filter.mblim_lut[L] << 8;
3411  I |= s->filter.lim_lut[L] << 8;
3412  s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3413  [!!(hmask2[1] & x)]
3414  [0](ptr, ls_uv, E, I, H);
3415  } else {
3416  s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3417  [0](ptr, ls_uv, E, I, H);
3418  }
3419  } else if (hm2 & x) {
3420  int L = l[16], H = L >> 4;
3421  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3422 
3423  s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3424  [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
3425  }
3426  }
3427  if (x & 0xAA)
3428  l += 2;
3429  }
3430  }
3431  lvl = lflvl->level;
3432  dst = f->data[1 + p] + uvoff;
3433  for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3434  uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3435  unsigned vm = vmask[0] | vmask[1] | vmask[2];
3436 
3437  for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3438  if (row || y) {
3439  if (vm & x) {
3440  int L = *l, H = L >> 4;
3441  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3442 
3443  if (vmask[0] & x) {
3444  if (vmask[0] & (x << 2)) {
3445  av_assert2(l[2] == L);
3446  s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3447  } else {
3448  s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3449  }
3450  } else if (vm & (x << 2)) {
3451  L = l[2];
3452  H |= (L >> 4) << 8;
3453  E |= s->filter.mblim_lut[L] << 8;
3454  I |= s->filter.lim_lut[L] << 8;
3455  s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3456  [!!(vmask[1] & (x << 2))]
3457  [1](ptr, ls_uv, E, I, H);
3458  } else {
3459  s->dsp.loop_filter_8[!!(vmask[1] & x)]
3460  [1](ptr, ls_uv, E, I, H);
3461  }
3462  } else if (vm & (x << 2)) {
3463  int L = l[2], H = L >> 4;
3464  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3465 
3466  s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3467  [1](ptr + 8, ls_uv, E, I, H);
3468  }
3469  }
3470  }
3471  if (y & 1)
3472  lvl += 16;
3473  }
3474  }
3475 }
3476 
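/* Worked example for set_tile_offset() (illustrative numbers): with
 * log2_n = 1 (two tiles) and n = 9 superblocks, idx 0 covers sb 0..3
 * and idx 1 covers sb 4..8; the << 3 converts superblock counts into
 * 8x8-block units, giving start/end of 0/32 and 32/72. */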
3477 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
3478 {
3479  int sb_start = ( idx * n) >> log2_n;
3480  int sb_end = ((idx + 1) * n) >> log2_n;
3481  *start = FFMIN(sb_start, n) << 3;
3482  *end = FFMIN(sb_end, n) << 3;
3483 }
3484 
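/* adapt_prob() merges the stored probability p1 with the probability p2
 * observed in this frame's counts, weighted by how much data was seen.
 * Worked example (illustrative counts): ct0 = 20, ct1 = 12,
 * max_count = 20, update_factor = 128: p2 = ((20 << 8) + 16) / 32 = 160,
 * ct saturates at 20 so the factor stays 128, and *p moves halfway from
 * p1 towards 160. */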
3485 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3486  int max_count, int update_factor)
3487 {
3488  unsigned ct = ct0 + ct1, p2, p1;
3489 
3490  if (!ct)
3491  return;
3492 
3493  p1 = *p;
3494  p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3495  p2 = av_clip(p2, 1, 255);
3496  ct = FFMIN(ct, max_count);
3497  update_factor = FASTDIV(update_factor * ct, max_count);
3498 
3499  // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3500  *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3501 }
3502 
3503 static void adapt_probs(VP9Context *s)
3504 {
3505  int i, j, k, l, m;
3506  prob_context *p = &s->prob_ctx[s->framectxid].p;
3507  int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
3508 
3509  // coefficients
3510  for (i = 0; i < 4; i++)
3511  for (j = 0; j < 2; j++)
3512  for (k = 0; k < 2; k++)
3513  for (l = 0; l < 6; l++)
3514  for (m = 0; m < 6; m++) {
3515  uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3516  unsigned *e = s->counts.eob[i][j][k][l][m];
3517  unsigned *c = s->counts.coef[i][j][k][l][m];
3518 
3519  if (l == 0 && m >= 3) // dc only has 3 pt
3520  break;
3521 
3522  adapt_prob(&pp[0], e[0], e[1], 24, uf);
3523  adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3524  adapt_prob(&pp[2], c[1], c[2], 24, uf);
3525  }
3526 
3527  if (s->keyframe || s->intraonly) {
3528  memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3529  memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3530  memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3531  memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3532  return;
3533  }
3534 
3535  // skip flag
3536  for (i = 0; i < 3; i++)
3537  adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3538 
3539  // intra/inter flag
3540  for (i = 0; i < 4; i++)
3541  adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3542 
3543  // comppred flag
3544  if (s->comppredmode == PRED_SWITCHABLE) {
3545  for (i = 0; i < 5; i++)
3546  adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3547  }
3548 
3549  // reference frames
3550  if (s->comppredmode != PRED_SINGLEREF) {
3551  for (i = 0; i < 5; i++)
3552  adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3553  s->counts.comp_ref[i][1], 20, 128);
3554  }
3555 
3556  if (s->comppredmode != PRED_COMPREF) {
3557  for (i = 0; i < 5; i++) {
3558  uint8_t *pp = p->single_ref[i];
3559  unsigned (*c)[2] = s->counts.single_ref[i];
3560 
3561  adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3562  adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3563  }
3564  }
3565 
3566  // block partitioning
3567  for (i = 0; i < 4; i++)
3568  for (j = 0; j < 4; j++) {
3569  uint8_t *pp = p->partition[i][j];
3570  unsigned *c = s->counts.partition[i][j];
3571 
3572  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3573  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3574  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3575  }
3576 
3577  // tx size
3578  if (s->txfmmode == TX_SWITCHABLE) {
3579  for (i = 0; i < 2; i++) {
3580  unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3581 
3582  adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3583  adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3584  adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3585  adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3586  adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3587  adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3588  }
3589  }
3590 
3591  // interpolation filter
3592  if (s->filtermode == FILTER_SWITCHABLE) {
3593  for (i = 0; i < 4; i++) {
3594  uint8_t *pp = p->filter[i];
3595  unsigned *c = s->counts.filter[i];
3596 
3597  adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3598  adapt_prob(&pp[1], c[1], c[2], 20, 128);
3599  }
3600  }
3601 
3602  // inter modes
3603  for (i = 0; i < 7; i++) {
3604  uint8_t *pp = p->mv_mode[i];
3605  unsigned *c = s->counts.mv_mode[i];
3606 
3607  adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3608  adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3609  adapt_prob(&pp[2], c[1], c[3], 20, 128);
3610  }
3611 
3612  // mv joints
3613  {
3614  uint8_t *pp = p->mv_joint;
3615  unsigned *c = s->counts.mv_joint;
3616 
3617  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3618  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3619  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3620  }
3621 
3622  // mv components
3623  for (i = 0; i < 2; i++) {
3624  uint8_t *pp;
3625  unsigned *c, (*c2)[2], sum;
3626 
3627  adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3628  s->counts.mv_comp[i].sign[1], 20, 128);
3629 
3630  pp = p->mv_comp[i].classes;
3631  c = s->counts.mv_comp[i].classes;
3632  sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3633  adapt_prob(&pp[0], c[0], sum, 20, 128);
3634  sum -= c[1];
3635  adapt_prob(&pp[1], c[1], sum, 20, 128);
3636  sum -= c[2] + c[3];
3637  adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3638  adapt_prob(&pp[3], c[2], c[3], 20, 128);
3639  sum -= c[4] + c[5];
3640  adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3641  adapt_prob(&pp[5], c[4], c[5], 20, 128);
3642  sum -= c[6];
3643  adapt_prob(&pp[6], c[6], sum, 20, 128);
3644  adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3645  adapt_prob(&pp[8], c[7], c[8], 20, 128);
3646  adapt_prob(&pp[9], c[9], c[10], 20, 128);
3647 
3648  adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3649  s->counts.mv_comp[i].class0[1], 20, 128);
3650  pp = p->mv_comp[i].bits;
3651  c2 = s->counts.mv_comp[i].bits;
3652  for (j = 0; j < 10; j++)
3653  adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3654 
3655  for (j = 0; j < 2; j++) {
3656  pp = p->mv_comp[i].class0_fp[j];
3657  c = s->counts.mv_comp[i].class0_fp[j];
3658  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3659  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3660  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3661  }
3662  pp = p->mv_comp[i].fp;
3663  c = s->counts.mv_comp[i].fp;
3664  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3665  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3666  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3667 
3668  if (s->highprecisionmvs) {
3669  adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3670  s->counts.mv_comp[i].class0_hp[1], 20, 128);
3671  adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3672  s->counts.mv_comp[i].hp[1], 20, 128);
3673  }
3674  }
3675 
3676  // y intra modes
3677  for (i = 0; i < 4; i++) {
3678  uint8_t *pp = p->y_mode[i];
3679  unsigned *c = s->counts.y_mode[i], sum, s2;
3680 
3681  sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3682  adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3683  sum -= c[TM_VP8_PRED];
3684  adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3685  sum -= c[VERT_PRED];
3686  adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3687  s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3688  sum -= s2;
3689  adapt_prob(&pp[3], s2, sum, 20, 128);
3690  s2 -= c[HOR_PRED];
3691  adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3692  adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3693  sum -= c[DIAG_DOWN_LEFT_PRED];
3694  adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3695  sum -= c[VERT_LEFT_PRED];
3696  adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3697  adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3698  }
3699 
3700  // uv intra modes
3701  for (i = 0; i < 10; i++) {
3702  uint8_t *pp = p->uv_mode[i];
3703  unsigned *c = s->counts.uv_mode[i], sum, s2;
3704 
3705  sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3706  adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3707  sum -= c[TM_VP8_PRED];
3708  adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3709  sum -= c[VERT_PRED];
3710  adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3711  s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3712  sum -= s2;
3713  adapt_prob(&pp[3], s2, sum, 20, 128);
3714  s2 -= c[HOR_PRED];
3715  adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3716  adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3717  sum -= c[DIAG_DOWN_LEFT_PRED];
3718  adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3719  sum -= c[VERT_LEFT_PRED];
3720  adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3721  adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3722  }
3723 }
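/* The recurring pattern in adapt_probs() mirrors VP9's binary-tree symbol
 * model: each node probability is updated with the count of its left branch
 * versus the combined count of everything in its right subtree. For the
 * partition tree, for instance, pp[0] separates PARTITION_NONE (c[0]) from
 * the other three types, pp[1] splits off PARTITION_H (c[1]), and pp[2]
 * decides between PARTITION_V (c[2]) and PARTITION_SPLIT (c[3]). */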
3724 
3725 static void free_buffers(VP9Context *s)
3726 {
3727  av_freep(&s->intra_pred_data[0]);
3728  av_freep(&s->b_base);
3729  av_freep(&s->block_base);
3730 }
3731 
3732 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3733 {
3734  VP9Context *s = ctx->priv_data;
3735  int i;
3736 
3737  for (i = 0; i < 2; i++) {
3738  if (s->frames[i].tf.f->data[0])
3739  vp9_unref_frame(ctx, &s->frames[i]);
3740  av_frame_free(&s->frames[i].tf.f);
3741  }
3742  for (i = 0; i < 8; i++) {
3743  if (s->refs[i].f->data[0])
3744  ff_thread_release_buffer(ctx, &s->refs[i]);
3745  av_frame_free(&s->refs[i].f);
3746  if (s->next_refs[i].f->data[0])
3747  ff_thread_release_buffer(ctx, &s->next_refs[i]);
3748  av_frame_free(&s->next_refs[i].f);
3749  }
3750  free_buffers(s);
3751  av_freep(&s->c_b);
3752  s->c_b_size = 0;
3753 
3754  return 0;
3755 }
3756 
3757 
3758 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3759  int *got_frame, AVPacket *pkt)
3760 {
3761  const uint8_t *data = pkt->data;
3762  int size = pkt->size;
3763  VP9Context *s = ctx->priv_data;
3764  int res, tile_row, tile_col, i, ref, row, col;
3765  ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3766  AVFrame *f;
3767 
3768  if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3769  return res;
3770  } else if (res == 0) {
3771  if (!s->refs[ref].f->data[0]) {
3772  av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3773  return AVERROR_INVALIDDATA;
3774  }
3775  if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
3776  return res;
3777  *got_frame = 1;
3778  return pkt->size;
3779  }
3780  data += res;
3781  size -= res;
3782 
3783  if (s->frames[LAST_FRAME].tf.f->data[0])
3784  vp9_unref_frame(ctx, &s->frames[LAST_FRAME]);
3785  if (!s->keyframe && s->frames[CUR_FRAME].tf.f->data[0] &&
3786  (res = vp9_ref_frame(ctx, &s->frames[LAST_FRAME], &s->frames[CUR_FRAME])) < 0)
3787  return res;
3788  if (s->frames[CUR_FRAME].tf.f->data[0])
3789  vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
3790  if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3791  return res;
3792  f = s->frames[CUR_FRAME].tf.f;
3793  f->key_frame = s->keyframe;
3794  f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3795  ls_y = f->linesize[0];
3796  ls_uv = f->linesize[1];
3797 
3798  // ref frame setup
3799  for (i = 0; i < 8; i++) {
3800  if (s->next_refs[i].f->data[0])
3801  ff_thread_release_buffer(ctx, &s->next_refs[i]);
3802  if (s->refreshrefmask & (1 << i)) {
3803  res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3804  } else {
3805  res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
3806  }
3807  if (res < 0)
3808  return res;
3809  }
3810 
3811  if (s->fullrange)
3812  ctx->color_range = AVCOL_RANGE_JPEG;
3813  else
3814  ctx->color_range = AVCOL_RANGE_MPEG;
3815 
3816  switch (s->colorspace) {
3817  case 1: ctx->colorspace = AVCOL_SPC_BT470BG; break;
3818  case 2: ctx->colorspace = AVCOL_SPC_BT709; break;
3819  case 3: ctx->colorspace = AVCOL_SPC_SMPTE170M; break;
3820  case 4: ctx->colorspace = AVCOL_SPC_SMPTE240M; break;
3821  }
3822 
3823  // main tile decode loop
3824  memset(s->above_partition_ctx, 0, s->cols);
3825  memset(s->above_skip_ctx, 0, s->cols);
3826  if (s->keyframe || s->intraonly) {
3827  memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3828  } else {
3829  memset(s->above_mode_ctx, NEARESTMV, s->cols);
3830  }
3831  memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3832  memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
3833  memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
3834  memset(s->above_segpred_ctx, 0, s->cols);
3835  s->pass = s->uses_2pass =
3836  ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
3837  if ((res = update_block_buffers(ctx)) < 0) {
3838  av_log(ctx, AV_LOG_ERROR,
3839  "Failed to allocate block buffers\n");
3840  return res;
3841  }
3842  if (s->refreshctx && s->parallelmode) {
3843  int j, k, l, m;
3844 
3845  for (i = 0; i < 4; i++) {
3846  for (j = 0; j < 2; j++)
3847  for (k = 0; k < 2; k++)
3848  for (l = 0; l < 6; l++)
3849  for (m = 0; m < 6; m++)
3850  memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3851  s->prob.coef[i][j][k][l][m], 3);
3852  if (s->txfmmode == i)
3853  break;
3854  }
3855  s->prob_ctx[s->framectxid].p = s->prob.p;
3856  ff_thread_finish_setup(ctx);
3857  } else if (!s->refreshctx) {
3858  ff_thread_finish_setup(ctx);
3859  }
3860 
3861  do {
3862  yoff = uvoff = 0;
3863  s->b = s->b_base;
3864  s->block = s->block_base;
3865  s->uvblock[0] = s->uvblock_base[0];
3866  s->uvblock[1] = s->uvblock_base[1];
3867  s->eob = s->eob_base;
3868  s->uveob[0] = s->uveob_base[0];
3869  s->uveob[1] = s->uveob_base[1];
3870 
3871  for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3872  set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3873  tile_row, s->tiling.log2_tile_rows, s->sb_rows);
3874  if (s->pass != 2) {
3875  for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3876  int64_t tile_size;
3877 
3878  if (tile_col == s->tiling.tile_cols - 1 &&
3879  tile_row == s->tiling.tile_rows - 1) {
3880  tile_size = size;
3881  } else {
3882  tile_size = AV_RB32(data);
3883  data += 4;
3884  size -= 4;
3885  }
3886  if (tile_size > size) {
3887  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3888  return AVERROR_INVALIDDATA;
3889  }
3890  ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3891  if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
3892  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3893  return AVERROR_INVALIDDATA;
3894  }
3895  data += tile_size;
3896  size -= tile_size;
3897  }
3898  }
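/* At this point the per-tile range coders in s->c_b[] have been seeded from
 * a packet laid out as [size0][tile0][size1][tile1]...[tileN]: each tile is
 * preceded by a 32-bit big-endian byte count, except the last, which simply
 * consumes the remaining payload (the tile_size logic above). */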
3899 
3900  for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
3901  row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
3902  struct VP9Filter *lflvl_ptr = s->lflvl;
3903  ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3904 
3905  for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3906  set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3907  tile_col, s->tiling.log2_tile_cols, s->sb_cols);
3908 
3909  if (s->pass != 2) {
3910  memset(s->left_partition_ctx, 0, 8);
3911  memset(s->left_skip_ctx, 0, 8);
3912  if (s->keyframe || s->intraonly) {
3913  memset(s->left_mode_ctx, DC_PRED, 16);
3914  } else {
3915  memset(s->left_mode_ctx, NEARESTMV, 8);
3916  }
3917  memset(s->left_y_nnz_ctx, 0, 16);
3918  memset(s->left_uv_nnz_ctx, 0, 16);
3919  memset(s->left_segpred_ctx, 0, 8);
3920 
3921  memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3922  }
3923 
3924  for (col = s->tiling.tile_col_start;
3925  col < s->tiling.tile_col_end;
3926  col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3927  // FIXME integrate with lf code (i.e. zero after each
3928  // use, similar to invtxfm coefficients, or similar)
3929  if (s->pass != 1) {
3930  memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
3931  }
3932 
3933  if (s->pass == 2) {
3934  decode_sb_mem(ctx, row, col, lflvl_ptr,
3935  yoff2, uvoff2, BL_64X64);
3936  } else {
3937  decode_sb(ctx, row, col, lflvl_ptr,
3938  yoff2, uvoff2, BL_64X64);
3939  }
3940  }
3941  if (s->pass != 2) {
3942  memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
3943  }
3944  }
3945 
3946  if (s->pass == 1) {
3947  continue;
3948  }
3949 
3950  // backup pre-loopfilter reconstruction data for intra
3951  // prediction of next row of sb64s
3952  if (row + 8 < s->rows) {
3953  memcpy(s->intra_pred_data[0],
3954  f->data[0] + yoff + 63 * ls_y,
3955  8 * s->cols);
3956  memcpy(s->intra_pred_data[1],
3957  f->data[1] + uvoff + 31 * ls_uv,
3958  4 * s->cols);
3959  memcpy(s->intra_pred_data[2],
3960  f->data[2] + uvoff + 31 * ls_uv,
3961  4 * s->cols);
3962  }
3963 
3964  // loopfilter one row
3965  if (s->filter.level) {
3966  yoff2 = yoff;
3967  uvoff2 = uvoff;
3968  lflvl_ptr = s->lflvl;
3969  for (col = 0; col < s->cols;
3970  col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3971  loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
3972  }
3973  }
3974 
3975  // FIXME maybe we can make this more finegrained by running the
3976  // loopfilter per-block instead of after each sbrow
3977  // In fact that would also make intra pred left preparation easier?
3978  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
3979  }
3980  }
3981 
3982  if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
3983  adapt_probs(s);
3984  ff_thread_finish_setup(ctx);
3985  }
3986  } while (s->pass++ == 1);
3987  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3988 
3989  // ref frame setup
3990  for (i = 0; i < 8; i++) {
3991  if (s->refs[i].f->data[0])
3992  ff_thread_release_buffer(ctx, &s->refs[i]);
3993  ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
3994  }
3995 
3996  if (!s->invisible) {
3997  if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
3998  return res;
3999  *got_frame = 1;
4000  }
4001 
4002  return pkt->size;
4003 }
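/* A minimal sketch (under the avcodec_decode_video2() API of this libavcodec
 * generation) of the calling sequence that reaches vp9_decode_frame() above.
 * read_next_packet() and use_frame() are hypothetical helpers, not FFmpeg
 * API; error handling is elided. */
#if 0
    AVCodec *codec;
    AVCodecContext *avctx;
    AVFrame *frame = av_frame_alloc();
    AVPacket pkt;
    int got_frame;

    avcodec_register_all();
    codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
    avctx = avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);
    while (read_next_packet(&pkt)) {        /* hypothetical demuxing helper */
        avcodec_decode_video2(avctx, frame, &got_frame, &pkt);
        if (got_frame)
            use_frame(frame);               /* hypothetical consumer */
        av_free_packet(&pkt);
    }
#endif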
4004 
4005 static void vp9_decode_flush(AVCodecContext *ctx)
4006 {
4007  VP9Context *s = ctx->priv_data;
4008  int i;
4009 
4010  for (i = 0; i < 2; i++)
4011  vp9_unref_frame(ctx, &s->frames[i]);
4012  for (i = 0; i < 8; i++)
4013  ff_thread_release_buffer(ctx, &s->refs[i]);
4014 }
4015 
4016 static int init_frames(AVCodecContext *ctx)
4017 {
4018  VP9Context *s = ctx->priv_data;
4019  int i;
4020 
4021  for (i = 0; i < 2; i++) {
4022  s->frames[i].tf.f = av_frame_alloc();
4023  if (!s->frames[i].tf.f) {
4024  vp9_decode_free(ctx);
4025  av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4026  return AVERROR(ENOMEM);
4027  }
4028  }
4029  for (i = 0; i < 8; i++) {
4030  s->refs[i].f = av_frame_alloc();
4031  s->next_refs[i].f = av_frame_alloc();
4032  if (!s->refs[i].f || !s->next_refs[i].f) {
4033  vp9_decode_free(ctx);
4034  av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4035  return AVERROR(ENOMEM);
4036  }
4037  }
4038 
4039  return 0;
4040 }
4041 
4042 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4043 {
4044  VP9Context *s = ctx->priv_data;
4045 
4046  ctx->internal->allocate_progress = 1;
4047  ctx->pix_fmt = AV_PIX_FMT_YUV420P;
4048  ff_vp9dsp_init(&s->dsp);
4049  ff_videodsp_init(&s->vdsp, 8);
4050  s->filter.sharpness = -1;
4051 
4052  return init_frames(ctx);
4053 }
4054 
4055 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4056 {
4057  return init_frames(avctx);
4058 }
4059 
4060 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4061 {
4062  int i, res;
4063  VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4064 
4065  // detect size changes in other threads
4066  if (s->intra_pred_data[0] &&
4067  (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
4068  free_buffers(s);
4069  }
4070 
4071  for (i = 0; i < 2; i++) {
4072  if (s->frames[i].tf.f->data[0])
4073  vp9_unref_frame(dst, &s->frames[i]);
4074  if (ssrc->frames[i].tf.f->data[0]) {
4075  if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
4076  return res;
4077  }
4078  }
4079  for (i = 0; i < 8; i++) {
4080  if (s->refs[i].f->data[0])
4081  ff_thread_release_buffer(dst, &s->refs[i]);
4082  if (ssrc->next_refs[i].f->data[0]) {
4083  if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
4084  return res;
4085  }
4086  }
4087 
4088  s->invisible = ssrc->invisible;
4089  s->keyframe = ssrc->keyframe;
4090  s->uses_2pass = ssrc->uses_2pass;
4091  memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4092  memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4093  if (ssrc->segmentation.enabled) {
4094  memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4095  sizeof(s->segmentation.feat));
4096  }
4097 
4098  return 0;
4099 }
4100 
4101 AVCodec ff_vp9_decoder = {
4102  .name = "vp9",
4103  .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4104  .type = AVMEDIA_TYPE_VIDEO,
4105  .id = AV_CODEC_ID_VP9,
4106  .priv_data_size = sizeof(VP9Context),
4107  .init = vp9_decode_init,
4108  .close = vp9_decode_free,
4109  .decode = vp9_decode_frame,
4110  .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4111  .flush = vp9_decode_flush,
4112  .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4113  .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4114 };