diff --git a/README.md b/README.md index 53b39d7a..37bfa449 100644 --- a/README.md +++ b/README.md @@ -200,18 +200,22 @@ Compression tools: --(no-)signhide : Sign hiding [disabled] --(no-)smp : Symmetric motion partition [disabled] --(no-)amp : Asymmetric motion partition [disabled] - --rd : Intra mode search complexity [0] + --rd : Mode search complexity [0] - 0: Skip intra if inter is good enough. - 1: Rough intra mode search with SATD. - - 2: Refine intra mode search with SSE. - - 3: Try all intra modes and enable intra - chroma mode search. + - 2: Refine mode search with SSE. + - 3: More SSE candidates for inter and + chroma mode search for 4x4 intra. + - 4: Even more SSE candidates for both. + - 5: Try all intra modes. --(no-)mv-rdo : Rate-distortion optimized motion vector costs [disabled] --(no-)zero-coeff-rdo : If a CU is set inter, check if forcing zero residual improves the RD cost. [enabled] --(no-)full-intra-search : Try all intra modes during rough search. [disabled] + --(no-)intra-chroma-search : Test non-derived intra chroma modes. + [disabled] --(no-)transform-skip : Try transform skip [disabled] --me : Integer motion estimation algorithm [hexbs] - hexbs: Hexagon Based Search @@ -227,6 +231,7 @@ Compression tools: - 2: + 1/2-pixel diagonal - 3: + 1/4-pixel horizontal and vertical - 4: + 1/4-pixel diagonal + --(no-)fast-bipred : Only perform fast bipred search. 
[enabled] --pu-depth-inter - : Inter prediction units sizes [0-3] - 0, 1, 2, 3: from 64x64 to 8x8 - Accepts a list of values separated by ',' diff --git a/configure.ac b/configure.ac index 074d3bea..0115e9e8 100644 --- a/configure.ac +++ b/configure.ac @@ -23,7 +23,7 @@ AC_CONFIG_SRCDIR([src/encmain.c]) # # Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html ver_major=7 -ver_minor=1 +ver_minor=2 ver_release=0 # Prevents configure from adding a lot of defines to the CFLAGS diff --git a/doc/kvazaar.1 b/doc/kvazaar.1 index ad8fb5b5..5b6bb4f5 100644 --- a/doc/kvazaar.1 +++ b/doc/kvazaar.1 @@ -1,4 +1,4 @@ -.TH KVAZAAR "1" "June 2022" "kvazaar v2.1.0" "User Commands" +.TH KVAZAAR "1" "September 2022" "kvazaar v2.1.0" "User Commands" .SH NAME kvazaar \- open source HEVC encoder .SH SYNOPSIS @@ -252,12 +252,14 @@ Symmetric motion partition [disabled] Asymmetric motion partition [disabled] .TP \fB\-\-rd -Intra mode search complexity [0] +Mode search complexity [0] \- 0: Skip intra if inter is good enough. \- 1: Rough intra mode search with SATD. - \- 2: Refine intra mode search with SSE. - \- 3: Try all intra modes and enable intra - chroma mode search. + \- 2: Refine mode search with SSE. + \- 3: More SSE candidates for inter and + chroma mode search for 4x4 intra. + \- 4: Even more SSE candidates for both. + \- 5: Try all intra modes. .TP \fB\-\-(no\-)mv\-rdo Rate\-distortion optimized motion vector costs @@ -271,6 +273,10 @@ residual improves the RD cost. [enabled] Try all intra modes during rough search. [disabled] .TP +\fB\-\-(no\-)intra\-chroma\-search +Test non\-derived intra chroma modes. + [disabled] +.TP \fB\-\-(no\-)transform\-skip Try transform skip [disabled] .TP @@ -294,6 +300,9 @@ Fractional pixel motion estimation level [4] \- 3: + 1/4\-pixel horizontal and vertical \- 4: + 1/4\-pixel diagonal .TP +\fB\-\-(no\-)fast\-bipred +Only perform fast bipred search. 
[enabled] +.TP \fB\-\-pu\-depth\-inter \- Inter prediction units sizes [0\-3] \- 0, 1, 2, 3: from 64x64 to 8x8 diff --git a/src/cabac.c b/src/cabac.c index 7cd7d926..ddd9c72a 100644 --- a/src/cabac.c +++ b/src/cabac.c @@ -272,15 +272,19 @@ void kvz_cabac_encode_bins_ep(cabac_data_t * const data, uint32_t bin_values, in * \param symbol Value of coeff_abs_level_minus3. * \param r_param Reference to Rice parameter. */ -void kvz_cabac_write_coeff_remain(cabac_data_t * const cabac, const uint32_t symbol, const uint32_t r_param) +int kvz_cabac_write_coeff_remain(cabac_data_t* const cabac, const uint32_t symbol, const uint32_t r_param) { int32_t code_number = symbol; uint32_t length; + int bits = 0; + if (code_number < (3 << r_param)) { length = code_number >> r_param; CABAC_BINS_EP(cabac, (1 << (length + 1)) - 2 , length + 1, "coeff_abs_level_remaining"); + bits += length + 1; CABAC_BINS_EP(cabac, (code_number % (1 << r_param)), r_param, "coeff_abs_level_remaining"); + bits += r_param; } else { length = r_param; code_number = code_number - (3 << r_param); @@ -289,8 +293,11 @@ void kvz_cabac_write_coeff_remain(cabac_data_t * const cabac, const uint32_t sym ++length; } CABAC_BINS_EP(cabac, (1 << (3 + length + 1 - r_param)) - 2, 3 + length + 1 - r_param, "coeff_abs_level_remaining"); + bits += 3 + length + 1 - r_param; CABAC_BINS_EP(cabac, code_number, length, "coeff_abs_level_remaining"); + bits += length; } + return bits; } void kvz_cabac_write_coeff_remain_encry(struct encoder_state_t * const state, cabac_data_t * const cabac,const uint32_t symbol, const uint32_t r_param, int32_t base_level) diff --git a/src/cabac.h b/src/cabac.h index b15cbb75..b164e6f7 100644 --- a/src/cabac.h +++ b/src/cabac.h @@ -116,8 +116,8 @@ void kvz_cabac_encode_bins_ep(cabac_data_t *data, uint32_t bin_values, int num_b void kvz_cabac_encode_bin_trm(cabac_data_t *data, uint8_t bin_value); void kvz_cabac_write(cabac_data_t *data); void kvz_cabac_finish(cabac_data_t *data); -void 
kvz_cabac_write_coeff_remain(cabac_data_t *cabac, uint32_t symbol, - uint32_t r_param); +int kvz_cabac_write_coeff_remain(cabac_data_t* cabac, uint32_t symbol, + uint32_t r_param); void kvz_cabac_write_coeff_remain_encry(struct encoder_state_t * const state, cabac_data_t * const cabac, const uint32_t symbol, const uint32_t r_param, int32_t base_level); uint32_t kvz_cabac_write_ep_ex_golomb(struct encoder_state_t * const state, cabac_data_t *data, diff --git a/src/cfg.c b/src/cfg.c index b49915df..b5278980 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -186,6 +186,8 @@ int kvz_config_init(kvz_config *cfg) cfg->combine_intra_cus = 1; cfg->force_inter = 0; + cfg->intra_chroma_search = 0; + cfg->fast_bipred = 1; return 1; } @@ -703,7 +705,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) }, { "veryslow", - "rd", "2", + "rd", "3", "pu-depth-intra", "1-4", "pu-depth-inter", "0-3", "me", "tz", @@ -731,7 +733,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) }, { "placebo", - "rd", "2", + "rd", "3", "pu-depth-intra", "1-4", "pu-depth-inter", "0-3", "me", "tz", @@ -1399,6 +1401,12 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) else if OPT("force-inter") { cfg->force_inter = atobool(value); } + else if OPT("intra-chroma-search") { + cfg->intra_chroma_search = atobool(value); + } + else if OPT("fast-bipred") { + cfg->fast_bipred = atobool(value); + } else { return 0; } @@ -1590,8 +1598,8 @@ int kvz_config_validate(const kvz_config *const cfg) error = 1; } - if (cfg->rdo < 0 || cfg->rdo > 3) { - fprintf(stderr, "Input error: --rd parameter out of range [0..3]\n"); + if (cfg->rdo < 0 || cfg->rdo > 5) { + fprintf(stderr, "Input error: --rd parameter out of range [0..5]\n"); error = 1; } diff --git a/src/cli.c b/src/cli.c index ffc00a68..aa2e2251 100644 --- a/src/cli.c +++ b/src/cli.c @@ -172,6 +172,10 @@ static const struct option long_options[] = { { "no-combine-intra-cus", no_argument, NULL, 
0 }, { "force-inter", no_argument, NULL, 0 }, { "no-force-inter", no_argument, NULL, 0 }, + { "intra-chroma-search", no_argument, NULL, 0 }, + { "no-intra-chroma-search", no_argument, NULL, 0 }, + { "fast-bipred", no_argument, NULL, 0 }, + { "no-fast-bipred", no_argument, NULL, 0 }, {0, 0, 0, 0} }; @@ -550,18 +554,22 @@ void print_help(void) " --(no-)signhide : Sign hiding [disabled]\n" " --(no-)smp : Symmetric motion partition [disabled]\n" " --(no-)amp : Asymmetric motion partition [disabled]\n" - " --rd : Intra mode search complexity [0]\n" + " --rd : Mode search complexity [0]\n" " - 0: Skip intra if inter is good enough.\n" " - 1: Rough intra mode search with SATD.\n" - " - 2: Refine intra mode search with SSE.\n" - " - 3: Try all intra modes and enable intra\n" - " chroma mode search.\n" + " - 2: Refine mode search with SSE.\n" + " - 3: More SSE candidates for inter and\n" + " chroma mode search for 4x4 intra.\n" + " - 4: Even more SSE candidates for both.\n" + " - 5: Try all intra modes.\n" " --(no-)mv-rdo : Rate-distortion optimized motion vector costs\n" " [disabled]\n" " --(no-)zero-coeff-rdo : If a CU is set inter, check if forcing zero\n" " residual improves the RD cost. [enabled]\n" " --(no-)full-intra-search : Try all intra modes during rough search.\n" " [disabled]\n" + " --(no-)intra-chroma-search : Test non-derived intra chroma modes.\n" + " [disabled]\n" " --(no-)transform-skip : Try transform skip [disabled]\n" " --me : Integer motion estimation algorithm [hexbs]\n" " - hexbs: Hexagon Based Search\n" @@ -577,6 +585,7 @@ void print_help(void) " - 2: + 1/2-pixel diagonal\n" " - 3: + 1/4-pixel horizontal and vertical\n" " - 4: + 1/4-pixel diagonal\n" + " --(no-)fast-bipred : Only perform fast bipred search. 
[enabled]\n" " --pu-depth-inter - : Inter prediction units sizes [0-3]\n" " - 0, 1, 2, 3: from 64x64 to 8x8\n" " - Accepts a list of values separated by ','\n" diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index afff8a06..c005cd81 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -63,11 +63,12 @@ void kvz_encode_last_significant_xy(cabac_data_t * const cabac, uint8_t lastpos_x, uint8_t lastpos_y, uint8_t width, uint8_t height, - uint8_t type, uint8_t scan) + uint8_t type, uint8_t scan, double* bits_out) { const int index = kvz_math_floor_log2(width) - 2; uint8_t ctx_offset = type ? 0 : (index * 3 + (index + 1) / 4); uint8_t shift = type ? index : (index + 3) / 4; + double bits = 0; cabac_ctx_t *base_ctx_x = (type ? cabac->ctx.cu_ctx_last_x_chroma : cabac->ctx.cu_ctx_last_x_luma); cabac_ctx_t *base_ctx_y = (type ? cabac->ctx.cu_ctx_last_y_chroma : cabac->ctx.cu_ctx_last_y_luma); @@ -81,37 +82,36 @@ void kvz_encode_last_significant_xy(cabac_data_t * const cabac, // x prefix for (int last_x = 0; last_x < group_idx_x; last_x++) { - cabac->cur_ctx = &base_ctx_x[ctx_offset + (last_x >> shift)]; - CABAC_BIN(cabac, 1, "last_sig_coeff_x_prefix"); + CABAC_FBITS_UPDATE(cabac, &base_ctx_x[ctx_offset + (last_x >> shift)], 1, bits, "last_sig_coeff_x_prefix"); } if (group_idx_x < g_group_idx[width - 1]) { - cabac->cur_ctx = &base_ctx_x[ctx_offset + (group_idx_x >> shift)]; - CABAC_BIN(cabac, 0, "last_sig_coeff_x_prefix"); + CABAC_FBITS_UPDATE(cabac, &base_ctx_x[ctx_offset + (group_idx_x >> shift)], 0, bits, "last_sig_coeff_x_prefix"); } // y prefix for (int last_y = 0; last_y < group_idx_y; last_y++) { - cabac->cur_ctx = &base_ctx_y[ctx_offset + (last_y >> shift)]; - CABAC_BIN(cabac, 1, "last_sig_coeff_y_prefix"); + CABAC_FBITS_UPDATE(cabac, &base_ctx_y[ctx_offset + (last_y >> shift)], 1, bits, "last_sig_coeff_y_prefix"); } if (group_idx_y < g_group_idx[height - 1]) { - cabac->cur_ctx = &base_ctx_y[ctx_offset + (group_idx_y >> shift)]; - 
CABAC_BIN(cabac, 0, "last_sig_coeff_y_prefix"); + CABAC_FBITS_UPDATE(cabac, &base_ctx_y[ctx_offset + (group_idx_y >> shift)], 0, bits, "last_sig_coeff_y_prefix"); } // last_sig_coeff_x_suffix if (group_idx_x > 3) { const int suffix = lastpos_x - g_min_in_group[group_idx_x]; - const int bits = (group_idx_x - 2) / 2; - CABAC_BINS_EP(cabac, suffix, bits, "last_sig_coeff_x_suffix"); + const int write_bits = (group_idx_x - 2) / 2; + CABAC_BINS_EP(cabac, suffix, write_bits, "last_sig_coeff_x_suffix"); + if (cabac->only_count) bits += write_bits; } // last_sig_coeff_y_suffix if (group_idx_y > 3) { const int suffix = lastpos_y - g_min_in_group[group_idx_y]; - const int bits = (group_idx_y - 2) / 2; - CABAC_BINS_EP(cabac, suffix, bits, "last_sig_coeff_y_suffix"); + const int write_bits = (group_idx_y - 2) / 2; + CABAC_BINS_EP(cabac, suffix, write_bits, "last_sig_coeff_y_suffix"); + if (cabac->only_count) bits += write_bits; } + if (cabac->only_count && bits_out) *bits_out += bits; } static void encode_transform_unit(encoder_state_t * const state, @@ -142,7 +142,7 @@ static void encode_transform_unit(encoder_state_t * const state, width, 0, scan_idx, - cur_pu->tr_skip); + cur_pu->tr_skip, NULL); } if (depth == MAX_DEPTH + 1) { @@ -172,11 +172,11 @@ static void encode_transform_unit(encoder_state_t * const state, const coeff_t *coeff_v = &state->coeff->v[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)]; if (cbf_is_set(cur_pu->cbf, depth, COLOR_U)) { - kvz_encode_coeff_nxn(state, &state->cabac, coeff_u, width_c, 2, scan_idx, 0); + kvz_encode_coeff_nxn(state, &state->cabac, coeff_u, width_c, 2, scan_idx, 0, NULL); } if (cbf_is_set(cur_pu->cbf, depth, COLOR_V)) { - kvz_encode_coeff_nxn(state, &state->cabac, coeff_v, width_c, 2, scan_idx, 0); + kvz_encode_coeff_nxn(state, &state->cabac, coeff_v, width_c, 2, scan_idx, 0, NULL); } } } diff --git a/src/encode_coding_tree.h b/src/encode_coding_tree.h index d189e6e0..58e4c981 100644 --- a/src/encode_coding_tree.h +++ 
b/src/encode_coding_tree.h @@ -74,6 +74,6 @@ void kvz_encode_inter_prediction_unit(encoder_state_t* const state, void kvz_encode_last_significant_xy(cabac_data_t * const cabac, uint8_t lastpos_x, uint8_t lastpos_y, uint8_t width, uint8_t height, - uint8_t type, uint8_t scan); + uint8_t type, uint8_t scan, double* bits_out); #endif // ENCODE_CODING_TREE_H_ diff --git a/src/fast_coeff_cost.h b/src/fast_coeff_cost.h index dee647f7..e33a1e6d 100644 --- a/src/fast_coeff_cost.h +++ b/src/fast_coeff_cost.h @@ -46,59 +46,60 @@ typedef struct { // Weights for 4 buckets (coeff 0, coeff 1, coeff 2, coeff >= 3), for QPs from // 0 to MAX_FAST_COEFF_COST_QP static const double default_fast_coeff_cost_wts[][4] = { - // Just extend it by stretching the first actual values.. - {0.164240, 4.161530, 3.509033, 6.928047}, - {0.164240, 4.161530, 3.509033, 6.928047}, - {0.164240, 4.161530, 3.509033, 6.928047}, - {0.164240, 4.161530, 3.509033, 6.928047}, - {0.164240, 4.161530, 3.509033, 6.928047}, - {0.164240, 4.161530, 3.509033, 6.928047}, - {0.164240, 4.161530, 3.509033, 6.928047}, - {0.164240, 4.161530, 3.509033, 6.928047}, - {0.164240, 4.161530, 3.509033, 6.928047}, - {0.164240, 4.161530, 3.509033, 6.928047}, - // up to here - {0.164240, 4.161530, 3.509033, 6.928047}, - {0.162844, 4.055940, 3.564467, 6.861493}, - {0.128729, 4.311973, 3.942837, 6.935403}, - {0.110956, 4.433190, 3.945753, 6.877697}, - {0.095026, 4.483547, 4.194173, 6.781540}, - {0.075046, 4.633703, 4.084193, 6.698600}, - {0.052426, 4.967223, 4.027210, 6.549197}, - {0.040219, 5.141820, 3.982650, 6.461557}, - {0.035090, 5.192493, 3.830950, 6.418477}, - {0.029845, 5.211647, 3.815457, 6.345440}, - {0.023522, 5.322213, 3.816537, 6.360677}, - {0.021305, 5.225923, 3.842700, 6.325787}, - {0.015878, 5.183090, 3.956003, 6.329680}, - {0.010430, 5.099230, 4.176803, 6.305400}, - {0.008433, 5.030257, 4.237587, 6.270133}, - {0.006500, 4.969247, 4.339397, 6.217827}, - {0.004929, 4.923500, 4.442413, 6.183523}, - {0.003715, 4.915583, 
4.429090, 6.125320}, - {0.003089, 4.883907, 4.562790, 6.156447}, - {0.002466, 4.881063, 4.629883, 6.142643}, - {0.002169, 4.882493, 4.646313, 6.127663}, - {0.002546, 4.793337, 4.837413, 6.199270}, - {0.001314, 4.808853, 4.828337, 6.243437}, - {0.001154, 4.862603, 4.846883, 6.205523}, - {0.000984, 4.866403, 4.859330, 6.240893}, - {0.000813, 4.856633, 4.924527, 6.293413}, - {0.001112, 4.789260, 5.009880, 6.433540}, - {0.000552, 4.760747, 5.090447, 6.599380}, - {0.000391, 4.961447, 5.111033, 6.756370}, - {0.000332, 4.980953, 5.138127, 6.867420}, - {0.000201, 5.181957, 4.740160, 6.460997}, - {0.000240, 5.185390, 4.874840, 6.819093}, - {0.000130, 5.270350, 4.734213, 6.826240}, - {0.000104, 5.371937, 4.595087, 6.659253}, - {0.000083, 5.362000, 4.617470, 6.837770}, - {0.000069, 5.285997, 4.754993, 7.159043}, - {0.000049, 5.488470, 4.396107, 6.727357}, - {0.000058, 4.958940, 4.580460, 6.477740}, - {0.000028, 5.521253, 4.440493, 7.205017}, - {0.000000, 0.000000, 0.000000, 0.000000}, - {0.000019, 5.811260, 4.399110, 7.336310}, +{0.162000, 4.126087, 3.499517, 6.969847}, +{0.162000, 4.126087, 3.499517, 6.969847}, +{0.162000, 4.126087, 3.499517, 6.969847}, +{0.162000, 4.126087, 3.499517, 6.969847}, +{0.162000, 4.126087, 3.499517, 6.969847}, +{0.162000, 4.126087, 3.499517, 6.969847}, +{0.162000, 4.126087, 3.499517, 6.969847}, +{0.162000, 4.126087, 3.499517, 6.969847}, +{0.162000, 4.126087, 3.499517, 6.969847}, +{0.162000, 4.126087, 3.499517, 6.969847}, +{0.162000, 4.126087, 3.499517, 6.969847}, +{0.157760, 4.037673, 3.558663, 6.895640}, +{0.127943, 4.308060, 3.916680, 6.962907}, +{0.110555, 4.422860, 3.944640, 6.898343}, +{0.094532, 4.479287, 4.161790, 6.804273}, +{0.074032, 4.629857, 4.042727, 6.722910}, +{0.051644, 4.960970, 4.001523, 6.556783}, +{0.039513, 5.133963, 3.951247, 6.472487}, +{0.034188, 5.185183, 3.805350, 6.418810}, +{0.028981, 5.203517, 3.785043, 6.351090}, +{0.022543, 5.315690, 3.796553, 6.347457}, +{0.020300, 5.221910, 3.817927, 6.322733}, +{0.015400, 
5.170127, 3.937963, 6.326643}, +{0.010147, 5.088577, 4.143093, 6.293030}, +{0.008239, 5.017160, 4.204780, 6.267220}, +{0.006386, 4.956723, 4.303120, 6.208533}, +{0.004876, 4.912990, 4.400863, 6.175370}, +{0.003707, 4.905997, 4.388617, 6.134007}, +{0.003089, 4.872320, 4.521937, 6.153827}, +{0.002479, 4.864330, 4.591423, 6.152587}, +{0.002180, 4.864427, 4.607133, 6.141223}, +{0.002556, 4.771863, 4.793583, 6.232397}, +{0.001316, 4.793543, 4.787927, 6.272543}, +{0.001169, 4.845383, 4.787190, 6.235333}, +{0.001000, 4.849327, 4.805003, 6.273347}, +{0.000830, 4.839947, 4.866000, 6.346927}, +{0.001131, 4.772140, 4.969497, 6.448050}, +{0.000553, 4.743423, 5.050670, 6.663760}, +{0.000466, 4.800883, 5.034373, 6.601250}, +{0.000400, 4.797313, 5.079183, 6.743547}, +{0.000333, 4.783170, 5.142737, 6.869933}, +{0.000355, 4.915657, 5.217510, 7.225673}, +{0.000186, 4.973477, 5.151287, 7.280497}, +{0.000113, 5.316010, 4.509893, 6.585287}, +{0.000091, 5.304703, 4.553107, 6.773803}, +{0.000076, 5.263460, 4.689990, 6.962153}, +{0.000064, 5.190947, 4.733550, 7.100820}, +{0.000053, 5.180677, 4.833283, 7.340667}, +{0.000047, 5.182963, 4.829380, 7.338863}, +{0.000032, 5.389257, 4.518127, 7.265003}, +{0.000020, 5.970297, 3.981997, 7.201180}, +{0.000000, 0.000000, 0.000000, 0.000000}, + + }; typedef struct encoder_state_t encoder_state_t; diff --git a/src/kvazaar.h b/src/kvazaar.h index 834206da..99d5ef3b 100644 --- a/src/kvazaar.h +++ b/src/kvazaar.h @@ -488,6 +488,10 @@ typedef struct kvz_config uint8_t combine_intra_cus; uint8_t force_inter; + + uint8_t intra_chroma_search; + + uint8_t fast_bipred; } kvz_config; /** diff --git a/src/rdo.c b/src/rdo.c index 84539697..7be86cf2 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -172,10 +172,10 @@ int kvz_init_rdcost_outfiles(const char *dir_path) // As long as QP is a two-digit number, template and produced string should // be equal in length ("%i" -> "22") assert(RD_SAMPLING_MAX_LAST_QP <= 99); - assert(strlen(fn_template) <= 
RD_SAMPLING_MAX_FN_LENGTH); strncpy(fn_template, dir_path, RD_SAMPLING_MAX_FN_LENGTH); strncat(fn_template, basename_tmpl, RD_SAMPLING_MAX_FN_LENGTH - strlen(dir_path)); + assert(strlen(fn_template) <= RD_SAMPLING_MAX_FN_LENGTH); for (qp = 0; qp <= RD_SAMPLING_MAX_LAST_QP; qp++) { pthread_mutex_t *curr = outfile_mutex + qp; @@ -233,7 +233,7 @@ int kvz_init_rdcost_outfiles(const char *dir_path) * * \returns bits needed to code input coefficients */ -static INLINE uint32_t get_coeff_cabac_cost( +static INLINE double get_coeff_cabac_cost( const encoder_state_t * const state, const coeff_t *coeff, int32_t width, @@ -257,8 +257,7 @@ static INLINE uint32_t get_coeff_cabac_cost( // Clear bytes and bits and set mode to "count" cabac_copy.only_count = 1; - int num_buffered_bytes = cabac_copy.num_buffered_bytes; - int bits_left = cabac_copy.bits_left; + double bits = 0; // Execute the coding function. // It is safe to drop the const modifier since state won't be modified @@ -269,14 +268,15 @@ static INLINE uint32_t get_coeff_cabac_cost( width, type, scan_mode, - 0); + 0, + &bits); if(cabac_copy.update) { memcpy((cabac_data_t *)&state->search_cabac, &cabac_copy, sizeof(cabac_copy)); } - return (bits_left - cabac_copy.bits_left) + ((cabac_copy.num_buffered_bytes - num_buffered_bytes) << 3); + return bits; } -static INLINE void save_ccc(int qp, const coeff_t *coeff, int32_t size, uint32_t ccc) +static INLINE void save_ccc(int qp, const coeff_t *coeff, int32_t size, double ccc) { pthread_mutex_t *mtx = outfile_mutex + qp; @@ -292,14 +292,14 @@ static INLINE void save_ccc(int qp, const coeff_t *coeff, int32_t size, uint32_t pthread_mutex_unlock(mtx); } -static INLINE void save_accuracy(int qp, uint32_t ccc, uint32_t fast_cost) +static INLINE void save_accuracy(int qp, double ccc, double fast_cost) { pthread_mutex_t *mtx = outfile_mutex + qp; assert(qp <= RD_SAMPLING_MAX_LAST_QP); pthread_mutex_lock(mtx); - fprintf(fastrd_learning_outfile[qp], "%u %u\n", fast_cost, ccc); + 
fprintf(fastrd_learning_outfile[qp], "%f %f\n", fast_cost, ccc); pthread_mutex_unlock(mtx); } @@ -312,7 +312,7 @@ static INLINE void save_accuracy(int qp, uint32_t ccc, uint32_t fast_cost) * * \returns number of bits needed to code coefficients */ -uint32_t kvz_get_coeff_cost(const encoder_state_t * const state, +double kvz_get_coeff_cost(const encoder_state_t * const state, const coeff_t *coeff, int32_t width, int32_t type, @@ -331,15 +331,15 @@ uint32_t kvz_get_coeff_cost(const encoder_state_t * const state, return UINT32_MAX; // Hush little compiler don't you cry, not really gonna return anything after assert(0) } else { uint64_t weights = kvz_fast_coeff_get_weights(state); - uint32_t fast_cost = kvz_fast_coeff_cost(coeff, width, weights); + double fast_cost = kvz_fast_coeff_cost(coeff, width, weights); if (check_accuracy) { - uint32_t ccc = get_coeff_cabac_cost(state, coeff, width, type, scan_mode); + double ccc = get_coeff_cabac_cost(state, coeff, width, type, scan_mode); save_accuracy(state->qp, ccc, fast_cost); } return fast_cost; } } else { - uint32_t ccc = get_coeff_cabac_cost(state, coeff, width, type, scan_mode); + double ccc = get_coeff_cabac_cost(state, coeff, width, type, scan_mode); if (save_cccs) { save_ccc(state->qp, coeff, width * width, ccc); } diff --git a/src/rdo.h b/src/rdo.h index 23453eee..794dc42b 100644 --- a/src/rdo.h +++ b/src/rdo.h @@ -54,7 +54,7 @@ void kvz_close_rdcost_outfiles(void); void kvz_rdoq(encoder_state_t *state, coeff_t *coef, coeff_t *dest_coeff, int32_t width, int32_t height, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth); -uint32_t kvz_get_coeff_cost(const encoder_state_t * const state, +double kvz_get_coeff_cost(const encoder_state_t * const state, const coeff_t *coeff, int32_t width, int32_t type, diff --git a/src/search.c b/src/search.c index 3c7ecb4a..381cd946 100644 --- a/src/search.c +++ b/src/search.c @@ -311,13 +311,13 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state, // Add 
transform_tree cbf_luma bit cost. const int is_tr_split = tr_cu->tr_depth - tr_cu->depth; + int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_Y); if (pred_cu->type == CU_INTRA || is_tr_split || cbf_is_set(tr_cu->cbf, depth, COLOR_U) || cbf_is_set(tr_cu->cbf, depth, COLOR_V)) { cabac_ctx_t *ctx = &(cabac->ctx.qt_cbf_model_luma[!is_tr_split]); - int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_Y); CABAC_FBITS_UPDATE(cabac, ctx, is_set, tr_tree_bits, "cbf_y_search"); } @@ -336,7 +336,8 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state, int8_t luma_scan_mode = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); const coeff_t *coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)]; - coeff_bits += kvz_get_coeff_cost(state, coeffs, width, 0, luma_scan_mode); + if(is_set) + coeff_bits += kvz_get_coeff_cost(state, coeffs, width, 0, luma_scan_mode); } double bits = tr_tree_bits + coeff_bits; @@ -366,6 +367,8 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, return 0; } + int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U); + int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V); // See luma for why the second condition if (depth < MAX_PU_DEPTH && (!state->search_cabac.update || tr_cu->tr_depth != tr_cu->depth) && !skip_residual_coding) { const int tr_depth = depth - pred_cu->depth; @@ -373,11 +376,9 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, cabac_ctx_t *ctx = &(cabac->ctx.qt_cbf_model_chroma[tr_depth]); cabac->cur_ctx = ctx; if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) { - int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U); CABAC_FBITS_UPDATE(cabac, ctx, u_is_set, tr_tree_bits, "cbf_cb_search"); } if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) { - int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V); CABAC_FBITS_UPDATE(cabac, ctx, v_is_set, tr_tree_bits, "cbf_cb_search"); } } @@ -412,8 +413,8 @@ double kvz_cu_rd_cost_chroma(const 
encoder_state_t *const state, int8_t scan_order = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth); const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); - coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], width, 2, scan_order); - coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], width, 2, scan_order); + if(u_is_set)coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], width, 2, scan_order); + if(v_is_set)coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], width, 2, scan_order); } double bits = tr_tree_bits + coeff_bits; @@ -518,7 +519,8 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state, int8_t luma_scan_mode = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)]; - coeff_bits += kvz_get_coeff_cost(state, coeffs, width, 0, luma_scan_mode); + if(cb_flag_y) + coeff_bits += kvz_get_coeff_cost(state, coeffs, width, 0, luma_scan_mode); } unsigned chroma_ssd = 0; @@ -540,8 +542,8 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state, int8_t scan_order = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth); const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); - coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], chroma_width, 2, scan_order); - coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], chroma_width, 2, scan_order); + if(cb_flag_u)coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], chroma_width, 2, scan_order); + if (cb_flag_v)coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], chroma_width, 2, scan_order); } } @@ -812,7 +814,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, // rd2. Possibly because the luma mode search already takes chroma // into account, so there is less of a chanse of luma mode being // really bad for chroma. 
- if (ctrl->cfg.rdo == 3) { + if (ctrl->cfg.rdo >= 2 && ctrl->cfg.intra_chroma_search) { cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); } diff --git a/src/search_inter.c b/src/search_inter.c index 34da9f9a..9e624532 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -1711,7 +1711,7 @@ static void search_pu_inter(encoder_state_t * const state, merge->unit[merge->size].skipped = false; double bits = merge_flag_cost + merge_idx + CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.cu_merge_idx_ext_model), merge_idx != 0); - if(state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) { + if(state->encoder_control->cfg.rdo >= 3 && cur_pu->part_size == SIZE_2Nx2N) { kvz_cu_cost_inter_rd2(state, x, y, depth, &merge->unit[merge->size], lcu, &merge->cost[merge->size], &bits); } else { @@ -1739,14 +1739,14 @@ static void search_pu_inter(encoder_state_t * const state, bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400; if (cfg->early_skip && cur_pu->part_size == SIZE_2Nx2N) { for (int merge_key = 0; merge_key < num_rdo_cands; ++merge_key) { - if(cfg->rdo >= 2 && merge->unit[merge->keys[merge_key]].skipped) { + if(cfg->rdo >= 3 && merge->unit[merge->keys[merge_key]].skipped) { merge->size = 1; merge->bits[0] = merge->bits[merge->keys[merge_key]]; merge->cost[0] = merge->cost[merge->keys[merge_key]]; merge->unit[0] = merge->unit[merge->keys[merge_key]]; merge->keys[0] = 0; } - else if(cfg->rdo < 2) { + else if(cfg->rdo < 3) { // Reconstruct blocks with merge candidate. // Check luma CBF. Then, check chroma CBFs if luma CBF is not set // and chroma exists. @@ -1849,7 +1849,7 @@ static void search_pu_inter(encoder_state_t * const state, for (int list = 0; list < 2; ++list) { // TODO: make configurable - int n_best = MIN(1, amvp[list].size); + int n_best = MIN(state->encoder_control->cfg.rdo >= 4 ? 
2 : 1, amvp[list].size); if (cfg->fme_level > 0) { for (int i = 0; i < n_best; ++i) { @@ -1894,7 +1894,7 @@ static void search_pu_inter(encoder_state_t * const state, unipred_pu->inter.mv[list][1] = frac_mv.y; CU_SET_MV_CAND(unipred_pu, list, cu_mv_cand); - if (state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) { + if (state->encoder_control->cfg.rdo >= 3 && cur_pu->part_size == SIZE_2Nx2N) { kvz_cu_cost_inter_rd2(state, x, y, depth, unipred_pu, lcu, &frac_cost, &frac_bits); } @@ -1917,7 +1917,7 @@ static void search_pu_inter(encoder_state_t * const state, amvp[list].size = n_best; } - if (state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N && cfg->fme_level == 0) { + if (state->encoder_control->cfg.rdo >= 3 && cur_pu->part_size == SIZE_2Nx2N && cfg->fme_level == 0) { if (amvp[0].size) kvz_cu_cost_inter_rd2(state, x, y, depth, &amvp[0].unit[best_keys[0]], lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]]); if (amvp[1].size) kvz_cu_cost_inter_rd2(state, x, y, depth, &amvp[1].unit[best_keys[1]], lcu, &amvp[1].cost[best_keys[1]], &amvp[1].bits[best_keys[1]]); } @@ -2019,13 +2019,12 @@ static void search_pu_inter(encoder_state_t * const state, amvp[2].size++; } } - - // TODO: this probably should have a separate command line option - if (cfg->rdo == 3) search_pu_inter_bipred(info, depth, lcu, &amvp[2]); + + if (!cfg->fast_bipred) search_pu_inter_bipred(info, depth, lcu, &amvp[2]); assert(amvp[2].size <= MAX_UNIT_STATS_MAP_SIZE); kvz_sort_keys_by_cost(&amvp[2]); - if (amvp[2].size > 0 && state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) { + if (amvp[2].size > 0 && state->encoder_control->cfg.rdo >= 3 && cur_pu->part_size == SIZE_2Nx2N) { kvz_cu_cost_inter_rd2(state, x, y, depth, &amvp[2].unit[amvp[2].keys[0]], lcu, &amvp[2].cost[amvp[2].keys[0]], &amvp[2].bits[amvp[2].keys[0]]); } } @@ -2238,9 +2237,21 @@ void kvz_search_cu_inter(encoder_state_t * const state, const int y_local = SUB_SCU(y); 
cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local); *cur_pu = *best_inter_pu; - - kvz_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), - true, state->encoder_control->chroma_format != KVZ_CSP_400); + // Calculate more accurate cost when needed + if (state->encoder_control->cfg.rdo == 2) { + kvz_cu_cost_inter_rd2(state, + x, y, depth, + cur_pu, + lcu, + inter_cost, + inter_bitcost); + kvz_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), + true, state->encoder_control->chroma_format != KVZ_CSP_400); + } + else { + kvz_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), + true, state->encoder_control->chroma_format != KVZ_CSP_400); + } if (*inter_cost < MAX_DOUBLE && cur_pu->inter.mv_dir & 1) { assert(fracmv_within_tile(&info, cur_pu->inter.mv[0][0], cur_pu->inter.mv[0][1])); diff --git a/src/search_intra.c b/src/search_intra.c index ece46520..af2f235c 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -768,10 +768,11 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state, // is always one of the modes, so 2 means the final decision is made // between luma mode and one other mode that looks the best // according to search_intra_chroma_rough. 
- const int8_t modes_in_depth[5] = { 1, 1, 1, 1, 2 }; - int num_modes = modes_in_depth[depth]; - - if (state->encoder_control->cfg.rdo == 3) { + int num_modes = 2; + if(state->encoder_control->cfg.rdo >= 4 && depth == 4) { + num_modes = 5; + } + if (state->encoder_control->cfg.rdo >= 5) { num_modes = 5; } @@ -818,7 +819,6 @@ void kvz_search_cu_intra(encoder_state_t * const state, int8_t *mode_out, double *cost_out) { const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; - const int8_t cu_width = LCU_WIDTH >> depth; const int_fast8_t log2_width = LOG2_LCU_WIDTH - depth; cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); @@ -853,7 +853,7 @@ void kvz_search_cu_intra(encoder_state_t * const state, kvz_pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; int8_t number_of_modes; - bool skip_rough_search = (depth == 0 || state->encoder_control->cfg.rdo >= 3); + bool skip_rough_search = (depth == 0 || state->encoder_control->cfg.rdo >= 5); if (!skip_rough_search) { number_of_modes = search_intra_rough(state, ref_pixels, LCU_WIDTH, @@ -874,10 +874,14 @@ void kvz_search_cu_intra(encoder_state_t * const state, const int32_t rdo_level = state->encoder_control->cfg.rdo; if (rdo_level >= 2 || skip_rough_search) { int number_of_modes_to_search; - if (rdo_level == 3) { + if (rdo_level == 5) { number_of_modes_to_search = 35; - } else if (rdo_level == 2) { - number_of_modes_to_search = (cu_width == 4) ? 3 : 2; + } else if(rdo_level >= 4) { + const int rdo_candidate_modes[] = { 5, 5, 5, 6, 7 }; + assert(depth >= 0 && depth <= 4); + number_of_modes_to_search = rdo_candidate_modes[depth]; + } else if (rdo_level >= 2) { + number_of_modes_to_search = depth == 4 ? 3 : 2; } else { // Check only the predicted modes. 
number_of_modes_to_search = 0; diff --git a/src/strategies/avx2/encode_coding_tree-avx2.c b/src/strategies/avx2/encode_coding_tree-avx2.c index 67d8e9e0..9d060b50 100644 --- a/src/strategies/avx2/encode_coding_tree-avx2.c +++ b/src/strategies/avx2/encode_coding_tree-avx2.c @@ -252,7 +252,8 @@ void kvz_encode_coeff_nxn_avx2(encoder_state_t * const state, uint8_t width, uint8_t type, int8_t scan_mode, - int8_t tr_skip) + int8_t tr_skip, + double* bits_out) { const encoder_control_t * const encoder = state->encoder_control; int c1 = 1; @@ -260,6 +261,7 @@ void kvz_encode_coeff_nxn_avx2(encoder_state_t * const state, uint8_t last_coeff_y = 0; int32_t i; uint32_t sig_coeffgroup_nzs[8 * 8] = { 0 }; + double bits = 0; int8_t be_valid = encoder->cfg.signhide_enable; int32_t scan_pos_sig; @@ -361,7 +363,7 @@ void kvz_encode_coeff_nxn_avx2(encoder_state_t * const state, // transform skip flag if(width == 4 && encoder->cfg.trskip_enable) { cabac->cur_ctx = (type == 0) ? &(cabac->ctx.transform_skip_model_luma) : &(cabac->ctx.transform_skip_model_chroma); - CABAC_BIN(cabac, tr_skip, "transform_skip_flag"); + CABAC_FBITS_UPDATE(cabac, cabac->cur_ctx, tr_skip, bits, "transform_skip_flag"); } last_coeff_x = pos_last & (width - 1); @@ -374,7 +376,8 @@ void kvz_encode_coeff_nxn_avx2(encoder_state_t * const state, width, width, type, - scan_mode); + scan_mode, + bits_out); scan_pos_sig = scan_pos_last; @@ -406,8 +409,7 @@ void kvz_encode_coeff_nxn_avx2(encoder_state_t * const state, uint32_t sig_coeff_group = (sig_coeffgroup_nzs[cg_blk_pos] != 0); uint32_t ctx_sig = kvz_context_get_sig_coeff_group(sig_coeffgroup_nzs, cg_pos_x, cg_pos_y, width); - cabac->cur_ctx = &base_coeff_group_ctx[ctx_sig]; - CABAC_BIN(cabac, sig_coeff_group, "coded_sub_block_flag"); + CABAC_FBITS_UPDATE(cabac, &base_coeff_group_ctx[ctx_sig], sig_coeff_group, bits, "coded_sub_block_flag"); } if (sig_coeffgroup_nzs[cg_blk_pos]) { @@ -464,8 +466,7 @@ void kvz_encode_coeff_nxn_avx2(encoder_state_t * const state, if 
(curr_esc_flag | num_non_zero) { ctx_sig = ctx_sig_buf[id]; - cabac->cur_ctx = &baseCtx[ctx_sig]; - CABAC_BIN(cabac, curr_sig, "sig_coeff_flag"); + CABAC_FBITS_UPDATE(cabac, &baseCtx[ctx_sig], curr_sig, bits, "sig_coeff_flag"); } if (curr_sig) { @@ -519,8 +520,7 @@ void kvz_encode_coeff_nxn_avx2(encoder_state_t * const state, uint32_t shift = idx << 1; uint32_t symbol = (coeffs_gt1_bits >> shift) & 1; - cabac->cur_ctx = &base_ctx_mod[c1]; - CABAC_BIN(cabac, symbol, "coeff_abs_level_greater1_flag"); + CABAC_FBITS_UPDATE(cabac, &base_ctx_mod[c1], symbol, bits, "coeff_abs_level_greater1_flag"); c1 = (c1s_nextiter >> shift) & 3; } @@ -532,9 +532,7 @@ void kvz_encode_coeff_nxn_avx2(encoder_state_t * const state, if (first_c2_flag_idx != -1) { uint32_t shift = (first_c2_flag_idx << 1) + 1; uint8_t symbol = (coeffs_gt2_bits >> shift) & 1; - cabac->cur_ctx = &base_ctx_mod[0]; - - CABAC_BIN(cabac, symbol, "coeff_abs_level_greater2_flag"); + CABAC_FBITS_UPDATE(cabac, &base_ctx_mod[0], symbol, bits, "coeff_abs_level_greater2_flag"); } } int32_t shiftamt = (be_valid && sign_hidden) ? 
1 : 0; @@ -546,6 +544,7 @@ void kvz_encode_coeff_nxn_avx2(encoder_state_t * const state, } } CABAC_BINS_EP(cabac, coeff_signs, nnz, "coeff_sign_flag"); + if (cabac->only_count) bits += nnz; if (c1 == 0 || num_non_zero > C1FLAG_NUMBER) { @@ -586,7 +585,7 @@ void kvz_encode_coeff_nxn_avx2(encoder_state_t * const state, if (!cabac->only_count && (encoder->cfg.crypto_features & KVZ_CRYPTO_TRANSF_COEFFS)) { kvz_cabac_write_coeff_remain_encry(state, cabac, level_diff, go_rice_param, base_level); } else { - kvz_cabac_write_coeff_remain(cabac, level_diff, go_rice_param); + bits += kvz_cabac_write_coeff_remain(cabac, level_diff, go_rice_param); } if (curr_abs_coeff > 3 * (1 << go_rice_param)) { @@ -602,6 +601,7 @@ void kvz_encode_coeff_nxn_avx2(encoder_state_t * const state, num_non_zero = 0; coeff_signs = 0; } + if (cabac->only_count) *bits_out += bits; } #endif // COMPILE_INTEL_AVX2 diff --git a/src/strategies/avx2/encode_coding_tree-avx2.h b/src/strategies/avx2/encode_coding_tree-avx2.h index 47f69d4f..dc57ee1d 100644 --- a/src/strategies/avx2/encode_coding_tree-avx2.h +++ b/src/strategies/avx2/encode_coding_tree-avx2.h @@ -47,7 +47,8 @@ void kvz_encode_coeff_nxn_avx2(encoder_state_t * const state, uint8_t width, uint8_t type, int8_t scan_mode, - int8_t tr_skip); + int8_t tr_skip, + double* bits_out); int kvz_strategy_register_encode_avx2(void* opaque, uint8_t bitdepth); diff --git a/src/strategies/avx2/quant-avx2.c b/src/strategies/avx2/quant-avx2.c index d78ca55d..2be114b4 100644 --- a/src/strategies/avx2/quant-avx2.c +++ b/src/strategies/avx2/quant-avx2.c @@ -805,7 +805,7 @@ static uint32_t coeff_abs_sum_avx2(const coeff_t *coeffs, const size_t length) return parts[0] + parts[1] + parts[2] + parts[3]; } -static uint32_t fast_coeff_cost_avx2(const coeff_t *coeff, int32_t width, uint64_t weights) +static double fast_coeff_cost_avx2(const coeff_t *coeff, int32_t width, uint64_t weights) { const __m256i zero = _mm256_setzero_si256(); const __m256i threes = 
_mm256_set1_epi16(3); @@ -859,7 +859,8 @@ static uint32_t fast_coeff_cost_avx2(const coeff_t *coeff, int32_t width, uint64 __m256i sum4 = _mm256_add_epi64 (sum2, sum3); __m128i sum128 = _mm256_castsi256_si128 (sum4); - return (_mm_cvtsi128_si32(sum128) + (1 << 7)) >> 8; + uint32_t temp = _mm_cvtsi128_si32(sum128); + return (double)(temp) / 256.0; } #endif //COMPILE_INTEL_AVX2 && defined X86_64 diff --git a/src/strategies/generic/encode_coding_tree-generic.c b/src/strategies/generic/encode_coding_tree-generic.c index bed6cb7f..1d2b78c7 100644 --- a/src/strategies/generic/encode_coding_tree-generic.c +++ b/src/strategies/generic/encode_coding_tree-generic.c @@ -43,9 +43,11 @@ void kvz_encode_coeff_nxn_generic(encoder_state_t * const state, uint8_t width, uint8_t type, int8_t scan_mode, - int8_t tr_skip) + int8_t tr_skip, + double* bits_out) { const encoder_control_t * const encoder = state->encoder_control; + double bits = 0; int c1 = 1; uint8_t last_coeff_x = 0; uint8_t last_coeff_y = 0; @@ -111,7 +113,7 @@ void kvz_encode_coeff_nxn_generic(encoder_state_t * const state, // transform skip flag if(width == 4 && encoder->cfg.trskip_enable) { cabac->cur_ctx = (type == 0) ? 
&(cabac->ctx.transform_skip_model_luma) : &(cabac->ctx.transform_skip_model_chroma); - CABAC_BIN(cabac, tr_skip, "transform_skip_flag"); + CABAC_FBITS_UPDATE(cabac, cabac->cur_ctx, tr_skip, bits, "transform_skip_flag"); } last_coeff_x = pos_last & (width - 1); @@ -124,7 +126,8 @@ void kvz_encode_coeff_nxn_generic(encoder_state_t * const state, width, width, type, - scan_mode); + scan_mode, + bits_out); scan_pos_sig = scan_pos_last; @@ -157,8 +160,7 @@ void kvz_encode_coeff_nxn_generic(encoder_state_t * const state, uint32_t sig_coeff_group = (sig_coeffgroup_flag[cg_blk_pos] != 0); uint32_t ctx_sig = kvz_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x, cg_pos_y, width); - cabac->cur_ctx = &base_coeff_group_ctx[ctx_sig]; - CABAC_BIN(cabac, sig_coeff_group, "coded_sub_block_flag"); + CABAC_FBITS_UPDATE(cabac, &base_coeff_group_ctx[ctx_sig], sig_coeff_group, bits, "coded_sub_block_flag"); } if (sig_coeffgroup_flag[cg_blk_pos]) { @@ -174,8 +176,7 @@ void kvz_encode_coeff_nxn_generic(encoder_state_t * const state, if (scan_pos_sig > sub_pos || i == 0 || num_non_zero) { ctx_sig = kvz_context_get_sig_ctx_inc(pattern_sig_ctx, scan_mode, pos_x, pos_y, log2_block_size, type); - cabac->cur_ctx = &baseCtx[ctx_sig]; - CABAC_BIN(cabac, sig, "sig_coeff_flag"); + CABAC_FBITS_UPDATE(cabac, &baseCtx[ctx_sig], sig, bits, "sig_coeff_flag"); } if (sig) { @@ -214,8 +215,7 @@ void kvz_encode_coeff_nxn_generic(encoder_state_t * const state, for (idx = 0; idx < num_c1_flag; idx++) { uint32_t symbol = (abs_coeff[idx] > 1) ? 1 : 0; - cabac->cur_ctx = &base_ctx_mod[c1]; - CABAC_BIN(cabac, symbol, "coeff_abs_level_greater1_flag"); + CABAC_FBITS_UPDATE(cabac, &base_ctx_mod[c1], symbol, bits, "coeff_abs_level_greater1_flag"); if (symbol) { c1 = 0; @@ -234,8 +234,7 @@ void kvz_encode_coeff_nxn_generic(encoder_state_t * const state, if (first_c2_flag_idx != -1) { uint8_t symbol = (abs_coeff[first_c2_flag_idx] > 2) ? 
1 : 0; - cabac->cur_ctx = &base_ctx_mod[0]; - CABAC_BIN(cabac, symbol, "coeff_abs_level_greater2_flag"); + CABAC_FBITS_UPDATE(cabac, &base_ctx_mod[0], symbol, bits, "coeff_abs_level_greater2_flag"); } } if (be_valid && sign_hidden) { @@ -245,11 +244,13 @@ void kvz_encode_coeff_nxn_generic(encoder_state_t * const state, coeff_signs = coeff_signs ^ kvz_crypto_get_key(state->crypto_hdl, num_non_zero-1); } CABAC_BINS_EP(cabac, coeff_signs , (num_non_zero - 1), "coeff_sign_flag"); + if (cabac->only_count) bits += num_non_zero - 1; } else { if (!cabac->only_count) if (encoder->cfg.crypto_features & KVZ_CRYPTO_TRANSF_COEFF_SIGNS) coeff_signs = coeff_signs ^ kvz_crypto_get_key(state->crypto_hdl, num_non_zero); CABAC_BINS_EP(cabac, coeff_signs, num_non_zero, "coeff_sign_flag"); + if (cabac->only_count) bits += num_non_zero; } if (c1 == 0 || num_non_zero > C1FLAG_NUMBER) { @@ -263,9 +264,9 @@ void kvz_encode_coeff_nxn_generic(encoder_state_t * const state, if (encoder->cfg.crypto_features & KVZ_CRYPTO_TRANSF_COEFFS) kvz_cabac_write_coeff_remain_encry(state, cabac, abs_coeff[idx] - base_level, go_rice_param, base_level); else - kvz_cabac_write_coeff_remain(cabac, abs_coeff[idx] - base_level, go_rice_param); + bits += kvz_cabac_write_coeff_remain(cabac, abs_coeff[idx] - base_level, go_rice_param); } else - kvz_cabac_write_coeff_remain(cabac, abs_coeff[idx] - base_level, go_rice_param); + bits += kvz_cabac_write_coeff_remain(cabac, abs_coeff[idx] - base_level, go_rice_param); if (abs_coeff[idx] > 3 * (1 << go_rice_param)) { go_rice_param = MIN(go_rice_param + 1, 4); @@ -279,6 +280,7 @@ void kvz_encode_coeff_nxn_generic(encoder_state_t * const state, } } } + if (cabac->only_count) *bits_out += bits; } int kvz_strategy_register_encode_generic(void* opaque, uint8_t bitdepth) diff --git a/src/strategies/generic/encode_coding_tree-generic.h b/src/strategies/generic/encode_coding_tree-generic.h index 1f5b2ea3..beed8bbc 100644 --- a/src/strategies/generic/encode_coding_tree-generic.h 
+++ b/src/strategies/generic/encode_coding_tree-generic.h @@ -47,7 +47,8 @@ void kvz_encode_coeff_nxn_generic(encoder_state_t * const state, uint8_t width, uint8_t type, int8_t scan_mode, - int8_t tr_skip); + int8_t tr_skip, + double* bits_out); int kvz_strategy_register_encode_generic(void* opaque, uint8_t bitdepth); diff --git a/src/strategies/generic/quant-generic.c b/src/strategies/generic/quant-generic.c index 24bd73de..96045fda 100644 --- a/src/strategies/generic/quant-generic.c +++ b/src/strategies/generic/quant-generic.c @@ -356,7 +356,7 @@ static INLINE void get_coeff_weights(uint64_t wts_packed, uint16_t *weights) weights[3] = (wts_packed >> 48) & 0xffff; } -static uint32_t fast_coeff_cost_generic(const coeff_t *coeff, int32_t width, uint64_t weights) +static double fast_coeff_cost_generic(const coeff_t *coeff, int32_t width, uint64_t weights) { uint32_t sum = 0; uint16_t weights_unpacked[4]; @@ -371,7 +371,7 @@ static uint32_t fast_coeff_cost_generic(const coeff_t *coeff, int32_t width, uin } sum += weights_unpacked[curr_abs]; } - return (sum + (1 << 7)) >> 8; + return (double) sum / 256.0; } int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth) diff --git a/src/strategies/strategies-encode.h b/src/strategies/strategies-encode.h index 1c140aae..a3b27e49 100644 --- a/src/strategies/strategies-encode.h +++ b/src/strategies/strategies-encode.h @@ -52,7 +52,8 @@ typedef void (encode_coeff_nxn_func)(encoder_state_t * const state, uint8_t width, uint8_t type, int8_t scan_mode, - int8_t tr_skip); + int8_t tr_skip, + double *bits_out); // Declare function pointers. 
extern encode_coeff_nxn_func *kvz_encode_coeff_nxn; diff --git a/src/strategies/strategies-quant.h b/src/strategies/strategies-quant.h index 4e3966f7..7cc1d7f1 100644 --- a/src/strategies/strategies-quant.h +++ b/src/strategies/strategies-quant.h @@ -56,7 +56,7 @@ typedef int32_t (quant_residual_func)(encoder_state_t *const state, bool early_skip); typedef void (dequant_func)(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height, int8_t type, int8_t block_type); -typedef uint32_t (fast_coeff_cost_func)(const coeff_t *coeff, int32_t width, uint64_t weights); +typedef double (fast_coeff_cost_func)(const coeff_t *coeff, int32_t width, uint64_t weights); typedef uint32_t (coeff_abs_sum_func)(const coeff_t *coeffs, size_t length);