DCE tricks

Libav relies heavily on the compiler performing dead code elimination (DCE) in order to simplify the code.

Optional code selection

Optional code, such as arch-specific optimizations, is usually called from within an always-true/always-false branch that depends on a specific configure symbol.

/* Fill in the default function pointers of ctx, then call the
 * architecture-specific initializers. */
av_cold void ff_videodsp_init(VideoDSPContext *ctx, int bpc)
{
    ctx->prefetch = just_return;

    /* bpc selects between the 8 and 16 variants of emulated_edge_mc. */
    if (bpc > 8) {
        ctx->emulated_edge_mc = ff_emulated_edge_mc_16;
    } else {
        ctx->emulated_edge_mc = ff_emulated_edge_mc_8;
    }

    /* Each ARCH_* condition is an always-true/always-false branch driven
     * by a configure symbol. */
    if (ARCH_ARM) {
        ff_videodsp_init_arm(ctx, bpc);
    }
    if (ARCH_PPC) {
        ff_videodsp_init_ppc(ctx, bpc);
    }
    if (ARCH_X86) {
        ff_videodsp_init_x86(ctx, bpc);
    }
}

Common codepaths with variants

Many codepaths share a common structure and differ only by small branches scattered throughout it.

/* Starting point: pattern is read from the context at run time, so every
 * test on it is a real branch, including the ones inside the loop. */
static void foo(Context *c, uint8_t *dest, uint8_t *src)
{
    int pattern = c->pattern;
    int i;

    if (pattern == BAR) {
        .... something ....
    }

    .... common code....
    switch (c->state) {
    case A:
        if (pattern == BAR) {
            .... something specific ....
        }
        break;
    case B:
        .... common ....
        break;
    case C:
        for (i = 0; i < c->loop; i++) {
            /* Both tests below are evaluated on every iteration. */
            if (pattern != BAR) {
                .... something different ....
            }
            .... common code ....
            if (pattern == BAR) {
                .... something specific ....
            }
        }
    }
}

These branches may sit inside tight loops and hurt performance. To avoid the problem, the code can be duplicated into two functions: one for the pattern == BAR case and one for the other case. To avoid duplicating the source code, the shared body is implemented as an av_always_inline core function, named with the _internal suffix, that takes an additional parameter for the branch condition. The former function is reduced to the initial branch, which calls the _internal function with the branch parameter hardcoded in each arm.

/* Shared core for both variants. is_bar is the branch parameter: callers
 * pass it as a literal (1 or 0) so that, once this function is inlined,
 * every test on it is a compile-time constant. */
static av_always_inline void foo_internal(Context *c,
                                          uint8_t *dest, uint8_t *src,
                                          int is_bar)
{
    int i;

    if (is_bar) {
        .... something ....
    }

    .... common code....
    switch (c->state) {
    case A:
        if (is_bar) {
            .... something specific ....
        }
        break;
    case B:
        .... common ....
        break;
    case C:
        for (i = 0; i < c->loop; i++) {
            /* With a literal is_bar only one of these two blocks
             * remains in each inlined copy. */
            if (!is_bar) {
                .... something different ....
            }
            .... common code ....
            if (is_bar) {
                .... something specific ....
            }
        }
    }

    .....
}

/* Entry point: the only run-time test on the pattern happens here. Each
 * call passes a literal as the last argument so that the inlined
 * foo_internal() body is specialized for that value. */
static void foo(Context *c, uint8_t *dst, uint8_t *src)
{
    if (c->pattern != BAR) {
        foo_internal(c, dst, src, 0);
        return;
    }

    foo_internal(c, dst, src, 1);
}

Constant propagation, forced inlining and DCE ensure that foo() does not contain any branch on the pattern besides the initial one.

Real code example:

/* Excerpt of the always-inlined core. is_mpeg12 is the branch parameter;
 * ff_MPV_motion() passes it as a literal 1 or 0. */
static av_always_inline void MPV_motion_internal(MpegEncContext *s,
                                                 uint8_t *dest_y,
                                                 uint8_t *dest_cb,
                                                 uint8_t *dest_cr,
                                                 int dir,
                                                 uint8_t **ref_picture,
                                                 op_pixels_func (*pix_op)[4],
                                                 qpel_mc_func (*qpix_op)[16],
                                                 int is_mpeg12)
{
    int i;
    int mb_y = s->mb_y;

    prefetch_motion(s, ref_picture, dir);

    /* When is_mpeg12 is hardcoded to 1, !is_mpeg12 is constant-false and
     * this whole branch becomes dead code. */
    if (!is_mpeg12 && s->obmc && s->pict_type != AV_PICTURE_TYPE_B) {
        apply_obmc(s, dest_y, dest_cb, dest_cr, ref_picture, pix_op);
        return;
    ...

OBMC applies only to non-mpeg12 decoding, so by hardcoding is_mpeg12 to 1 we can rely on DCE to omit the branch completely.

/* Dispatch to MPV_motion_internal() with is_mpeg12 given as a literal.
 * The FMT_MPEG1 specialization (is_mpeg12 == 1) is only compiled when
 * CONFIG_SMALL is 0; otherwise every call uses is_mpeg12 == 0. */
void ff_MPV_motion(MpegEncContext *s,
                   uint8_t *dest_y, uint8_t *dest_cb,
                   uint8_t *dest_cr, int dir,
                   uint8_t **ref_picture,
                   op_pixels_func (*pix_op)[4],
                   qpel_mc_func (*qpix_op)[16])
{
#if !CONFIG_SMALL
    if (s->out_format == FMT_MPEG1) {
        MPV_motion_internal(s, dest_y, dest_cb, dest_cr, dir,
                            ref_picture, pix_op, qpix_op, 1);
        return;
    }
#endif

    MPV_motion_internal(s, dest_y, dest_cb, dest_cr, dir,
                        ref_picture, pix_op, qpix_op, 0);
}

For performance reasons, ff_MPV_motion can thereby provide a dedicated codepath for FMT_MPEG1 that omits all the branches not involved in mpeg12 decoding.