Add YUV444P12 support

2025-07-03 14:55:18 -04:00 · 2015-10-10 16:00:27 -07:00
parent 0ce1b51d16
commit f93b4dbf23
2 changed files with 38 additions and 1 deletions
--- a/cuburn/code/output.py
+++ b/cuburn/code/output.py
@ -188,5 +188,37 @@ __global__ void f32_to_yuv420p10(
    rctxs[rb_incr(rb->tail, tid)] = rctx;
 }
 // Convert from rgb444 (one float4 per pixel) to planar YUV 4:4:4 with 12-bit
 // samples stored in uint16_t, using JPEG full-range primaries.
 //
 // Each thread converts the single pixel at (x, y) taken from its 2D
 // block/thread index:
 //   dst      output buffer holding three consecutive planes (Y, then U,
 //            then V) of dstride * height samples each
 //   src      input pixels; read at row stride sstride, offset by gutter
 //            pixels in both x and y
 //   gutter   border offset applied to the source coordinates
 //   dstride  row stride of dst in samples; also used as the x bound
 //   sstride  row stride of src in float4 elements
 //   height   number of rows per output plane
 //   rb       ring buffer indices used to pick a slot in rctxs
 //   rctxs    per-thread mwc_st contexts, loaded from the rb->head slot,
 //            handed to dclampf, and stored back at the rb->tail slot
 // TODO(strobe): Share more code.
 __global__ void f32_to_yuv444p12(
    uint16_t *dst, const float4 *src,
    int gutter, int dstride, int sstride, int height,
    ringbuf *rb, mwc_st *rctxs)
 {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    // Valid coordinates are [0, dstride) x [0, height). A thread with
    // x == dstride would alias the first sample of the next row, and one with
    // y == height would spill into the following plane (and past the end of
    // dst for the V plane), so both bounds must be exclusive.
    if (x >= dstride || y >= height) return;
    int isrc = sstride * (y + gutter) + x + gutter;
    // Index of this thread within its block, used to select its mwc_st slot.
    int tid = blockDim.x * threadIdx.y + threadIdx.x;
    mwc_st rctx = rctxs[rb_incr(rb->head, tid)];
    float4 in = src[isrc];
    // 4095 is the peak value of a 12-bit sample. The chroma rows add 0.5 to
    // centre U and V on mid-scale.
    ushort3 out = make_ushort3(
        dclampf(rctx, 4095.0f, 0.299f      * in.x + 0.587f     * in.y + 0.114f     * in.z),
        dclampf(rctx, 4095.0f, -0.168736f  * in.x - 0.331264f  * in.y + 0.5f       * in.z + 0.5f),
        dclampf(rctx, 4095.0f, 0.5f        * in.x - 0.418688f  * in.y - 0.081312f  * in.z + 0.5f)
    );
    // Planar layout: step by one full plane between the Y, U and V stores.
    int plane = dstride * height;
    int idst = dstride * y + x;
    dst[idst] = out.x;
    idst += plane;
    dst[idst] = out.y;
    idst += plane;
    dst[idst] = out.z;
    rctxs[rb_incr(rb->tail, tid)] = rctx;
 }
 ''')
--- a/cuburn/output.py
+++ b/cuburn/output.py
@ -260,6 +260,11 @@ class VPxOutput(Output, ClsMod):
                self.out_filter = 'f32_to_yuv444p10'
                self.args += ['-b', '10', '--input-bit-depth=10',
                              '--profile=3', '--i444']
            elif pix_fmt == 'yuv444p12':
                assert codec == 'vp9'
                self.out_filter = 'f32_to_yuv444p12'
                self.args += ['-b', '12', '--input-bit-depth=12',
                              '--profile=3', '--i444']
            else:
                raise ValueError('Invalid pix_fmt: ' + pix_fmt)
        self.args += ['--codec=' + codec, '--cq-level=' + str(crf), '--fps=%d/1' % fps]
@ -273,7 +278,7 @@ class VPxOutput(Output, ClsMod):
    def copy(self, fb, dim, pool, stream=None):
        fmt = 'u1'
-        if self.pix_fmt in ('yuv444p10', 'yuv420p10'):
+        if self.pix_fmt in ('yuv444p10', 'yuv420p10', 'yuv444p12'):
            fmt = 'u2'
        dims =  (3, dim.h, dim.w)
        if self.pix_fmt == 'yuv420p10':