mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Add new SmearClip filter, and make it the default.
Also remove haloclip's separate gamma setting; it now uses colorclip's gamma instead. Also expand the side buffer to full size (the same allocation as the front and back buffers).
This commit is contained in:
parent
8a6c238cf8
commit
31234b986e
@ -108,6 +108,26 @@ __global__ void den_blur_1c(float *dst, int pattern, int upsample) {
|
|||||||
}
|
}
|
||||||
''')
|
''')
|
||||||
|
|
||||||
|
|
||||||
|
fullblurlib = devlib(deps=[denblurlib], defs=r'''
|
||||||
|
/*
 * One pass of a 7-tap weighted blur over a four-channel image.
 *
 * For the pixel selected by GET_IDX_2, seven samples are fetched from the
 * `chan4_src` texture through tex_shear() and combined using the weights in
 * `gauss_coefs`; the weighted sum of each channel is written to dst[gi].
 *
 *   dst      - output buffer, indexed by the flat index `gi` from GET_IDX_2
 *   pattern  - forwarded unchanged to tex_shear()
 *   upsample - log2 of the spacing between taps: tap i is sampled at offset
 *              (i - 3) * 2^upsample
 *
 * GET_IDX_2, tex_shear, chan4_src and gauss_coefs are defined outside this
 * block. NOTE(review): presumably gauss_coefs is filled in by the host before
 * launch and the texture is bound by the caller -- confirm against the host
 * code.
 */
__global__ void full_blur(float4 *dst, int pattern, int upsample) {
    GET_IDX_2(xi, yi, gi);
    float x = xi, y = yi;

    // Scale the signed tap offset by multiplication. The previous form,
    // `(i - 3) << upsample`, left-shifts a negative value for i < 3, which
    // is undefined behaviour prior to C++20. The product is identical
    // wherever the shift was well defined.
    const int step = 1 << upsample;

    float4 val = make_float4(0, 0, 0, 0);

    #pragma unroll
    for (int i = 0; i < 7; i++) {
        float4 pix = tex_shear(chan4_src, pattern, x, y, (i - 3) * step);
        val.x += pix.x * gauss_coefs[i];
        val.y += pix.y * gauss_coefs[i];
        val.z += pix.z * gauss_coefs[i];
        val.w += pix.w * gauss_coefs[i];
    }
    dst[gi] = val;
}
|
||||||
|
''')
|
||||||
|
|
||||||
bilaterallib = devlib(deps=[logscalelib, texshearlib, denblurlib], defs=r'''
|
bilaterallib = devlib(deps=[logscalelib, texshearlib, denblurlib], defs=r'''
|
||||||
/* sstd: spatial standard deviation (Gaussian filter)
|
/* sstd: spatial standard deviation (Gaussian filter)
|
||||||
* cstd: color standard deviation (Gaussian on the range [0, 1], where 1
|
* cstd: color standard deviation (Gaussian on the range [0, 1], where 1
|
||||||
@ -225,12 +245,11 @@ halocliplib = devlib(deps=[yuvlib, denblurlib], defs=r'''
|
|||||||
__global__ void apply_gamma(float *dst, float4 *src, float gamma) {
|
__global__ void apply_gamma(float *dst, float4 *src, float gamma) {
|
||||||
GET_IDX(i);
|
GET_IDX(i);
|
||||||
float4 pix = src[i];
|
float4 pix = src[i];
|
||||||
float ls = powf(fmaxf(0.0f, src[i].z), gamma);
|
dst[i] = powf(pix.x, gamma);
|
||||||
dst[i] = ls * pix.x;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void
|
__global__ void
|
||||||
haloclip(float4 *pixbuf, const float *denbuf, float gamma) {
|
haloclip(float4 *pixbuf, const float *denbuf, float gamma_m_1) {
|
||||||
GET_IDX(i);
|
GET_IDX(i);
|
||||||
float4 pix = pixbuf[i];
|
float4 pix = pixbuf[i];
|
||||||
float areaval = denbuf[i];
|
float areaval = denbuf[i];
|
||||||
@ -240,12 +259,45 @@ haloclip(float4 *pixbuf, const float *denbuf, float gamma) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
float ls = powf(pix.z, gamma) / fmaxf(1.0f, areaval);
|
float ls = powf(pix.w, gamma_m_1) / fmaxf(1.0f, areaval);
|
||||||
|
|
||||||
scale_float4(pix, ls);
|
scale_float4(pix, ls);
|
||||||
|
|
||||||
yuvo2rgb(pix);
|
yuvo2rgb(pix);
|
||||||
|
pixbuf[i] = pix;
|
||||||
|
}
|
||||||
|
''')
|
||||||
|
|
||||||
|
smearcliplib = devlib(deps=[yuvlib, fullblurlib], defs=r'''
|
||||||
|
// Keep only the part of each pixel whose w component exceeds 1.
//
// The pixel read from src[i] is passed to scale_float4() with the factor
// (w - 1) / w when w > 1, and with 0 otherwise, then stored to dst[i].
// Assuming scale_float4() multiplies every channel by that factor (it is
// defined elsewhere -- confirm), the stored w is max(0, w - 1) and the other
// channels shrink in the same proportion.
//
// Despite the kernel's name, no gamma is applied here: gamma_m_1 is accepted
// but never read.
__global__ void apply_gamma_full_hi(float4 *dst, float4 *src, float gamma_m_1) {
    GET_IDX(i);
    float4 pix = src[i];
    const float w = pix.w;
    // For 0 < w <= 1 the excess is clamped to zero, so the factor is 0 there
    // just as it is for w <= 0.
    const float ls = (w > 1.0f) ? (w - 1.0f) / w : 0.0f;
    scale_float4(pix, ls);
    dst[i] = pix;
}
|
||||||
|
|
||||||
|
// Combine each pixel with its entry in the smear buffer, then scale it by a
// power of its own w component.
//
//   pixbuf    - pixels, updated in place
//   smearbuf  - per-pixel values added channel-by-channel to pixbuf
//   gamma_m_1 - exponent applied to the summed w to obtain the scale factor
//
// A pixel whose summed w is <= 0 is stored as all zeros. Otherwise it is
// passed through scale_float4() and yuvo2rgb() before being stored.
// NOTE(review): both helpers are defined elsewhere; presumably yuvo2rgb
// converts the working colour space to RGB -- confirm.
__global__ void
smearclip(float4 *pixbuf, const float4 *smearbuf, float gamma_m_1) {
    GET_IDX(i);
    const float4 smear = smearbuf[i];
    float4 pix = pixbuf[i];

    pix.x += smear.x;
    pix.y += smear.y;
    pix.z += smear.z;
    pix.w += smear.w;

    // The comparison is kept as `<= 0` so that a NaN w still takes the
    // scaling branch, exactly as before.
    if (pix.w <= 0) {
        pix = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
    } else {
        float ls = powf(pix.w, gamma_m_1);
        scale_float4(pix, ls);
        yuvo2rgb(pix);
    }
    pixbuf[i] = pix;
}
|
||||||
''')
|
''')
|
||||||
|
@ -28,6 +28,8 @@ def mkdsc(dim, ch):
|
|||||||
format=cuda.array_format.FLOAT)
|
format=cuda.array_format.FLOAT)
|
||||||
|
|
||||||
class Filter(object):
|
class Filter(object):
|
||||||
|
# Set to True if the filter requires a full 4-channel side buffer
|
||||||
|
full_side = False
|
||||||
def apply(self, fb, gprof, params, dim, tc, stream=None):
|
def apply(self, fb, gprof, params, dim, tc, stream=None):
|
||||||
"""
|
"""
|
||||||
Queue the application of this filter. When the live stream finishes
|
Queue the application of this filter. When the live stream finishes
|
||||||
@ -92,23 +94,43 @@ class Logscale(Filter, ClsMod):
|
|||||||
class HaloClip(Filter, ClsMod):
|
class HaloClip(Filter, ClsMod):
|
||||||
lib = code.filters.halocliplib
|
lib = code.filters.halocliplib
|
||||||
def apply(self, fb, gprof, params, dim, tc, stream=None):
|
def apply(self, fb, gprof, params, dim, tc, stream=None):
|
||||||
gam = f32(1 / params.gamma(tc) - 1)
|
gam = f32(1 / gprof.filters.colorclip.gamma(tc) - 1)
|
||||||
|
|
||||||
dsc = mkdsc(dim, 1)
|
dsc = mkdsc(dim, 1)
|
||||||
tref = mktref(self.mod, 'chan1_src')
|
tref = mktref(self.mod, 'chan1_src')
|
||||||
|
|
||||||
set_blur_width(self.mod, fb.pool, stream=stream)
|
set_blur_width(self.mod, fb.pool, stream=stream)
|
||||||
launch2('apply_gamma', self.mod, stream, dim,
|
launch2('apply_gamma', self.mod, stream, dim,
|
||||||
fb.d_side, fb.d_front, gam)
|
fb.d_side, fb.d_front, f32(0.1))
|
||||||
tref.set_address_2d(fb.d_side, dsc, 4 * params.astride)
|
tref.set_address_2d(fb.d_side, dsc, 4 * dim.astride)
|
||||||
launch2('den_blur_1c', self.mod, stream, dim,
|
launch2('den_blur_1c', self.mod, stream, dim,
|
||||||
fb.d_back, i32(0), i32(0), texrefs=[tref])
|
fb.d_back, i32(2), i32(0), texrefs=[tref])
|
||||||
tref.set_address_2d(fb.d_back, dsc, 4 * params.astride)
|
tref.set_address_2d(fb.d_back, dsc, 4 * dim.astride)
|
||||||
launch2('den_blur_1c', self.mod, stream, dim,
|
launch2('den_blur_1c', self.mod, stream, dim,
|
||||||
fb.d_side, i32(1), i32(0), texrefs=[tref])
|
fb.d_side, i32(3), i32(0), texrefs=[tref])
|
||||||
|
|
||||||
launch2('haloclip', self.mod, stream, dim,
|
launch2('haloclip', self.mod, stream, dim,
|
||||||
fb.d_front, fb.d_side)
|
fb.d_front, fb.d_side, gam)
|
||||||
|
|
||||||
|
class SmearClip(Filter, ClsMod):
    """
    Filter that launches the kernels in ``code.filters.smearcliplib``.

    ``apply`` queues, in order: ``apply_gamma_full_hi`` (front -> side), two
    ``full_blur`` passes (side -> back with pattern 2, then back -> side with
    pattern 3), and finally ``smearclip``, which reads the side buffer and
    updates the front buffer.
    """
    # This filter uses the side buffer as a full 4-channel image.
    full_side = True
    lib = code.filters.smearcliplib

    def apply(self, fb, gprof, params, dim, tc, stream=None):
        # The exponent comes from the colorclip filter's gamma setting; this
        # filter's own params only supply the blur width.
        gamma_m_1 = f32(1 / gprof.filters.colorclip.gamma(tc) - 1)
        desc = mkdsc(dim, 4)
        texref = mktref(self.mod, 'chan4_src')
        # Row pitch handed to set_address_2d: 16 * astride
        # (presumably 16 bytes per float4 element -- confirm).
        pitch = 16 * dim.astride

        set_blur_width(self.mod, fb.pool, params.width(tc), stream)
        launch2('apply_gamma_full_hi', self.mod, stream, dim,
                fb.d_side, fb.d_front, gamma_m_1)

        # Two blur passes that ping-pong between the side and back buffers,
        # so the blurred result ends up back in d_side.
        passes = ((fb.d_side, fb.d_back, 2),
                  (fb.d_back, fb.d_side, 3))
        for src_buf, dst_buf, pattern in passes:
            texref.set_address_2d(src_buf, desc, pitch)
            launch2('full_blur', self.mod, stream, dim,
                    dst_buf, i32(pattern), i32(0), texrefs=[texref])

        launch2('smearclip', self.mod, stream, dim,
                fb.d_front, fb.d_side, gamma_m_1)
|
||||||
|
|
||||||
class ColorClip(Filter, ClsMod):
|
class ColorClip(Filter, ClsMod):
|
||||||
lib = code.filters.colorcliplib
|
lib = code.filters.colorcliplib
|
||||||
@ -124,6 +146,6 @@ class ColorClip(Filter, ClsMod):
|
|||||||
|
|
||||||
# Ungainly but practical.
|
# Ungainly but practical.
|
||||||
filter_map = dict(bilateral=Bilateral, logscale=Logscale, haloclip=HaloClip,
|
filter_map = dict(bilateral=Bilateral, logscale=Logscale, haloclip=HaloClip,
|
||||||
colorclip=ColorClip)
|
colorclip=ColorClip, smearclip=SmearClip)
|
||||||
def create(gprof):
|
def create(gprof):
|
||||||
return [filter_map[f]() for f in gprof.filter_order]
|
return [filter_map[f]() for f in gprof.filter_order]
|
||||||
|
@ -51,7 +51,8 @@ filters = (
|
|||||||
, 'minimum': scalespline(0, max=1, d='Proportional min radius')
|
, 'minimum': scalespline(0, max=1, d='Proportional min radius')
|
||||||
, 'curve': scalespline(0.6, d='Power of filter radius with density')
|
, 'curve': scalespline(0.6, d='Power of filter radius with density')
|
||||||
}
|
}
|
||||||
, 'haloclip': {'gamma': scalespline(4)}
|
, 'haloclip': {}
|
||||||
|
, 'smearclip': {'width': scalespline(0.75, d='Spatial stdev of filter')}
|
||||||
, 'logscale': {'brightness': scalespline(4, d='Log-scale brightness')}
|
, 'logscale': {'brightness': scalespline(4, d='Log-scale brightness')}
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -96,7 +97,7 @@ edge.update(type='edge', author=author, blend=blend, link=link, time=time,
|
|||||||
anim = dict(base)
|
anim = dict(base)
|
||||||
anim.update(type='animation', authors=list_(author), link=link, time=time)
|
anim.update(type='animation', authors=list_(author), link=link, time=time)
|
||||||
|
|
||||||
default_filters = ['bilateral', 'logscale', 'colorclip']
|
default_filters = ['bilateral', 'logscale', 'smearclip']
|
||||||
# Yeah, now I'm just messing around.
|
# Yeah, now I'm just messing around.
|
||||||
prof_filters = dict([(fk, dict([(k, refscalar(1, '.'.join(['filters', fk, k])))
|
prof_filters = dict([(fk, dict([(k, refscalar(1, '.'.join(['filters', fk, k])))
|
||||||
for k in fv])) for fk, fv in filters.items()])
|
for k in fv])) for fk, fv in filters.items()])
|
||||||
|
@ -113,7 +113,7 @@ class Framebuffers(object):
|
|||||||
try:
|
try:
|
||||||
self.d_front = cuda.mem_alloc(16 * nbins)
|
self.d_front = cuda.mem_alloc(16 * nbins)
|
||||||
self.d_back = cuda.mem_alloc(16 * nbins)
|
self.d_back = cuda.mem_alloc(16 * nbins)
|
||||||
self.d_side = cuda.mem_alloc(8 * nbins)
|
self.d_side = cuda.mem_alloc(16 * nbins)
|
||||||
self.nbins = nbins
|
self.nbins = nbins
|
||||||
except cuda.MemoryError, e:
|
except cuda.MemoryError, e:
|
||||||
# If a frame that's too large sneaks by the task distributor, we
|
# If a frame that's too large sneaks by the task distributor, we
|
||||||
|
Loading…
Reference in New Issue
Block a user