src/filter/colgate/colgate.c - frei0r (0b26ea1)

Tree @0b26ea1 (Download .tar.gz)

colgate.c @0b26ea1 — raw · history · blame

/* colgate.c
 * Copyright (C) 2012 Steinar H. Gunderson <sgunderson@bigfoot.com>
 *
 * Color correction in LMS color space.
 *
 * This plugin is intended for the same uses as the balanc0r plugin,
 * but differs from balanc0r in two important aspects:
 *
 *   1. It operates in the LMS color space, which is a better approximation
 *      to the human color sensation than RGB is. This allows for more
 *      natural color changes. (Note that this inevitably requires that we
 *      we work in linear color space, not the nonlinear space pixels are
 *      typically stored in.)
 *
 *   2. Its choice of neutral color is not limited by the Planckian locus
 *      (typically expressed in Kelvins) like balanc0r is; you can choose
 *      any color, even those that are not typically regarded as “white”.
 *      If you want a color cast (e.g. a warmer scene), this can be
 *      adjusted with a separate control for color temperature.
 *
 * Color is a very complex topic, and this plugin makes no claims to
 * being 100% correct in any way; however, the results appear visually
 * much more meaningful to me than the balanc0r plugin does, although it
 * also uses significantly more CPU time.
 *
 * frei0r plugins are not given any meaningful information about input or
 * output color space. We assume sRGB, since that typically matches people's
 * viewing devices pretty well. sRGB in turn very closely matches ITU-R Rec.
 * BT.709, the standard color space for HDTV; they share the same RGB
 * primaries, and have a similar gamma (2.35 or 2.4, versus sRGB's curve that
 * is approximately a gamma curve at 2.2). SDTV uses a different color space,
 * ITU-R Rec. BT.601, which has somewhat different primaries and a gamma of
 * 2.2, but in practice, sRGB should be an okay approximation for this as well.
 *
 * The color matrices used are typically from Wikipedia, and the inverses
 * are computed by Octave if nothing else is mentioned.
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * We use fixed point, since conversion back and forth to floating-point is
 * slow. (This also enables us to use LUTs in an efficient way for lookup
 * to and from sRGB, as opposed to a pow()-based solution, which is very slow.)
 *
 * We need to think a bit about the range and precision of the different
 * elements to get the best results here, since linear RGB can span quite
 * some range. So:
 *
 *  - Input pixels (which are in linear RGB, always 0..1) are stored as 1.15.
 *  - Matrix elements are s4.10.
 *
 * Matrix elements up to +/- 16 should give us some headroom; extreme color
 * adjustments typically have elements of 5-6.
 *
 * Our standard operation is input_pixel * matrix_element, which becomes s5.25.
 * We add three of them, which would seem to be able to overflow, but doesn't,
 * since the largest pixel is 1.0 (represented as 2^15):
 *
 *   3 * 2^15 * (2^14 - 1) ~= 3 * 2^29 < 2^31.
 *
 * It _is_ larger than 2^30, though, so we don't have any bits to spare here.
 */
#define INPUT_PIXEL_BITS 15
#define MATRIX_ELEMENT_FRAC_BITS 10
#define MATRIX_ELEMENT_BITS (4 + MATRIX_ELEMENT_FRAC_BITS)

#include <stdlib.h>
#include <assert.h>
#include <math.h>
#include <stdio.h>

#ifdef __SSE2__
#include <emmintrin.h>
#endif

#include "frei0r.h"
#include "frei0r/math.h"

enum ParamIndex {
	NEUTRAL_COLOR,
	COLOR_TEMPERATURE,
};

// Row major (opposite of OpenGL).
typedef float Matrix3x3[9];

typedef struct colgate_instance
{
	unsigned width;
	unsigned height;
	f0r_param_color_t neutral_color;
	double color_temperature;

#ifdef __SSE2__
	__m128i premult_r[256];
	__m128i premult_g[256];
	__m128i premult_b[256];
#else
	int premult_r[256][3];
	int premult_g[256][3];
	int premult_b[256][3];
#endif
} colgate_instance_t;

// Assumes input value in [0..255]; output value is normalized.
static float convert_srgb_to_linear_rgb(float x)
{
	if (x < 255.0f * 0.04045f) {
		return x * (1.0f / (255.0f * 12.92f));
	} else {
		return pow((x + 255.0f * 0.055) * (1.0 / (255.0f * 1.055f)), 2.4);
	}
}

// Assumes normalized input value; output value in [0..255].
static float convert_linear_rgb_to_srgb(float x)
{
	if (x < 0.0031308f) {
		return (255.0f * 12.92f) * x;
	} else {
		return ((255.0f * 1.055f) * pow(x, 1.0f / 2.4f)) - (0.055 * 255.0f);
	}
}

/*
 * For linear -> sRGB, we need at least 13 bits to be able to distinguish all
 * input values; we go for 14 to get some extra accuracy. This results in an 16
 * kB LUT, but we generally don't need the L1 cache for a lot of other things
 * anyway, so hopefully the LUT can mostly stay in L1 cache.
 */
#define REVERSE_LUT_BITS 14
#define REVERSE_LUT_SIZE (1 << REVERSE_LUT_BITS)

static uint8_t linear_rgb_to_srgb_lut[REVERSE_LUT_SIZE];

static void fill_srgb_lut()
{
	int i;
	for (i = 0; i < REVERSE_LUT_SIZE; ++i) {
		// Subtract 0.5 to compensate for the fact that we don't round
		// (which, for our purposes, would entail _adding_ 0.5) at lookup time.
		float x = (i - 0.5) / (float)(REVERSE_LUT_SIZE);
		int srgb = lrintf(convert_linear_rgb_to_srgb(x));
		assert(srgb >= 0 && srgb <= 255);
		linear_rgb_to_srgb_lut[i] = srgb;
	}
}

// Multiply two 3x3 matrices.
static void multiply_3x3_matrices(const Matrix3x3 a, const Matrix3x3 b, Matrix3x3 result)
{
	result[0] = a[0] * b[0] + a[1] * b[3] + a[2] * b[6];
	result[3] = a[3] * b[0] + a[4] * b[3] + a[5] * b[6];
	result[6] = a[6] * b[0] + a[7] * b[3] + a[8] * b[6];

	result[1] = a[0] * b[1] + a[1] * b[4] + a[2] * b[7];
	result[4] = a[3] * b[1] + a[4] * b[4] + a[5] * b[7];
	result[7] = a[6] * b[1] + a[7] * b[4] + a[8] * b[7];

	result[2] = a[0] * b[2] + a[1] * b[5] + a[2] * b[8];
	result[5] = a[3] * b[2] + a[4] * b[5] + a[5] * b[8];
	result[8] = a[6] * b[2] + a[7] * b[5] + a[8] * b[8];
}

// Multiply a 3x3 matrix with a three-element float vector.
static void multiply_3x3_matrix_float3(const Matrix3x3 M, float x0, float x1, float x2, float *y0, float *y1, float *y2)
{
	*y0 = M[0] * x0 + M[1] * x1 + M[2] * x2;
	*y1 = M[3] * x0 + M[4] * x1 + M[5] * x2;
	*y2 = M[6] * x0 + M[7] * x1 + M[8] * x2;
}

// Convert a linear RGB value in s6.25 fixed-point to sRGB (between 0 to 255, inclusive).
static inline uint8_t convert_linear_rgb_to_srgb_fp(int x)
{
	if (x < 0) {
		return 0;
	}
	if (x >= (REVERSE_LUT_SIZE << (INPUT_PIXEL_BITS + MATRIX_ELEMENT_FRAC_BITS - REVERSE_LUT_BITS))) {
		return 255;
	}
	return linear_rgb_to_srgb_lut[((unsigned)x) >> (INPUT_PIXEL_BITS + MATRIX_ELEMENT_FRAC_BITS - REVERSE_LUT_BITS)];
}

// Temperature is in Kelvin. Formula from http://en.wikipedia.org/wiki/Planckian_locus#Approximation .
void convert_color_temperature_to_xyz(float T, float *x, float *y, float *z)
{
	double invT = 1.0 / T;
	double xc, yc;

	if (T <= 4000.0f) {
		xc = ((-0.2661239e9 * invT - 0.2343580e6) * invT + 0.8776956e3) * invT + 0.179910;
	} else {
		xc = ((-3.0258469e9 * invT + 2.1070379e6) * invT + 0.2226347e3) * invT + 0.240390;
	}

	if (T <= 2222.0f) {
		yc = ((-1.1063814 * xc - 1.34811020) * xc + 2.18555832) * xc - 0.20219683;
	} else if (T <= 4000.0f) {
		yc = ((-0.9549476 * xc - 1.37418593) * xc + 2.09137015) * xc - 0.16748867;
	} else {
		yc = (( 3.0817580 * xc - 5.87338670) * xc + 3.75112997) * xc - 0.37001483;
	}

	*x = xc;
	*y = yc;
	*z = 1.0 - xc - yc;
}

// sRGB primaries.
static const Matrix3x3 rgb_to_xyz_matrix = {
	0.4124, 0.3576, 0.1805,
	0.2126, 0.7152, 0.0722,
	0.0193, 0.1192, 0.9505,
};
static const Matrix3x3 xyz_to_rgb_matrix = {
	 3.240625, -1.537208, -0.498629,
	-0.968931,  1.875756,  0.041518,
	 0.055710, -0.204021,  1.056996,
};

static void convert_linear_rgb_to_linear_xyz(float r, float g, float b, float *x, float *y, float *z)
{
	multiply_3x3_matrix_float3(rgb_to_xyz_matrix, r, g, b, x, y, z);
}

static void convert_linear_xyz_to_linear_rgb(float x, float y, float z, float *r, float *g, float *b)
{
	multiply_3x3_matrix_float3(xyz_to_rgb_matrix, x, y, z, r, g, b);
}

/*
 * There are several different LMS spaces, at least according to Wikipedia.
 * Through practical testing, I've found most of them (like the CIECAM02 model)
 * to yield a result that is too reddish in practice. This is the RLAB space,
 * normalized to D65, which means that the standard D65 illuminant
 * (x=0.31271, y=0.32902, z=1-y-x) gives L=M=S under this transformation.
 * This makes sense because sRGB (which is used to derive those XYZ values
 * in the first place) assumes the D65 illuminant, and so the D65 illuminant
 * also gives R=G=B in sRGB.
 */
static const Matrix3x3 xyz_to_lms_matrix = {
	 0.4002, 0.7076, -0.0808,
	-0.2263, 1.1653,  0.0457,
	    0.0,    0.0,  0.9182,
};
static const Matrix3x3 lms_to_xyz_matrix = {
	1.86007, -1.12948,  0.21990,
	0.36122,  0.63880, -0.00001,
	0.00000,  0.00000,  1.08909,
};

static void convert_linear_xyz_to_linear_lms(float x, float y, float z, float *l, float *m, float *s)
{
	multiply_3x3_matrix_float3(xyz_to_lms_matrix, x, y, z, l, m, s);
}

static void convert_linear_lms_to_linear_xyz(float l, float m, float s, float *x, float *y, float *z)
{
	multiply_3x3_matrix_float3(lms_to_xyz_matrix, l, m, s, x, y, z);
}

/*
 * For a given reference color (given in XYZ space),
 * compute scaling factors for L, M and S. What we want at the output is equal L, M and S
 * for the reference color (making it a neutral illuminant), or sL ref_L = sM ref_M = sS ref_S.
 * This removes two degrees of freedom for our system, and we only need to find fL.
 *
 * A reasonable last constraint would be to preserve Y, approximately the brightness,
 * for the reference color. Since L'=M'=S' and the Y row of the LMS-to-XYZ matrix
 * sums to unity, we know that Y'=L', and it's easy to find the fL that sets Y'=Y.
 */
static void compute_lms_scaling_factors(float x, float y, float z, float *scale_l, float *scale_m, float *scale_s)
{
	float l, m, s;
	convert_linear_xyz_to_linear_lms(x, y, z, &l, &m, &s);

	*scale_l = y / l;
	*scale_m = *scale_l * (l / m);
	*scale_s = *scale_l * (l / s);
}

static void compute_correction_matrix(colgate_instance_t *o)
{
	int i;

	/*
	 * Find out what the given neutral color would be in LMS space,
	 * and use that value to build a correction factor for each component
	 * so that the neutral color really becomes gray (in LMS).
	 */
	float ref_r = o->neutral_color.r * 255.0f;
	float ref_g = o->neutral_color.g * 255.0f;
	float ref_b = o->neutral_color.b * 255.0f;

	float linear_r = convert_srgb_to_linear_rgb(ref_r);
	float linear_g = convert_srgb_to_linear_rgb(ref_g);
	float linear_b = convert_srgb_to_linear_rgb(ref_b);

	float x, y, z;
	convert_linear_rgb_to_linear_xyz(linear_r, linear_g, linear_b, &x, &y, &z);

	float l, m, s;
	convert_linear_xyz_to_linear_lms(x, y, z, &l, &m, &s);

	float l_scale, m_scale, s_scale;
	compute_lms_scaling_factors(x, y, z, &l_scale, &m_scale, &s_scale);

	/*
	 * Now apply the color balance. Simply put, we find the chromacity point
	 * for the desired white temperature, see what LMS scaling factors they
	 * would have given us, and then reverse that transform. For T=6500K,
	 * the default, this gives us nearly an identity transform (but only nearly,
	 * since the D65 illuminant does not exactly match the results of T=6500K);
	 * we normalize so that T=6500K really is a no-op.
	 */
	float white_x, white_y, white_z, l_scale_white, m_scale_white, s_scale_white;
	convert_color_temperature_to_xyz(o->color_temperature, &white_x, &white_y, &white_z);
	compute_lms_scaling_factors(white_x, white_y, white_z, &l_scale_white, &m_scale_white, &s_scale_white);

	float ref_x, ref_y, ref_z, l_scale_ref, m_scale_ref, s_scale_ref;
	convert_color_temperature_to_xyz(6500.0f, &ref_x, &ref_y, &ref_z);
	compute_lms_scaling_factors(ref_x, ref_y, ref_z, &l_scale_ref, &m_scale_ref, &s_scale_ref);

	l_scale *= l_scale_ref / l_scale_white;
	m_scale *= m_scale_ref / m_scale_white;
	s_scale *= s_scale_ref / s_scale_white;

	/*
	 * Concatenate all the different linear operations into a single 3x3 matrix.
	 * Note that since we postmultiply our vectors, the order of the matrices
	 * has to be the opposite of the execution order.
	 */
	Matrix3x3 temp, temp2, corr_matrix;
	Matrix3x3 lms_scale_matrix = {
		l_scale,    0.0f,    0.0f,
		   0.0f, m_scale,    0.0f,
		   0.0f,    0.0f, s_scale,
	};
	multiply_3x3_matrices(xyz_to_rgb_matrix, lms_to_xyz_matrix, temp);
	multiply_3x3_matrices(temp, lms_scale_matrix, temp2);
	multiply_3x3_matrices(temp2, xyz_to_lms_matrix, temp);
	multiply_3x3_matrices(temp, rgb_to_xyz_matrix, corr_matrix);

	// Scale for fixed-point, and clamp. We clamp the matrix elements
	// instead of the actual fixed-point numbers below, to make sure
	// we get consistent results over the entire range.
	for (i = 0; i < 9; ++i) {
		corr_matrix[i] *= (float)(1 << MATRIX_ELEMENT_FRAC_BITS);
		if (corr_matrix[i] < -(1 << MATRIX_ELEMENT_BITS)) {
			corr_matrix[i] = -(1 << MATRIX_ELEMENT_BITS);
		}
		if (corr_matrix[i] > (1 << MATRIX_ELEMENT_BITS) - 1) {
			corr_matrix[i] = (1 << MATRIX_ELEMENT_BITS) - 1;
		}
	}

	// Precompute some of the multiplications (after conversion from sRGB)
	// to save some time per-pixel later. Each of these contain the given color
	// converted to linear space and then multiplied by three different factors,
	// given by the matrix.
	for (i = 0; i < 256; ++i) {
		int x = convert_srgb_to_linear_rgb(i) * (float)(1 << INPUT_PIXEL_BITS);

		int r0 = lrintf(x * corr_matrix[0]);
		int r1 = lrintf(x * corr_matrix[3]);
		int r2 = lrintf(x * corr_matrix[6]);

		int g0 = lrintf(x * corr_matrix[1]);
		int g1 = lrintf(x * corr_matrix[4]);
		int g2 = lrintf(x * corr_matrix[7]);

		int b0 = lrintf(x * corr_matrix[2]);
		int b1 = lrintf(x * corr_matrix[5]);
		int b2 = lrintf(x * corr_matrix[8]);

#if __SSE2__
		o->premult_r[i] = _mm_setr_epi32(r0, r1, r2, 0);
		o->premult_g[i] = _mm_setr_epi32(g0, g1, g2, 0);
		o->premult_b[i] = _mm_setr_epi32(b0, b1, b2, 0);
#else
		o->premult_r[i][0] = r0;
		o->premult_r[i][1] = r1;
		o->premult_r[i][2] = r2;

		o->premult_g[i][0] = g0;
		o->premult_g[i][1] = g1;
		o->premult_g[i][2] = g2;

		o->premult_b[i][0] = b0;
		o->premult_b[i][1] = b1;
		o->premult_b[i][2] = b2;
#endif
	}
}

int f0r_init()
{
	fill_srgb_lut();
	return 1;
}

void f0r_deinit()
{
}

void f0r_get_plugin_info(f0r_plugin_info_t *colordistance_info)
{
	colordistance_info->name = "White Balance (LMS space)";
	colordistance_info->author = "Steinar H. Gunderson";
	colordistance_info->plugin_type = F0R_PLUGIN_TYPE_FILTER;
	colordistance_info->color_model = F0R_COLOR_MODEL_RGBA8888;
	colordistance_info->frei0r_version = FREI0R_MAJOR_VERSION;
	colordistance_info->major_version = 0;
	colordistance_info->minor_version = 1;
	colordistance_info->num_params = 2;
	colordistance_info->explanation = "Do simple color correction, in a physically meaningful way";
}

void f0r_get_param_info(f0r_param_info_t *info, int param_index)
{
	switch (param_index) {
	case NEUTRAL_COLOR:
		info->name = "Neutral Color";
		info->type = F0R_PARAM_COLOR;
		info->explanation = "Choose a color from the source image that should be white.";
		break;

	case COLOR_TEMPERATURE:
		info->name = "Color Temperature";
		info->type = F0R_PARAM_DOUBLE;
		info->explanation = "Choose an output color temperature, if different from 6500 K.";
		break;
	}

}

f0r_instance_t f0r_construct(unsigned width, unsigned height)
{
	colgate_instance_t *inst = (colgate_instance_t *)calloc(1, sizeof(*inst));
	inst->width = width;
	inst->height = height;
	inst->neutral_color.r = 0.5;
	inst->neutral_color.g = 0.5;
	inst->neutral_color.b = 0.5;
	inst->color_temperature = 6500.0;
	compute_correction_matrix(inst);
	return (f0r_instance_t)inst;
}

void f0r_destruct(f0r_instance_t instance)
{
	free(instance);
}

void f0r_set_param_value(f0r_instance_t instance, f0r_param_t param, int param_index)
{
	assert(instance);
	colgate_instance_t *inst = (colgate_instance_t *)instance;

	switch (param_index) {
	case NEUTRAL_COLOR:
		inst->neutral_color = *((f0r_param_color_t *)param);
		compute_correction_matrix(inst);
		break;

	case COLOR_TEMPERATURE:
		// Map frei0r range [0, 1] to temperature range [0, 15000].
		inst->color_temperature = *((double *)param) * 15000.0;
		if (inst->color_temperature < 1000.0 || inst->color_temperature > 15000.0) {
			inst->color_temperature = 6500.0;
		}
		compute_correction_matrix(inst);
		break;
	}
}

void f0r_get_param_value(f0r_instance_t instance, f0r_param_t param, int param_index)
{
	assert(instance);
	colgate_instance_t *inst = (colgate_instance_t*)instance;

	switch (param_index) {
	case NEUTRAL_COLOR:
		*((f0r_param_color_t *)param) = inst->neutral_color;
		break;

	case COLOR_TEMPERATURE:
		// Map temperature range [0, 15000] to frei0r range [0, 1].
		*((double *)param) = inst->color_temperature / 15000.0;
		break;
	}
}

void f0r_update(f0r_instance_t instance, double time, const uint32_t *inframe, uint32_t *outframe)
{
	assert(instance);
	colgate_instance_t *inst = (colgate_instance_t *)instance;
	unsigned len = inst->width * inst->height;
	unsigned char *dst = (unsigned char *)outframe;
	const unsigned char *src = (unsigned char *)inframe;
	unsigned i;

#ifdef __SSE2__
	__m128i zero = _mm_setzero_si128();
	__m128i max = _mm_set1_epi16(REVERSE_LUT_SIZE - 1);
	for (i = 0; i < len; ++i) {
		__m128i l1 = inst->premult_r[*src++];
		__m128i l2 = inst->premult_g[*src++];
		__m128i l3 = inst->premult_b[*src++];
		__m128i result = _mm_add_epi32(l3, _mm_add_epi32(l1, l2));

		// Shift into the right range, and then clamp to [min, max].
		// We convert to 16-bit values since we have min/max instructions
		// there (without needing SSE4), and because it allows us
		// to extract the values with one less SSE shift/move.
		result = _mm_srai_epi32(result, INPUT_PIXEL_BITS + MATRIX_ELEMENT_FRAC_BITS - REVERSE_LUT_BITS);
		result = _mm_packs_epi32(result, result);
		result = _mm_max_epi16(result, zero);
		result = _mm_min_epi16(result, max);

		unsigned new_rg = _mm_cvtsi128_si32(result);
		result = _mm_srli_si128(result, 4);
		unsigned new_b = _mm_cvtsi128_si32(result);

		*dst++ = linear_rgb_to_srgb_lut[new_rg & 0xffff];
		*dst++ = linear_rgb_to_srgb_lut[new_rg >> 16];
		*dst++ = linear_rgb_to_srgb_lut[new_b];
		*dst++ = *src++;  // Copy alpha.
	}
#else
	for (i = 0; i < len; ++i) {
		unsigned old_r = *src++;
		unsigned old_g = *src++;
		unsigned old_b = *src++;

		int new_r = inst->premult_r[old_r][0] + inst->premult_g[old_g][0] + inst->premult_b[old_b][0];
		int new_g = inst->premult_r[old_r][1] + inst->premult_g[old_g][1] + inst->premult_b[old_b][1];
		int new_b = inst->premult_r[old_r][2] + inst->premult_g[old_g][2] + inst->premult_b[old_b][2];

		*dst++ = convert_linear_rgb_to_srgb_fp(new_r);
		*dst++ = convert_linear_rgb_to_srgb_fp(new_g);
		*dst++ = convert_linear_rgb_to_srgb_fp(new_b);
		*dst++ = *src++;  // Copy alpha.
	}
#endif
}