/*
* kaleid0sc0pe.cpp
* Copyright (C) 2020-2023 Brendan Hack (github@bendys.com)
* This file is part of a Frei0r plugin that applies a kaleidoscope
* effect.
* Version 1.1 july 2023
*
* The kaleidoscope class implementation.
*
* This source code is free software; you can redistribute it and/or
* modify it under the terms of the GNU Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. Please refer
* to the GNU Public License for more details.
*
* You should have received a copy of the GNU Public License along
* with this source code; if not, write to: Free Software Foundation,
* Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#include "kaleid0sc0pe.h"
#include "frei0r/math.h"
#include <memory>
#include <cstring>
#ifndef NO_FUTURE
#include <future>
#endif
#ifdef __SSE2__
#define USE_SSE2
#include "sse_mathfun_extension.h"
#undef USE_SSE2
#ifdef HAS_INTEL_INTRINSICS
#include <immintrin.h>
#endif
#ifdef HAS_SIN_INTRINSIC
#define _mm_call_sin_ps _mm_sin_ps
#else
#define _mm_call_sin_ps sin_ps
#endif
#ifdef HAS_COS_INTRINSIC
#define _mm_call_cos_ps _mm_cos_ps
#else
#define _mm_call_cos_ps cos_ps
#endif
#ifdef HAS_ATAN2_INTRINSIC
#define _mm_call_atan2_ps _mm_atan2_ps
#else
#define _mm_call_atan2_ps atan2_ps
#endif
#endif
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
#ifndef M_2PI
#define M_2PI 6.28318530717958647693
#endif
#ifndef MF_PI
#define MF_PI 3.14159265358979323846f
#endif
#ifndef MF_2PI
#define MF_2PI 6.28318530717958647693f
#endif
namespace std
{
// Out-of-line definition of the call operator of
// std::default_delete<libkaleid0sc0pe::IKaleid0sc0pe>; it destroys the
// instance with a plain delete expression.
// NOTE(review): the specialization itself is presumably declared in
// kaleid0sc0pe.h - confirm against the header.
void default_delete<libkaleid0sc0pe::IKaleid0sc0pe>::operator()(libkaleid0sc0pe::IKaleid0sc0pe *p)
{
delete p;
}
} // namespace std
namespace libkaleid0sc0pe {
// Factory: heap-allocates a Kaleid0sc0pe, forwarding all arguments unchanged
// to its constructor, and returns it through the interface pointer.
// The caller receives a pointer obtained from new.
IKaleid0sc0pe *IKaleid0sc0pe::create(std::uint32_t width, std::uint32_t height, std::uint32_t component_size, std::uint32_t num_components, std::uint32_t stride)
{
    IKaleid0sc0pe *instance = new Kaleid0sc0pe(width,
                                               height,
                                               component_size,
                                               num_components,
                                               stride);
    return instance;
}
// Constructs a kaleidoscope for frames of width x height pixels where each
// pixel consists of num_components components of component_size bytes.
//
// stride is the number of bytes per row; passing 0 selects a tightly packed
// row (width * component_size * num_components).
//
// Initial settings: origin at the frame centre (0.5, 0.5), segmentation 16,
// no segment direction, preferred corner BR searched clockwise, edge
// reflection enabled, no background colour, edge threshold 0, automatic
// source segment (negative angle) and automatic thread count (0).
// m_n_segments starts at 0 so the first process()/visualise() call runs init().
Kaleid0sc0pe::Kaleid0sc0pe(std::uint32_t width, std::uint32_t height, std::uint32_t component_size, std::uint32_t num_components, std::uint32_t stride):
    m_width(width),
    m_height(height),
    m_component_size(component_size),
    m_num_components(num_components),
    m_stride(stride ? stride : width * component_size * num_components),
    m_pixel_size(component_size * num_components),
    m_aspect(width/static_cast<float>(height)),
    m_origin_x(0.5f),
    m_origin_y(0.5f),
    m_origin_native_x(m_origin_x * width),
    m_origin_native_y(m_origin_y * height),
    m_segmentation(16),
    m_segment_direction(Direction::NONE),
    m_preferred_corner(Corner::BR),
    m_preferred_search_dir(Direction::CLOCKWISE),
    m_edge_reflect(true),
    m_background_colour(nullptr),
    m_edge_threshold(0),
    m_source_segment_angle(-1),
    m_n_segments(0),
    m_start_angle(0),
    m_segment_width(0),
    m_n_threads(0)
{
#ifdef __SSE2__
    // Broadcast the frame-size dependent values and the small constants used
    // by the SIMD code paths; the origin/angle dependent ones are set in init().
    m_sse_width = _mm_set1_ps(static_cast<float>(m_width));
    m_sse_height = _mm_set1_ps(static_cast<float>(m_height));
    m_sse_aspect = _mm_set1_ps(m_width / static_cast<float>(m_height));
    m_sse_ps_0 = _mm_set1_ps(0.0f);
    m_sse_ps_1 = _mm_set1_ps(1.0f);
    m_sse_ps_2 = _mm_set1_ps(2.0f);
    m_sse_epi32_1 = _mm_set1_epi32(1);
    m_sse_epi32_2 = _mm_set1_epi32(2);
    m_sse_shift_1 = _mm_cvtsi32_si128(1);
#endif
}
// Sets the kaleidoscope origin in normalised frame coordinates.
//
// x, y: origin position, each in the closed range [0, 1].
// Returns 0 on success or -2 if either coordinate is outside [0, 1]
// (including NaN), in which case nothing is modified.
//
// Also refreshes the origin in pixel units and clears m_n_segments so the
// derived state is rebuilt by init() on the next process()/visualise() call.
std::int32_t Kaleid0sc0pe::set_origin(float x, float y)
{
    // Expressed as a positive range test so that NaN, for which every
    // comparison is false, is rejected instead of slipping through.
    const bool x_valid = x >= 0 && x <= 1;
    const bool y_valid = y >= 0 && y <= 1;
    if (!(x_valid && y_valid)) {
        return -2;
    }
    m_origin_x = x;
    m_origin_y = y;
    m_origin_native_x = m_origin_x * m_width;
    m_origin_native_y = m_origin_y * m_height;
    m_n_segments = 0;
    return 0;
}
float Kaleid0sc0pe::get_origin_x() const
{
return m_origin_x;
}
float Kaleid0sc0pe::get_origin_y() const
{
return m_origin_y;
}
// Stores the segmentation value; init() uses twice this value as the number
// of segments. Returns -2 (and changes nothing) for 0, otherwise 0.
// A successful call clears m_n_segments so init() runs again.
std::int32_t Kaleid0sc0pe::set_segmentation(std::uint32_t segmentation)
{
    if (segmentation != 0) {
        m_segmentation = segmentation;
        m_n_segments = 0;
        return 0;
    }
    return -2;
}
std::uint32_t Kaleid0sc0pe::get_segmentation() const
{
return m_segmentation;
}
std::int32_t Kaleid0sc0pe::set_edge_threshold(std::uint32_t threshold)
{
m_edge_threshold = threshold;
return 0;
}
std::uint32_t Kaleid0sc0pe::get_edge_threshold() const
{
return m_edge_threshold;
}
// Stores the corner at which init() starts its search for the corner
// furthest from the origin. Clears m_n_segments so init() runs again.
// Always returns 0.
std::int32_t Kaleid0sc0pe::set_preferred_corner(Corner corner)
{
    this->m_preferred_corner = corner;
    this->m_n_segments = 0;
    return 0;
}
// Returns the corner last stored by set_preferred_corner.
Kaleid0sc0pe::Corner Kaleid0sc0pe::get_preferred_corner() const
{
    return this->m_preferred_corner;
}
// Stores the direction in which init() walks the frame corners.
// Direction::NONE is rejected with -2 and leaves the setting untouched;
// any other value is stored, m_n_segments is cleared and 0 is returned.
std::int32_t Kaleid0sc0pe::set_preferred_corner_search_direction(Direction direction)
{
    if (direction != Direction::NONE) {
        m_preferred_search_dir = direction;
        m_n_segments = 0;
        return 0;
    }
    return -2;
}
// Returns the direction last stored by set_preferred_corner_search_direction.
Kaleid0sc0pe::Direction Kaleid0sc0pe::get_preferred_corner_search_direction() const
{
    return this->m_preferred_search_dir;
}
std::int32_t Kaleid0sc0pe::set_reflect_edges(bool reflect)
{
m_edge_reflect = reflect;
return 0;
}
bool Kaleid0sc0pe::get_reflect_edges() const
{
return m_edge_reflect;
}
std::int32_t Kaleid0sc0pe::set_background_colour(void* colour)
{
m_background_colour = colour;
return 0;
}
void* Kaleid0sc0pe::get_background_colour() const
{
return m_background_colour;
}
std::int32_t Kaleid0sc0pe::set_source_segment(float angle)
{
m_source_segment_angle = angle;
return 0;
}
float Kaleid0sc0pe::get_source_segment() const
{
return m_source_segment_angle;
}
// Returns the squared Euclidean distance between (x1, y1) and (x2, y2).
// Plain multiplication is used for the squares instead of the general
// purpose std::pow.
static double distance_sq(double x1, double y1, double x2, double y2)
{
    const double dx = x1 - x2;
    const double dy = y1 - y2;
    return dx * dx + dy * dy;
}
// Advances start_idx by inc and wraps the result into [0, max).
//
// start_idx: current index.
// inc:       signed step; may be negative and larger in magnitude than 1.
// max:       size of the index range; must be > 0.
// Returns the wrapped index.
std::int32_t inc_idx(std::int32_t start_idx, std::int32_t inc, std::int32_t max)
{
    // The % operator keeps the sign of the dividend, so shift a negative
    // remainder back into range; this wraps correctly for any step size.
    const std::int32_t remainder = (start_idx + inc) % max;
    return (remainder < 0) ? remainder + max : remainder;
}
// Rebuilds the state derived from the user settings: the segment count
// (twice the segmentation), the angular width of one segment and the start
// angle. With a negative source segment angle the start angle points from
// the origin towards the frame corner furthest from it, optionally shifted
// by half a segment depending on the segment direction; otherwise it is the
// negated source segment angle.
void Kaleid0sc0pe::init()
{
    m_n_segments = m_segmentation * 2;
    m_segment_width = MF_PI * 2 / m_n_segments;

    if (m_source_segment_angle < 0) {
        // Frame corners in normalised coordinates, listed TL, TR, BR, BL.
        const std::uint32_t corner_pos[4][2] = {
            { 0, 0 },
            { 1, 0 },
            { 1, 1 },
            { 0, 1 }
        };

        std::int32_t first = 0;
        switch (m_preferred_corner) {
        case Corner::TL:
            first = 0;
            break;
        case Corner::TR:
            first = 1;
            break;
        case Corner::BR:
            first = 2;
            break;
        case Corner::BL:
            first = 3;
            break;
        }
        const std::int32_t step = (m_preferred_search_dir == Direction::CLOCKWISE) ? 1 : -1;

        const float ox = m_origin_x;
        const float oy = m_origin_y;

        // Walk the remaining three corners in the search direction. A corner
        // only replaces the current choice when it is strictly further away,
        // so on ties the earliest corner visited wins.
        std::int32_t furthest = first;
        double furthest_dist = distance_sq(ox, oy, corner_pos[first][0], corner_pos[first][1]);
        for (std::int32_t i = inc_idx(first, step, 4); i != first; i = inc_idx(i, step, 4)) {
            const double candidate = distance_sq(ox, oy, corner_pos[i][0], corner_pos[i][1]);
            if (candidate > furthest_dist) {
                furthest_dist = candidate;
                furthest = i;
            }
        }

        const float dir_x = corner_pos[furthest][0] - ox;
        const float dir_y = corner_pos[furthest][1] - oy;
        const auto corner_angle = std::atan2(dir_y, dir_x);
        if (m_segment_direction == Direction::NONE) {
            m_start_angle = corner_angle;
        } else {
            // Shift by half a segment; the sign depends on the direction.
            m_start_angle = corner_angle - (m_segment_width / (m_segment_direction == Direction::CLOCKWISE ? -2 : 2));
        }
    } else {
        m_start_angle = -m_source_segment_angle;
    }

#ifdef __SSE2__
    // Broadcast the freshly computed values for the SIMD code paths.
    m_sse_origin_native_x = _mm_set1_ps(m_origin_x * m_width);
    m_sse_origin_native_y = _mm_set1_ps(m_origin_y * m_height);
    m_sse_start_angle = _mm_set1_ps(m_start_angle);
    m_sse_segment_width = _mm_set1_ps(m_segment_width);
    m_sse_half_segment_width = _mm_set1_ps(m_segment_width/2);
#endif
}
#ifdef __SSE2__
// SSE2 variant: computes, for four pixels at once, the origin-relative
// position, the angle relative to the start angle, the reference angle
// (|angle| + half a segment width) and the index of the segment the pixel
// falls in, both as integer and as float lanes.
//
// x, y: pointers to four packed 32 bit integer pixel coordinates each.
Kaleid0sc0pe::Reflect_info Kaleid0sc0pe::calculate_reflect_info(__m128i* x, __m128i* y)
{
    Reflect_info info;
    to_screen(&info.screen_x, &info.screen_y, x, y);

    const __m128 polar_angle = _mm_call_atan2_ps(info.screen_y, info.screen_x);
    info.angle = _mm_sub_ps(polar_angle, m_sse_start_angle);

    const __m128 abs_angle = _mm_and_ps(info.angle, *(v4sf*)_ps_inv_sign_mask);
    info.reference_angle = _mm_add_ps(abs_angle, m_sse_half_segment_width);

    // atan2_ps yields NaN for atan2(0,0), which would otherwise end up as a
    // negative reference angle; the max with 0 keeps the segment index sane.
    const __m128 raw_segment = _mm_div_ps(info.reference_angle, m_sse_segment_width);
    info.segment_number_i = _mm_cvttps_epi32(_mm_max_ps(raw_segment, m_sse_ps_0));
    info.segment_number = _mm_cvtepi32_ps(info.segment_number_i);
    return info;
}
// SSE2 variant: converts four integer pixel coordinates (sx, sy) into
// origin-relative float coordinates; the y component is additionally
// multiplied by the aspect ratio.
void Kaleid0sc0pe::to_screen(__m128* x, __m128* y, __m128i* sx, __m128i* sy)
{
    *x = _mm_sub_ps(_mm_cvtepi32_ps(*sx), m_sse_origin_native_x);
    *y = _mm_mul_ps(_mm_sub_ps(_mm_cvtepi32_ps(*sy), m_sse_origin_native_y),
                    m_sse_aspect);
}
// SSE2 variant: inverse of to_screen. Converts four origin-relative
// coordinates in place back to pixel coordinates: y is divided by the
// aspect ratio and the origin is added to both components.
void Kaleid0sc0pe::from_screen(__m128* x, __m128* y)
{
    *x = _mm_add_ps(*x, m_sse_origin_native_x);
    *y = _mm_add_ps(_mm_div_ps(*y, m_sse_aspect), m_sse_origin_native_y);
}
// SSE2 only: computes the source coordinates for the four horizontally
// adjacent destination pixels (x..x+3, y).
//
// x, y:               destination pixel coordinates of the first of the four pixels.
// source_x, source_y: receive the four source positions in pixel coordinates
//                     (not yet reflected or clamped into the frame).
//
// This is the branch-free counterpart of the angle computation in the scalar
// process_block: per-lane conditions are turned into 0/1 or +1/-1 factors.
void Kaleid0sc0pe::rotate(int x, int y, __m128 *source_x, __m128 *source_y)
{
ALIGN16_BEG int ALIGN16_END mx[4] = { x, x + 1, x + 2, x + 3 };
ALIGN16_BEG int ALIGN16_END my[4] = { y, y, y, y };
Reflect_info info = calculate_reflect_info((__m128i*)mx, (__m128i*)my);
// base angle: segment index * segment width
__m128 reflection_angle = _mm_mul_ps(info.segment_number, m_sse_segment_width);
__m128i segi_p1 = _mm_add_epi32(info.segment_number_i, m_sse_epi32_1);
// ((n + 1) >> 1) - (n >> 1) is 1 for odd n and 0 for even n
__m128 refl_factor = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srl_epi32(segi_p1, m_sse_shift_1), _mm_srl_epi32(info.segment_number_i, m_sse_shift_1)));
// odd segments only: subtract (segment_width - 2 * (reference_angle - reflection_angle))
reflection_angle = _mm_sub_ps(reflection_angle, _mm_mul_ps(refl_factor, _mm_sub_ps(m_sse_segment_width, _mm_mul_ps(m_sse_ps_2, _mm_sub_ps(info.reference_angle, reflection_angle)))));
// (sign bit of angle | 1.0f) is -1 or +1; subtracting it from 0 multiplies by +1 for negative angles and by -1 otherwise
reflection_angle = _mm_mul_ps(reflection_angle, _mm_sub_ps(m_sse_ps_0, _mm_or_ps(_mm_and_ps(info.angle, *(v4sf*)_ps_sign_mask), m_sse_ps_1)));
// lanes in segment 0 get a factor of 0, i.e. no rotation at all
reflection_angle = _mm_mul_ps(reflection_angle, _mm_and_ps(_mm_cmpge_ps(info.segment_number, m_sse_ps_1), m_sse_ps_1));
__m128 cos_angle = _mm_call_cos_ps(reflection_angle);
__m128 sin_angle = _mm_call_sin_ps(reflection_angle);
// 2D rotation of the origin-relative position by reflection_angle
*source_x = _mm_sub_ps(_mm_mul_ps(info.screen_x, cos_angle), _mm_mul_ps(info.screen_y, sin_angle));
*source_y = _mm_add_ps(_mm_mul_ps(info.screen_y, cos_angle), _mm_mul_ps(info.screen_x, sin_angle));
from_screen(source_x, source_y);
}
#else
// Scalar variant: computes for pixel (x, y) the origin-relative position,
// the angle relative to the start angle, the reference angle
// (|angle| + half a segment width) and the index of the segment it lies in.
Kaleid0sc0pe::Reflect_info Kaleid0sc0pe::calculate_reflect_info(std::uint32_t x, std::uint32_t y)
{
    Reflect_info result;
    to_screen(&result.screen_x, &result.screen_y, x, y);
    result.angle = std::atan2(result.screen_y, result.screen_x) - m_start_angle;
    result.reference_angle = std::fabs(result.angle) + m_segment_width / 2;
    result.segment_number = std::uint32_t(result.reference_angle / m_segment_width);
    return result;
}
// Scalar variant: converts pixel coordinates (sx, sy) into origin-relative
// coordinates; the y component is additionally multiplied by the aspect ratio.
void Kaleid0sc0pe::to_screen(float *x, float *y, std::uint32_t sx, std::uint32_t sy)
{
    *x = sx - m_origin_native_x;
    *y = (sy - m_origin_native_y) * m_aspect;
}
// Scalar variant: inverse of to_screen. Converts origin-relative coordinates
// in place back to pixel coordinates: y is divided by the aspect ratio and
// the origin is added to both components.
void Kaleid0sc0pe::from_screen(float *x, float *y)
{
    *x = *x + m_origin_native_x;
    *y = (*y / m_aspect) + m_origin_native_y;
}
#endif
// Returns the address of pixel (x, y) inside the read-only frame starting at
// p, using the configured row stride and pixel size. No bounds checking.
const std::uint8_t *Kaleid0sc0pe::lookup(const std::uint8_t* p, std::uint32_t x, std::uint32_t y)
{
    const std::uint8_t* row = p + m_stride * static_cast<std::size_t>(y);
    return row + m_pixel_size * static_cast<std::size_t>(x);
}
// Returns the address of pixel (x, y) inside the writable frame starting at
// p, using the configured row stride and pixel size. No bounds checking.
std::uint8_t* Kaleid0sc0pe::lookup(std::uint8_t* p, std::uint32_t x, std::uint32_t y)
{
    std::uint8_t* row = p + m_stride * static_cast<std::size_t>(y);
    return row + m_pixel_size * static_cast<std::size_t>(x);
}
// Writes one output pixel when edge reflection is disabled.
//
// x, y: source position in pixel coordinates; may lie outside the frame.
// in:   start of the input frame.
// out:  destination of the pixel (m_pixel_size bytes).
//
// Positions outside the frame by no more than the edge threshold are snapped
// onto the nearest border. If the resulting position is inside the frame the
// source pixel is copied; otherwise the background colour is copied when one
// is set, and out is left untouched when none is set.
void Kaleid0sc0pe::process_bg(float x, float y, const std::uint8_t* in, std::uint8_t* out)
{
    if (x < 0 && -x <= m_edge_threshold) {
        x = 0;
    } else if (x >= m_width && x < m_width + m_edge_threshold) {
        x = m_width - 1.0f;
    }
    if (y < 0 && -y <= m_edge_threshold) {
        y = 0;
    } else if (y >= m_height && y < m_height + m_edge_threshold) {
        y = m_height - 1.0f;
    }

    // Test the bounds on the float values before converting: converting a
    // float whose truncated value does not fit the unsigned type (negative,
    // huge or NaN) is undefined behaviour. Values in (-1, 0) truncate to 0
    // and therefore still address the first column/row, as they always did.
    const bool inside_x = x > -1.0f && x < m_width;
    const bool inside_y = y > -1.0f && y < m_height;
    if (inside_x && inside_y) {
        std::memcpy(out, lookup(in, static_cast<std::uint32_t>(x), static_cast<std::uint32_t>(y)), m_pixel_size);
    } else if (m_background_colour) {
        std::memcpy(out, reinterpret_cast<const std::uint8_t*>(m_background_colour), m_pixel_size);
    }
}
#ifdef __SSE2__
// SSE2 variant with edge reflection: fills the rectangle described by block
// (bounds inclusive), four horizontally adjacent pixels per step. Source
// positions outside the frame are mirrored back in, then clamped to valid
// pixel indices before the pixel is copied.
void Kaleid0sc0pe::process_block(Block* block)
{
    const std::int32_t max_col = static_cast<std::int32_t>(m_width) - 1;
    const std::int32_t max_row = static_cast<std::int32_t>(m_height) - 1;

    for (std::int32_t row = block->y_start; row <= static_cast<std::int32_t>(block->y_end); ++row) {
        for (std::int32_t col = block->x_start; col <= static_cast<std::int32_t>(block->x_end); col += 4) {
            std::uint8_t* dst = lookup(block->out_frame, col, row);

            // rotate the four destination points to their source positions
            __m128 source_x;
            __m128 source_y;
            rotate(col, row, &source_x, &source_y);

            // mirror x: take the absolute value, then fold anything at or
            // beyond the width back as width - (x - width)
            source_x = _mm_and_ps(source_x, *(v4sf*)_ps_inv_sign_mask);
            const __m128 x_beyond = _mm_cmpge_ps(source_x, m_sse_width);
            const __m128 x_folded = _mm_sub_ps(m_sse_width, _mm_sub_ps(source_x, m_sse_width));
            source_x = _mm_or_ps(_mm_and_ps(x_folded, x_beyond), _mm_andnot_ps(x_beyond, source_x));

            // mirror y the same way against the height
            source_y = _mm_and_ps(source_y, *(v4sf*)_ps_inv_sign_mask);
            const __m128 y_beyond = _mm_cmpge_ps(source_y, m_sse_height);
            const __m128 y_folded = _mm_sub_ps(m_sse_height, _mm_sub_ps(source_y, m_sse_height));
            source_y = _mm_or_ps(_mm_and_ps(y_folded, y_beyond), _mm_andnot_ps(y_beyond, source_y));

            // clamp to [0, size - 1] and truncate to integers
            __m128i source_xi = _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(source_x, _mm_setzero_ps()), _mm_sub_ps(m_sse_width, m_sse_ps_1)));
            __m128i source_yi = _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(source_y, _mm_setzero_ps()), _mm_sub_ps(m_sse_height, m_sse_ps_1)));
            std::int32_t* lane_x = reinterpret_cast<std::int32_t*>(&source_xi);
            std::int32_t* lane_y = reinterpret_cast<std::int32_t*>(&source_yi);

            for (int lane = 0; lane < 4; ++lane) {
                // integer clamp as a final guard before indexing the frame
                const std::int32_t src_col = CLAMP(lane_x[lane], 0, max_col);
                const std::int32_t src_row = CLAMP(lane_y[lane], 0, max_row);
                std::memcpy(dst, lookup(block->in_frame, src_col, src_row), m_pixel_size);
                dst += m_pixel_size;
            }
        }
    }
}
// SSE2 variant without edge reflection: fills the rectangle described by
// block (bounds inclusive), four horizontally adjacent pixels per step.
// The source positions are computed with SIMD, then each lane is handed to
// process_bg which copies the source pixel or the background colour.
void Kaleid0sc0pe::process_block_bg(Block* block)
{
    for (std::int32_t row = block->y_start; row <= static_cast<std::int32_t>(block->y_end); ++row) {
        for (std::int32_t col = block->x_start; col <= static_cast<std::int32_t>(block->x_end); col += 4) {
            std::uint8_t* dst = lookup(block->out_frame, col, row);

            // rotate the four destination points to their source positions
            __m128 source_x;
            __m128 source_y;
            rotate(col, row, &source_x, &source_y);

            float* lane_x = reinterpret_cast<float*>(&source_x);
            float* lane_y = reinterpret_cast<float*>(&source_y);
            for (int lane = 0; lane < 4; ++lane) {
                process_bg(lane_x[lane], lane_y[lane], block->in_frame, dst);
                dst += m_pixel_size;
            }
        }
    }
}
#else
// Scalar variant: fills the rectangle described by block (bounds inclusive).
// Pixels in segment 0 are copied unchanged. All others are rotated by the
// reflection angle of their segment; the resulting source position is either
// mirrored back into the frame and clamped (edge reflection enabled) or
// passed to process_bg.
void Kaleid0sc0pe::process_block(Block *block)
{
    for (std::uint32_t y = block->y_start; y <= block->y_end; ++y) {
        for (std::uint32_t x = block->x_start; x <= block->x_end; ++x) {
            std::uint8_t* dst = lookup(block->out_frame, x, y);
            Reflect_info info = calculate_reflect_info(x, y);

            if (!info.segment_number) {
                // source segment: straight copy
                std::memcpy(dst, lookup(block->in_frame, x, y), m_pixel_size);
                continue;
            }

            float reflection_angle = (info.segment_number * m_segment_width);
            if (info.segment_number % 2) {
                // odd segments are mirrored rather than merely rotated
                reflection_angle -= (m_segment_width - 2 * (info.reference_angle - reflection_angle));
            }
            const float direction = std::signbit(info.angle) ? 1.0f : -1.0f;
            reflection_angle *= direction;

            float cos_angle = std::cos(reflection_angle);
            float sin_angle = std::sin(reflection_angle);
            float source_x = info.screen_x * cos_angle - info.screen_y * sin_angle;
            float source_y = info.screen_y * cos_angle + info.screen_x * sin_angle;
            from_screen(&source_x, &source_y);

            if (!m_edge_reflect) {
                process_bg(source_x, source_y, block->in_frame, dst);
                continue;
            }

            // mirror positions outside the frame back into it
            if (source_x < 0) {
                source_x = -source_x;
            } else if (source_x > m_width - 10e-4f) {
                source_x = m_width - (source_x - m_width + 10e-4f);
            }
            if (source_y < 0) {
                source_y = -source_y;
            } else if (source_y > m_height - 10e-4f) {
                source_y = m_height - (source_y - m_height + 10e-4f);
            }
            source_x = CLAMP(source_x, 0.0f, static_cast<float>(m_width - 1));
            source_y = CLAMP(source_y, 0.0f, static_cast<float>(m_height - 1));
            std::memcpy(dst, lookup(block->in_frame, static_cast<std::uint32_t>(source_x), static_cast<std::uint32_t>(source_y)), m_pixel_size);
        }
    }
}
#endif
// Palette of 63 distinct colours, three bytes per entry.
// visualise() indexes it with (segment number % 63) and writes the three
// bytes of the entry to the first three bytes of each output pixel.
std::uint8_t colours[63][3] = {
{ 0x00, 0xFF, 0x00 },
{ 0x00, 0x00, 0xFF },
{ 0xFF, 0x00, 0x00 },
{ 0x01, 0xFF, 0xFE },
{ 0xFF, 0xA6, 0xFE },
{ 0xFF, 0xDB, 0x66 },
{ 0x00, 0x64, 0x01 },
{ 0x01, 0x00, 0x67 },
{ 0x95, 0x00, 0x3A },
{ 0x00, 0x7D, 0xB5 },
{ 0xFF, 0x00, 0xF6 },
{ 0xFF, 0xEE, 0xE8 },
{ 0x77, 0x4D, 0x00 },
{ 0x90, 0xFB, 0x92 },
{ 0x00, 0x76, 0xFF },
{ 0xD5, 0xFF, 0x00 },
{ 0xFF, 0x93, 0x7E },
{ 0x6A, 0x82, 0x6C },
{ 0xFF, 0x02, 0x9D },
{ 0xFE, 0x89, 0x00 },
{ 0x7A, 0x47, 0x82 },
{ 0x7E, 0x2D, 0xD2 },
{ 0x85, 0xA9, 0x00 },
{ 0xFF, 0x00, 0x56 },
{ 0xA4, 0x24, 0x00 },
{ 0x00, 0xAE, 0x7E },
{ 0x68, 0x3D, 0x3B },
{ 0xBD, 0xC6, 0xFF },
{ 0x26, 0x34, 0x00 },
{ 0xBD, 0xD3, 0x93 },
{ 0x00, 0xB9, 0x17 },
{ 0x9E, 0x00, 0x8E },
{ 0x00, 0x15, 0x44 },
{ 0xC2, 0x8C, 0x9F },
{ 0xFF, 0x74, 0xA3 },
{ 0x01, 0xD0, 0xFF },
{ 0x00, 0x47, 0x54 },
{ 0xE5, 0x6F, 0xFE },
{ 0x78, 0x82, 0x31 },
{ 0x0E, 0x4C, 0xA1 },
{ 0x91, 0xD0, 0xCB },
{ 0xBE, 0x99, 0x70 },
{ 0x96, 0x8A, 0xE8 },
{ 0xBB, 0x88, 0x00 },
{ 0x43, 0x00, 0x2C },
{ 0xDE, 0xFF, 0x74 },
{ 0x00, 0xFF, 0xC6 },
{ 0xFF, 0xE5, 0x02 },
{ 0x62, 0x0E, 0x00 },
{ 0x00, 0x8F, 0x9C },
{ 0x98, 0xFF, 0x52 },
{ 0x75, 0x44, 0xB1 },
{ 0xB5, 0x00, 0xFF },
{ 0x00, 0xFF, 0x78 },
{ 0xFF, 0x6E, 0x41 },
{ 0x00, 0x5F, 0x39 },
{ 0x6B, 0x68, 0x82 },
{ 0x5F, 0xAD, 0x4E },
{ 0xA7, 0x57, 0x40 },
{ 0xA5, 0xFF, 0xD2 },
{ 0xFF, 0xB1, 0x67 },
{ 0x00, 0x9B, 0xFF },
{ 0xE8, 0x5E, 0xBE }
};
// Stores the segment direction. init() uses it to decide whether, and with
// which sign, the start angle is shifted by half a segment width.
// Clears m_n_segments so init() runs again. Always returns 0.
std::int32_t Kaleid0sc0pe::set_segment_direction(Direction direction)
{
    this->m_segment_direction = direction;
    this->m_n_segments = 0;
    return 0;
}
// Returns the direction last stored by set_segment_direction
// (Direction::NONE after construction).
libkaleid0sc0pe::Kaleid0sc0pe::Direction Kaleid0sc0pe::get_segment_direction() const
{
    return this->m_segment_direction;
}
std::int32_t Kaleid0sc0pe::process(const void* in_frame, void* out_frame)
{
if (in_frame == nullptr || out_frame == nullptr) {
return -2;
}
#ifdef __SSE2__
if (m_width % 4 != 0) {
return -2;
}
#endif
if (m_n_segments == 0) {
init();
}
#ifdef NO_FUTURE
Block block(reinterpret_cast<const std::uint8_t*>(in_frame),
reinterpret_cast<std::uint8_t*>(out_frame),
0, 0,
m_width - 1, m_height - 1);
#ifdef __SSE2__
if (m_edge_reflect) {
process_block(&block);
} else {
process_block_bg(&block);
}
#else
process_block(&block);
#endif
#else
if (m_n_threads == 1) {
Block block(reinterpret_cast<const std::uint8_t*>(in_frame),
reinterpret_cast<std::uint8_t*>(out_frame),
0, 0,
m_width - 1, m_height - 1);
#ifdef __SSE2__
if (m_edge_reflect) {
process_block(&block);
} else {
process_block_bg(&block);
}
#else
process_block(&block);
#endif
} else {
std::uint32_t n_threads = m_n_threads == 0 ? std::thread::hardware_concurrency() : m_n_threads;
std::vector<std::future<void>> futures;
std::vector<std::unique_ptr<Block>> blocks;
std::uint32_t block_height = m_height / n_threads;
std::uint32_t y_start = 0;
std::uint32_t y_end = m_height - block_height * (n_threads - 1) - 1;
for (std::uint32_t i = 0; i < n_threads; ++i) {
blocks.emplace_back(new Block(
reinterpret_cast<const std::uint8_t*>(in_frame),
reinterpret_cast<std::uint8_t*>(out_frame),
0, y_start,
m_width - 1, y_end));
#ifdef __SSE2__
futures.push_back(std::async(std::launch::async, m_edge_reflect ? &Kaleid0sc0pe::process_block : &Kaleid0sc0pe::process_block_bg, this, blocks[i].get()));
#else
futures.push_back(std::async(std::launch::async, &Kaleid0sc0pe::process_block, this, blocks[i].get()));
#endif
y_start = y_end + 1;
y_end += block_height;
}
for (auto& f : futures) {
f.wait();
}
}
#endif
return 0;
}
std::int32_t Kaleid0sc0pe::set_threading(std::uint32_t threading)
{
m_n_threads = threading;
return 0;
}
std::uint32_t Kaleid0sc0pe::get_threading() const
{
return m_n_threads;
}
std::int32_t Kaleid0sc0pe::visualise(void* out_frame)
{
if (out_frame == nullptr) {
return -2;
}
#ifdef __SSE2__
if (m_width % 4 != 0) {
return -2;
}
#endif
if (m_n_segments == 0) {
init();
}
for (std::uint32_t y = 0; y < m_height; ++y) {
#ifdef __SSE2__
for (std::uint32_t x = 0; x < m_width; x+=4) {
#else
for (std::uint32_t x = 0; x < m_width; ++x) {
#endif
std::uint8_t* out = lookup(reinterpret_cast<std::uint8_t*>(out_frame), x, y);
#ifdef __SSE2__
ALIGN16_BEG int ALIGN16_END mx[4] = { static_cast<int>(x), static_cast<int>(x) + 1, static_cast<int>(x) + 2, static_cast<int>(x) + 3};
ALIGN16_BEG int ALIGN16_END my[4] = { static_cast<int>(y), static_cast<int>(y), static_cast<int>(y), static_cast<int>(y) };
Reflect_info info = calculate_reflect_info((__m128i*)mx, (__m128i*)my);
//float* segment_number = reinterpret_cast<float*>(&info.segment_number);
std::int32_t* segment_number = reinterpret_cast<std::int32_t*>(&info.segment_number_i);
std::uint32_t col_idx = (*segment_number) % 63;
out[0] = colours[col_idx][0];
out[1] = colours[col_idx][1];
out[2] = colours[col_idx][2];
if (m_num_components > 3) {
out[3] = 0xff;
out++;
}
segment_number++;
out += 3;
col_idx = (*segment_number) % 63;
out[0] = colours[col_idx][0];
out[1] = colours[col_idx][1];
out[2] = colours[col_idx][2];
if (m_num_components > 3) {
out[3] = 0xff;
out++;
}
segment_number++;
out += 3;
col_idx = (*segment_number) % 63;
out[0] = colours[col_idx][0];
out[1] = colours[col_idx][1];
out[2] = colours[col_idx][2];
if (m_num_components > 3) {
out[3] = 0xff;
out++;
}
segment_number++;
out += 3;
col_idx = (*segment_number) % 63;
out[0] = colours[col_idx][0];
out[1] = colours[col_idx][1];
out[2] = colours[col_idx][2];
if (m_num_components > 3) {
out[3] = 0xff;
out++;
}
#else
Reflect_info info = calculate_reflect_info(x, y);
std::uint32_t col_idx = info.segment_number % 63;
out[0] = colours[col_idx][0];
out[1] = colours[col_idx][1];
out[2] = colours[col_idx][2];
if (m_num_components > 3) {
out[3] = 0xff;
}
#endif
}
}
return 0;
}
} // namespace libkaleid0sc0pe