/* test_colors.c
 * vi:ts=4 sw=4
 *
 * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may obtain
 * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

#include <freerdp/config.h>

#include <winpr/sysinfo.h>
#include <freerdp/utils/profiler.h>

#include "prim_test.h"

/* ------------------------------------------------------------------------- */
/**
 * Compare the generic and the optimized RGBToRGB_16s8u_P3AC4R implementation.
 *
 * Three INT16 input planes are filled via winpr_RAND and handed to both
 * implementations, each writing into its own 4 byte per pixel output buffer.
 * The two output buffers are then compared.
 *
 * @param roi       width and height of the region to convert
 * @param DstFormat destination pixel format passed to both implementations
 *
 * @return TRUE if all buffers could be allocated, both conversions returned
 *         PRIMITIVES_SUCCESS and no pixel differs; FALSE otherwise.
 */
static BOOL test_RGBToRGB_16s8u_P3AC4R_func(prim_size_t roi, DWORD DstFormat)
{
	INT16* r = NULL;
	INT16* g = NULL;
	INT16* b = NULL;
	BYTE* out1 = NULL;
	BYTE* out2 = NULL;
	/* Pessimistic default: every early exit through 'fail' must report an error. */
	BOOL failed = TRUE;
	const INT16* ptrs[3];
	const UINT32 rgbStride = roi.width * 2;
	const UINT32 dstStride = roi.width * 4;
	/* Sizes are computed in 64 bit to avoid a 32 bit overflow of stride * height. */
	const size_t planeSize = 1ULL * rgbStride * roi.height;
	const size_t dstSize = 1ULL * dstStride * roi.height;
	PROFILER_DEFINE(genericProf)
	PROFILER_DEFINE(optProf)
	PROFILER_CREATE(genericProf, "RGBToRGB_16s8u_P3AC4R-GENERIC")
	PROFILER_CREATE(optProf, "RGBToRGB_16s8u_P3AC4R-OPTIMIZED")
	r = winpr_aligned_calloc(1, planeSize, 16);
	g = winpr_aligned_calloc(1, planeSize, 16);
	b = winpr_aligned_calloc(1, planeSize, 16);
	out1 = winpr_aligned_calloc(1, dstSize, 16);
	out2 = winpr_aligned_calloc(1, dstSize, 16);

	if (!r || !g || !b || !out1 || !out2)
		goto fail;

#if 0
	{
		/* Fixed input pattern, useful when debugging a mismatch. */
		for (UINT32 y = 0; y < roi.height; y++)
		{
			for (UINT32 x = 0; x < roi.width; x++)
			{
				r[y * roi.width + x] = 0x01;
				g[y * roi.width + x] = 0x02;
				b[y * roi.width + x] = 0x04;
			}
		}
	}
#else
	winpr_RAND(r, planeSize);
	winpr_RAND(g, planeSize);
	winpr_RAND(b, planeSize);
#endif
	ptrs[0] = r;
	ptrs[1] = g;
	ptrs[2] = b;
	PROFILER_ENTER(genericProf)

	if (generic->RGBToRGB_16s8u_P3AC4R(ptrs, rgbStride, out1, dstStride, DstFormat, &roi) !=
	    PRIMITIVES_SUCCESS)
		goto fail;

	PROFILER_EXIT(genericProf)
	PROFILER_ENTER(optProf)

	if (optimized->RGBToRGB_16s8u_P3AC4R(ptrs, rgbStride, out2, dstStride, DstFormat, &roi) !=
	    PRIMITIVES_SUCCESS)
		goto fail;

	PROFILER_EXIT(optProf)

	/* Both conversions succeeded; from here on only a pixel mismatch is a failure. */
	failed = FALSE;

	if (memcmp(out1, out2, dstSize) != 0)
	{
		/* The raw bytes differ: compare pixel by pixel through FreeRDPReadColor
		 * and report every pixel for which the read back values differ. */
		for (UINT64 i = 0; i < 1ull * roi.width * roi.height; ++i)
		{
			const UINT32 o1 = FreeRDPReadColor(out1 + 4 * i, DstFormat);
			const UINT32 o2 = FreeRDPReadColor(out2 + 4 * i, DstFormat);

			if (o1 != o2)
			{
				/* 'i' is a pixel index, so print the compared pixel values
				 * and not the bytes located at byte offset 'i'. */
				printf("RGBToRGB_16s8u_P3AC4R FAIL: out1[%" PRIu64 "]=0x%08" PRIx32
				       " out2[%" PRIu64 "]=0x%08" PRIx32 "\n",
				       i, o1, i, o2);
				failed = TRUE;
			}
		}
	}

	printf("Results for %" PRIu32 "x%" PRIu32 " [%s]", roi.width, roi.height,
	       FreeRDPGetColorFormatName(DstFormat));
	PROFILER_PRINT_HEADER
	PROFILER_PRINT(genericProf)
	PROFILER_PRINT(optProf)
	PROFILER_PRINT_FOOTER
fail:
	PROFILER_FREE(genericProf)
	PROFILER_FREE(optProf)
	winpr_aligned_free(r);
	winpr_aligned_free(g);
	winpr_aligned_free(b);
	winpr_aligned_free(out1);
	winpr_aligned_free(out2);
	return !failed;
}

/* ------------------------------------------------------------------------- */
static BOOL test_RGBToRGB_16s8u_P3AC4R_speed(void)
{
	union
	{
		const INT16** cpv;
		INT16** pv;
	} cnv;
	const prim_size_t roi64x64 = { 64, 64 };
	INT16 ALIGN(r[4096 + 1]);
	INT16 ALIGN(g[4096 + 1]);
	INT16 ALIGN(b[4096 + 1]);
	UINT32 ALIGN(dst[4096 + 1]);
	INT16* ptrs[3];
	winpr_RAND(r, sizeof(r));
	winpr_RAND(g, sizeof(g));
	winpr_RAND(b, sizeof(b));

	/* clear upper bytes */
	for (int i = 0; i < 4096; ++i)
	{
		r[i] &= 0x00FFU;
		g[i] &= 0x00FFU;
		b[i] &= 0x00FFU;
	}

	ptrs[0] = r + 1;
	ptrs[1] = g + 1;
	ptrs[2] = b + 1;

	cnv.pv = ptrs;
	if (!speed_test("RGBToRGB_16s8u_P3AC4R", "aligned", g_Iterations,
	                generic->RGBToRGB_16s8u_P3AC4R, optimized->RGBToRGB_16s8u_P3AC4R, cnv.cpv,
	                64 * 2, (BYTE*)dst, 64 * 4, &roi64x64))
		return FALSE;

	if (!speed_test("RGBToRGB_16s8u_P3AC4R", "unaligned", g_Iterations,
	                generic->RGBToRGB_16s8u_P3AC4R, optimized->RGBToRGB_16s8u_P3AC4R, cnv.cpv,
	                64 * 2, ((BYTE*)dst) + 1, 64 * 4, &roi64x64))
		return FALSE;

	return TRUE;
}

/* ========================================================================= */
static BOOL test_yCbCrToRGB_16s16s_P3P3_func(void)
{
	pstatus_t status = 0;
	INT16 ALIGN(y[4096]) = { 0 };
	INT16 ALIGN(cb[4096]) = { 0 };
	INT16 ALIGN(cr[4096]) = { 0 };
	INT16 ALIGN(r1[4096]) = { 0 };
	INT16 ALIGN(g1[4096]) = { 0 };
	INT16 ALIGN(b1[4096]) = { 0 };
	INT16 ALIGN(r2[4096]) = { 0 };
	INT16 ALIGN(g2[4096]) = { 0 };
	INT16 ALIGN(b2[4096]) = { 0 };
	const INT16* in[3];
	INT16* out1[3];
	INT16* out2[3];
	prim_size_t roi = { 64, 64 };
	winpr_RAND(y, sizeof(y));
	winpr_RAND(cb, sizeof(cb));
	winpr_RAND(cr, sizeof(cr));

	/* Normalize to 11.5 fixed radix */
	for (int i = 0; i < 4096; ++i)
	{
		y[i] &= 0x1FE0U;
		cb[i] &= 0x1FE0U;
		cr[i] &= 0x1FE0U;
	}

	in[0] = y;
	in[1] = cb;
	in[2] = cr;
	out1[0] = r1;
	out1[1] = g1;
	out1[2] = b1;
	out2[0] = r2;
	out2[1] = g2;
	out2[2] = b2;
	status = generic->yCbCrToRGB_16s16s_P3P3(in, 64 * 2, out1, 64 * 2, &roi);

	if (status != PRIMITIVES_SUCCESS)
		return FALSE;

	status = optimized->yCbCrToRGB_16s16s_P3P3(in, 64 * 2, out2, 64 * 2, &roi);

	if (status != PRIMITIVES_SUCCESS)
		return FALSE;

	for (int i = 0; i < 4096; ++i)
	{
		if ((ABS(r1[i] - r2[i]) > 1) || (ABS(g1[i] - g2[i]) > 1) || (ABS(b1[i] - b2[i]) > 1))
		{
			printf("YCbCrToRGB-SSE FAIL[%d]: %" PRId16 ",%" PRId16 ",%" PRId16 " vs %" PRId16
			       ",%" PRId16 ",%" PRId16 "\n",
			       i, r1[i], g1[i], b1[i], r2[i], g2[i], b2[i]);
			return FALSE;
		}
	}

	return TRUE;
}

/* ------------------------------------------------------------------------- */
static int test_yCbCrToRGB_16s16s_P3P3_speed(void)
{
	prim_size_t roi = { 64, 64 };
	INT16 ALIGN(y[4096]);
	INT16 ALIGN(cb[4096]);
	INT16 ALIGN(cr[4096]);
	INT16 ALIGN(r[4096]);
	INT16 ALIGN(g[4096]);
	INT16 ALIGN(b[4096]);
	const INT16* input[3];
	INT16* output[3];
	winpr_RAND(y, sizeof(y));
	winpr_RAND(cb, sizeof(cb));
	winpr_RAND(cr, sizeof(cr));

	/* Normalize to 11.5 fixed radix */
	for (int i = 0; i < 4096; ++i)
	{
		y[i] &= 0x1FE0U;
		cb[i] &= 0x1FE0U;
		cr[i] &= 0x1FE0U;
	}

	input[0] = y;
	input[1] = cb;
	input[2] = cr;
	output[0] = r;
	output[1] = g;
	output[2] = b;

	if (!speed_test("yCbCrToRGB_16s16s_P3P3", "aligned", g_Iterations,
	                (speed_test_fkt)generic->yCbCrToRGB_16s16s_P3P3,
	                (speed_test_fkt)optimized->yCbCrToRGB_16s16s_P3P3, input, 64 * 2, output,
	                64 * 2, &roi))
		return FALSE;

	return TRUE;
}

/**
 * Test entry point: runs test_RGBToRGB_16s8u_P3AC4R_func on a 480x270 region
 * once for every 32 bit destination pixel format listed below.
 *
 * The speed tests and the yCbCr tests are currently compiled out (#if 0).
 *
 * @param argc unused
 * @param argv unused
 *
 * @return 0 if every format passed, 1 on the first failure.
 */
int TestPrimitivesColors(int argc, char* argv[])
{
	const DWORD formats[] = { PIXEL_FORMAT_ARGB32, PIXEL_FORMAT_XRGB32, PIXEL_FORMAT_ABGR32,
		                      PIXEL_FORMAT_XBGR32, PIXEL_FORMAT_RGBA32, PIXEL_FORMAT_RGBX32,
		                      PIXEL_FORMAT_BGRA32, PIXEL_FORMAT_BGRX32 };
	const size_t formatCount = sizeof(formats) / sizeof(formats[0]);
	prim_size_t roi = { 1920 / 4, 1080 / 4 };

	WINPR_UNUSED(argc);
	WINPR_UNUSED(argv);
	prim_test_setup(FALSE);

	for (size_t idx = 0; idx < formatCount; idx++)
	{
		const DWORD format = formats[idx];

		if (!test_RGBToRGB_16s8u_P3AC4R_func(roi, format))
			return 1;

#if 0

		if (g_TestPrimitivesPerformance)
		{
			if (!test_RGBToRGB_16s8u_P3AC4R_speed())
				return 1;
		}

		if (!test_yCbCrToRGB_16s16s_P3P3_func())
			return 1;

		if (g_TestPrimitivesPerformance)
		{
			if (!test_yCbCrToRGB_16s16s_P3P3_speed())
				return 1;
		}

#endif
	}

	return 0;
}
