coreboot-libre-fam15h-rdimm/3rdparty/chromeec/common/math_util.c

/* Copyright 2014 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

/* Common math functions. */

#include "common.h"
#include "math.h"
#include "math_util.h"
#include "util.h"

/* Some useful math functions.  Use with integers only! */
#define SQ(x) ((x) * (x))

/* For cosine lookup table, define the increment and the size of the table. */
#define COSINE_LUT_INCR_DEG	5
#define COSINE_LUT_SIZE		((180 / COSINE_LUT_INCR_DEG) + 1)

/* Lookup table for the value of cosine from 0 degrees to 180 degrees. */
static const fp_t cos_lut[] = {
	FLOAT_TO_FP( 1.00000), FLOAT_TO_FP( 0.99619), FLOAT_TO_FP( 0.98481),
	FLOAT_TO_FP( 0.96593), FLOAT_TO_FP( 0.93969), FLOAT_TO_FP( 0.90631),
	FLOAT_TO_FP( 0.86603), FLOAT_TO_FP( 0.81915), FLOAT_TO_FP( 0.76604),
	FLOAT_TO_FP( 0.70711), FLOAT_TO_FP( 0.64279), FLOAT_TO_FP( 0.57358),
	FLOAT_TO_FP( 0.50000), FLOAT_TO_FP( 0.42262), FLOAT_TO_FP( 0.34202),
	FLOAT_TO_FP( 0.25882), FLOAT_TO_FP( 0.17365), FLOAT_TO_FP( 0.08716),
	FLOAT_TO_FP( 0.00000), FLOAT_TO_FP(-0.08716), FLOAT_TO_FP(-0.17365),
	FLOAT_TO_FP(-0.25882), FLOAT_TO_FP(-0.34202), FLOAT_TO_FP(-0.42262),
	FLOAT_TO_FP(-0.50000), FLOAT_TO_FP(-0.57358), FLOAT_TO_FP(-0.64279),
	FLOAT_TO_FP(-0.70711), FLOAT_TO_FP(-0.76604), FLOAT_TO_FP(-0.81915),
	FLOAT_TO_FP(-0.86603), FLOAT_TO_FP(-0.90631), FLOAT_TO_FP(-0.93969),
	FLOAT_TO_FP(-0.96593), FLOAT_TO_FP(-0.98481), FLOAT_TO_FP(-0.99619),
	FLOAT_TO_FP(-1.00000),
};
BUILD_ASSERT(ARRAY_SIZE(cos_lut) == COSINE_LUT_SIZE);


fp_t arc_cos(fp_t x)
{
	int i;

	/* Cap x if out of range. */
	if (x < FLOAT_TO_FP(-1.0))
		x = FLOAT_TO_FP(-1.0);
	else if (x > FLOAT_TO_FP(1.0))
		x = FLOAT_TO_FP(1.0);

	/*
	 * Increment through lookup table to find index and then linearly
	 * interpolate for precision.
	 */
	/* TODO(crosbug.com/p/25600): Optimize with binary search. */
	for (i = 0; i < COSINE_LUT_SIZE-1; i++) {
		if (x >= cos_lut[i+1]) {
			const fp_t interp = fp_div(cos_lut[i] - x,
						   cos_lut[i] - cos_lut[i + 1]);

			return fp_mul(INT_TO_FP(COSINE_LUT_INCR_DEG),
				      INT_TO_FP(i) + interp);
		}
	}

	/*
	 * Shouldn't be possible to get here because inputs are clipped to
	 * [-1, 1] and the cos_lut[] table goes over the same range. If we
	 * are here, throw an assert.
	 */
	ASSERT(0);

	return 0;
}

/**
 * Integer square root.
 */
#ifdef CONFIG_FPU
/*
 * Use library sqrtf instruction, if available, since it's usually much faster
 * and smaller. On Cortex-M4, this becomes a single instruction which takes
 * 14 cycles to execute. This produces identical results to binary search,
 * except when the floating point representation of the square root rounds up
 * to an integer.
 */
static inline int int_sqrtf(fp_inter_t x)
{
	return sqrtf(x);
}

/* If the platform support FPU, just return sqrtf. */
fp_t fp_sqrtf(fp_t x)
{
	return sqrtf(x);
}
#else
static int int_sqrtf(fp_inter_t x)
{
	int rmax = INT32_MAX;
	int rmin = 0;

	/* Short cut if x is 32-bit value */
	if (x < rmax)
		rmax = 0x7fff;

	/*
	 * Just binary-search.  There are better algorithms, but we call this
	 * infrequently enough it doesn't matter.
	 */
	if (x <= 0)
		return 0;  /* Yeah, for imaginary numbers too */
	else if (x > (fp_inter_t)rmax * rmax)
		return rmax;

	while (1) {
		int r = (rmax + rmin) / 2;
		fp_inter_t r2 = (fp_inter_t)r * r;

		if (r2 > x) {
			/* Guessed too high */
			rmax = r;
		} else if (r2 < x) {
			/* Guessed too low.  Watch out for rounding! */
			if (rmin == r)
				return r;

			rmin = r;
		} else {
			/* Bullseye! */
			return r;
		}
	}
}

fp_t fp_sqrtf(fp_t x)
{
	fp_inter_t preshift_x = (fp_inter_t)x << FP_BITS;

	return int_sqrtf(preshift_x);
}
#endif /* CONFIG_FPU */

int vector_magnitude(const intv3_t v)
{
	fp_inter_t sum =   (fp_inter_t)v[0] * v[0] +
			(fp_inter_t)v[1] * v[1] +
			(fp_inter_t)v[2] * v[2];

	return int_sqrtf(sum);
}

/* cross_product only works if the vectors magnitudes are around 1<<16. */
void cross_product(const intv3_t v1, const intv3_t v2, intv3_t v)
{
	v[X] = (fp_inter_t)v1[Y] * v2[Z] - (fp_inter_t)v1[Z] * v2[Y];
	v[Y] = (fp_inter_t)v1[Z] * v2[X] - (fp_inter_t)v1[X] * v2[Z];
	v[Z] = (fp_inter_t)v1[X] * v2[Y] - (fp_inter_t)v1[Y] * v2[X];
}

fp_inter_t dot_product(const intv3_t v1, const intv3_t v2)
{
	return (fp_inter_t)v1[X] * v2[X] +
	       (fp_inter_t)v1[Y] * v2[Y] +
	       (fp_inter_t)v1[Z] * v2[Z];
}

void vector_scale(intv3_t v, fp_t s)
{
	v[X] = fp_mul(v[X], s);
	v[Y] = fp_mul(v[Y], s);
	v[Z] = fp_mul(v[Z], s);
}

fp_t cosine_of_angle_diff(const intv3_t v1, const intv3_t v2)
{
	fp_inter_t dotproduct;
	fp_inter_t denominator;

	/*
	 * Angle between two vectors is acos(A dot B / |A|*|B|). To return
	 * cosine of angle between vectors, then don't do acos operation.
	 */
	dotproduct = dot_product(v1, v2);

	denominator = (fp_inter_t)vector_magnitude(v1) * vector_magnitude(v2);

	/* Check for divide by 0 although extremely unlikely. */
	if (!denominator)
		return 0;

	/*
	 * We must shift the dot product first, so that we can represent
	 * fractions.  The answer is always a number with magnitude < 1.0, so
	 * if we don't shift, it will always round down to 0.
	 *
	 * Note that overflow is possible if the dot product is large (that is,
	 * if the vector components are of size (31 - FP_BITS/2) bits.  If that
	 * ever becomes a problem, we could detect this by counting the leading
	 * zeroes of the dot product and shifting the denominator down
	 * partially instead of shifting the dot product up.  With the current
	 * FP_BITS=16, that happens if the vector components are ~2^23.  Which
	 * we're a long way away from; the vector components used in
	 * accelerometer calculations are ~2^11.
	 */
	return fp_div(dotproduct, denominator);
}

/*
 * rotate a vector v
 *  - support input v and output res are the same vector
 */
void rotate(const intv3_t v, const mat33_fp_t R, intv3_t res)
{
	fp_inter_t t[3];

	if (R == NULL) {
		if (v != res)
			memcpy(res, v, sizeof(intv3_t));
		return;
	}


	/* Rotate */
	t[0] =	(fp_inter_t)v[0] * R[0][0] +
		(fp_inter_t)v[1] * R[1][0] +
		(fp_inter_t)v[2] * R[2][0];
	t[1] =	(fp_inter_t)v[0] * R[0][1] +
		(fp_inter_t)v[1] * R[1][1] +
		(fp_inter_t)v[2] * R[2][1];
	t[2] =	(fp_inter_t)v[0] * R[0][2] +
		(fp_inter_t)v[1] * R[1][2] +
		(fp_inter_t)v[2] * R[2][2];

	/* Scale by fixed point shift when writing back to result */
	res[0] = FP_TO_INT(t[0]);
	res[1] = FP_TO_INT(t[1]);
	res[2] = FP_TO_INT(t[2]);
}

void rotate_inv(const intv3_t v, const mat33_fp_t R, intv3_t res)
{
	fp_inter_t t[3];
	fp_t deter;

	if (R == NULL) {
		if (v != res)
			memcpy(res, v, sizeof(intv3_t));
		return;
	}

	deter = fp_mul(R[0][0], (fp_mul(R[1][1], R[2][2]) -
				 fp_mul(R[2][1], R[1][2]))) -
		fp_mul(R[0][1], (fp_mul(R[1][0], R[2][2]) -
				 fp_mul(R[1][2], R[2][0]))) +
		fp_mul(R[0][2], (fp_mul(R[1][0], R[2][1]) -
				 fp_mul(R[1][1], R[2][0])));

	/*
	 * invert the matrix: from
	 * http://stackoverflow.com/questions/983999/
	 * simple-3x3-matrix-inverse-code-c
	 */
	t[0] =  (fp_inter_t)v[0] * (fp_mul(R[1][1], R[2][2]) -
				 fp_mul(R[2][1], R[1][2])) -
		(fp_inter_t)v[1] * (fp_mul(R[1][0], R[2][2]) -
				 fp_mul(R[1][2], R[2][0])) +
		(fp_inter_t)v[2] * (fp_mul(R[1][0], R[2][1]) -
				 fp_mul(R[2][0], R[1][1]));

	t[1] =	(fp_inter_t)v[0] * (fp_mul(R[0][1], R[2][2]) -
				 fp_mul(R[0][2], R[2][1])) * -1 +
		(fp_inter_t)v[1] * (fp_mul(R[0][0], R[2][2]) -
				 fp_mul(R[0][2], R[2][0])) -
		(fp_inter_t)v[2] * (fp_mul(R[0][0], R[2][1]) -
				 fp_mul(R[2][0], R[0][1]));

	t[2] =	(fp_inter_t)v[0] * (fp_mul(R[0][1], R[1][2]) -
				 fp_mul(R[0][2], R[1][1])) -
		(fp_inter_t)v[1] * (fp_mul(R[0][0], R[1][2]) -
				 fp_mul(R[1][0], R[0][2])) +
		(fp_inter_t)v[2] * (fp_mul(R[0][0], R[1][1]) -
				 fp_mul(R[1][0], R[0][1]));

	/* Scale by fixed point shift when writing back to result */
	res[0] = FP_TO_INT(fp_div(t[0], deter));
	res[1] = FP_TO_INT(fp_div(t[1], deter));
	res[2] = FP_TO_INT(fp_div(t[2], deter));
}
Initial commit 2024-03-04 11:14:53 +01:00			`/* Copyright 2014 The Chromium OS Authors. All rights reserved.`
			`* Use of this source code is governed by a BSD-style license that can be`
			`* found in the LICENSE file.`
			`*/`

			`/* Common math functions. */`

			`#include "common.h"`
			`#include "math.h"`
			`#include "math_util.h"`
			`#include "util.h"`

			`/* Some useful math functions. Use with integers only! */`
			`#define SQ(x) ((x) * (x))`

			`/* For cosine lookup table, define the increment and the size of the table. */`
			`#define COSINE_LUT_INCR_DEG 5`
			`#define COSINE_LUT_SIZE ((180 / COSINE_LUT_INCR_DEG) + 1)`

			`/* Lookup table for the value of cosine from 0 degrees to 180 degrees. */`
			`static const fp_t cos_lut[] = {`
			`FLOAT_TO_FP( 1.00000), FLOAT_TO_FP( 0.99619), FLOAT_TO_FP( 0.98481),`
			`FLOAT_TO_FP( 0.96593), FLOAT_TO_FP( 0.93969), FLOAT_TO_FP( 0.90631),`
			`FLOAT_TO_FP( 0.86603), FLOAT_TO_FP( 0.81915), FLOAT_TO_FP( 0.76604),`
			`FLOAT_TO_FP( 0.70711), FLOAT_TO_FP( 0.64279), FLOAT_TO_FP( 0.57358),`
			`FLOAT_TO_FP( 0.50000), FLOAT_TO_FP( 0.42262), FLOAT_TO_FP( 0.34202),`
			`FLOAT_TO_FP( 0.25882), FLOAT_TO_FP( 0.17365), FLOAT_TO_FP( 0.08716),`
			`FLOAT_TO_FP( 0.00000), FLOAT_TO_FP(-0.08716), FLOAT_TO_FP(-0.17365),`
			`FLOAT_TO_FP(-0.25882), FLOAT_TO_FP(-0.34202), FLOAT_TO_FP(-0.42262),`
			`FLOAT_TO_FP(-0.50000), FLOAT_TO_FP(-0.57358), FLOAT_TO_FP(-0.64279),`
			`FLOAT_TO_FP(-0.70711), FLOAT_TO_FP(-0.76604), FLOAT_TO_FP(-0.81915),`
			`FLOAT_TO_FP(-0.86603), FLOAT_TO_FP(-0.90631), FLOAT_TO_FP(-0.93969),`
			`FLOAT_TO_FP(-0.96593), FLOAT_TO_FP(-0.98481), FLOAT_TO_FP(-0.99619),`
			`FLOAT_TO_FP(-1.00000),`
			`};`
			`BUILD_ASSERT(ARRAY_SIZE(cos_lut) == COSINE_LUT_SIZE);`


			`fp_t arc_cos(fp_t x)`
			`{`
			`int i;`

			`/* Cap x if out of range. */`
			`if (x < FLOAT_TO_FP(-1.0))`
			`x = FLOAT_TO_FP(-1.0);`
			`else if (x > FLOAT_TO_FP(1.0))`
			`x = FLOAT_TO_FP(1.0);`

			`/*`
			`* Increment through lookup table to find index and then linearly`
			`* interpolate for precision.`
			`*/`
			`/* TODO(crosbug.com/p/25600): Optimize with binary search. */`
			`for (i = 0; i < COSINE_LUT_SIZE-1; i++) {`
			`if (x >= cos_lut[i+1]) {`
			`const fp_t interp = fp_div(cos_lut[i] - x,`
			`cos_lut[i] - cos_lut[i + 1]);`

			`return fp_mul(INT_TO_FP(COSINE_LUT_INCR_DEG),`
			`INT_TO_FP(i) + interp);`
			`}`
			`}`

			`/*`
			`* Shouldn't be possible to get here because inputs are clipped to`
			`* [-1, 1] and the cos_lut[] table goes over the same range. If we`
			`* are here, throw an assert.`
			`*/`
			`ASSERT(0);`

			`return 0;`
			`}`

			`/**`
			`* Integer square root.`
			`*/`
			`#ifdef CONFIG_FPU`
			`/*`
			`* Use library sqrtf instruction, if available, since it's usually much faster`
			`* and smaller. On Cortex-M4, this becomes a single instruction which takes`
			`* 14 cycles to execute. This produces identical results to binary search,`
			`* except when the floating point representation of the square root rounds up`
			`* to an integer.`
			`*/`
			`static inline int int_sqrtf(fp_inter_t x)`
			`{`
			`return sqrtf(x);`
			`}`

			`/* If the platform support FPU, just return sqrtf. */`
			`fp_t fp_sqrtf(fp_t x)`
			`{`
			`return sqrtf(x);`
			`}`
			`#else`
			`static int int_sqrtf(fp_inter_t x)`
			`{`
			`int rmax = INT32_MAX;`
			`int rmin = 0;`

			`/* Short cut if x is 32-bit value */`
			`if (x < rmax)`
			`rmax = 0x7fff;`

			`/*`
			`* Just binary-search. There are better algorithms, but we call this`
			`* infrequently enough it doesn't matter.`
			`*/`
			`if (x <= 0)`
			`return 0; /* Yeah, for imaginary numbers too */`
			`else if (x > (fp_inter_t)rmax * rmax)`
			`return rmax;`

			`while (1) {`
			`int r = (rmax + rmin) / 2;`
			`fp_inter_t r2 = (fp_inter_t)r * r;`

			`if (r2 > x) {`
			`/* Guessed too high */`
			`rmax = r;`
			`} else if (r2 < x) {`
			`/* Guessed too low. Watch out for rounding! */`
			`if (rmin == r)`
			`return r;`

			`rmin = r;`
			`} else {`
			`/* Bullseye! */`
			`return r;`
			`}`
			`}`
			`}`

			`fp_t fp_sqrtf(fp_t x)`
			`{`
			`fp_inter_t preshift_x = (fp_inter_t)x << FP_BITS;`

			`return int_sqrtf(preshift_x);`
			`}`
			`#endif /* CONFIG_FPU */`

			`int vector_magnitude(const intv3_t v)`
			`{`
			`fp_inter_t sum = (fp_inter_t)v[0] * v[0] +`
			`(fp_inter_t)v[1] * v[1] +`
			`(fp_inter_t)v[2] * v[2];`

			`return int_sqrtf(sum);`
			`}`

			`/* cross_product only works if the vectors magnitudes are around 1<<16. */`
			`void cross_product(const intv3_t v1, const intv3_t v2, intv3_t v)`
			`{`
			`v[X] = (fp_inter_t)v1[Y] * v2[Z] - (fp_inter_t)v1[Z] * v2[Y];`
			`v[Y] = (fp_inter_t)v1[Z] * v2[X] - (fp_inter_t)v1[X] * v2[Z];`
			`v[Z] = (fp_inter_t)v1[X] * v2[Y] - (fp_inter_t)v1[Y] * v2[X];`
			`}`

			`fp_inter_t dot_product(const intv3_t v1, const intv3_t v2)`
			`{`
			`return (fp_inter_t)v1[X] * v2[X] +`
			`(fp_inter_t)v1[Y] * v2[Y] +`
			`(fp_inter_t)v1[Z] * v2[Z];`
			`}`

			`void vector_scale(intv3_t v, fp_t s)`
			`{`
			`v[X] = fp_mul(v[X], s);`
			`v[Y] = fp_mul(v[Y], s);`
			`v[Z] = fp_mul(v[Z], s);`
			`}`

			`fp_t cosine_of_angle_diff(const intv3_t v1, const intv3_t v2)`
			`{`
			`fp_inter_t dotproduct;`
			`fp_inter_t denominator;`

			`/*`
			`* Angle between two vectors is acos(A dot B / \|A\|*\|B\|). To return`
			`* cosine of angle between vectors, then don't do acos operation.`
			`*/`
			`dotproduct = dot_product(v1, v2);`

			`denominator = (fp_inter_t)vector_magnitude(v1) * vector_magnitude(v2);`

			`/* Check for divide by 0 although extremely unlikely. */`
			`if (!denominator)`
			`return 0;`

			`/*`
			`* We must shift the dot product first, so that we can represent`
			`* fractions. The answer is always a number with magnitude < 1.0, so`
			`* if we don't shift, it will always round down to 0.`
			`*`
			`* Note that overflow is possible if the dot product is large (that is,`
			`* if the vector components are of size (31 - FP_BITS/2) bits. If that`
			`* ever becomes a problem, we could detect this by counting the leading`
			`* zeroes of the dot product and shifting the denominator down`
			`* partially instead of shifting the dot product up. With the current`
			`* FP_BITS=16, that happens if the vector components are ~2^23. Which`
			`* we're a long way away from; the vector components used in`
			`* accelerometer calculations are ~2^11.`
			`*/`
			`return fp_div(dotproduct, denominator);`
			`}`

			`/*`
			`* rotate a vector v`
			`* - support input v and output res are the same vector`
			`*/`
			`void rotate(const intv3_t v, const mat33_fp_t R, intv3_t res)`
			`{`
			`fp_inter_t t[3];`

			`if (R == NULL) {`
			`if (v != res)`
			`memcpy(res, v, sizeof(intv3_t));`
			`return;`
			`}`


			`/* Rotate */`
			`t[0] = (fp_inter_t)v[0] * R[0][0] +`
			`(fp_inter_t)v[1] * R[1][0] +`
			`(fp_inter_t)v[2] * R[2][0];`
			`t[1] = (fp_inter_t)v[0] * R[0][1] +`
			`(fp_inter_t)v[1] * R[1][1] +`
			`(fp_inter_t)v[2] * R[2][1];`
			`t[2] = (fp_inter_t)v[0] * R[0][2] +`
			`(fp_inter_t)v[1] * R[1][2] +`
			`(fp_inter_t)v[2] * R[2][2];`

			`/* Scale by fixed point shift when writing back to result */`
			`res[0] = FP_TO_INT(t[0]);`
			`res[1] = FP_TO_INT(t[1]);`
			`res[2] = FP_TO_INT(t[2]);`
			`}`

			`void rotate_inv(const intv3_t v, const mat33_fp_t R, intv3_t res)`
			`{`
			`fp_inter_t t[3];`
			`fp_t deter;`

			`if (R == NULL) {`
			`if (v != res)`
			`memcpy(res, v, sizeof(intv3_t));`
			`return;`
			`}`

			`deter = fp_mul(R[0][0], (fp_mul(R[1][1], R[2][2]) -`
			`fp_mul(R[2][1], R[1][2]))) -`
			`fp_mul(R[0][1], (fp_mul(R[1][0], R[2][2]) -`
			`fp_mul(R[1][2], R[2][0]))) +`
			`fp_mul(R[0][2], (fp_mul(R[1][0], R[2][1]) -`
			`fp_mul(R[1][1], R[2][0])));`

			`/*`
			`* invert the matrix: from`
			`* http://stackoverflow.com/questions/983999/`
			`* simple-3x3-matrix-inverse-code-c`
			`*/`
			`t[0] = (fp_inter_t)v[0] * (fp_mul(R[1][1], R[2][2]) -`
			`fp_mul(R[2][1], R[1][2])) -`
			`(fp_inter_t)v[1] * (fp_mul(R[1][0], R[2][2]) -`
			`fp_mul(R[1][2], R[2][0])) +`
			`(fp_inter_t)v[2] * (fp_mul(R[1][0], R[2][1]) -`
			`fp_mul(R[2][0], R[1][1]));`

			`t[1] = (fp_inter_t)v[0] * (fp_mul(R[0][1], R[2][2]) -`
			`fp_mul(R[0][2], R[2][1])) * -1 +`
			`(fp_inter_t)v[1] * (fp_mul(R[0][0], R[2][2]) -`
			`fp_mul(R[0][2], R[2][0])) -`
			`(fp_inter_t)v[2] * (fp_mul(R[0][0], R[2][1]) -`
			`fp_mul(R[2][0], R[0][1]));`

			`t[2] = (fp_inter_t)v[0] * (fp_mul(R[0][1], R[1][2]) -`
			`fp_mul(R[0][2], R[1][1])) -`
			`(fp_inter_t)v[1] * (fp_mul(R[0][0], R[1][2]) -`
			`fp_mul(R[1][0], R[0][2])) +`
			`(fp_inter_t)v[2] * (fp_mul(R[0][0], R[1][1]) -`
			`fp_mul(R[1][0], R[0][1]));`

			`/* Scale by fixed point shift when writing back to result */`
			`res[0] = FP_TO_INT(fp_div(t[0], deter));`
			`res[1] = FP_TO_INT(fp_div(t[1], deter));`
			`res[2] = FP_TO_INT(fp_div(t[2], deter));`
			`}`