coreboot-libre-fam15h-rdimm/3rdparty/chromeec/common/math_util.c

288 lines
7.8 KiB
C
Raw Normal View History

2024-03-04 11:14:53 +01:00
/* Copyright 2014 The Chromium OS Authors. All rights reserved.
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
/* Common math functions. */
#include "common.h"
#include "math.h"
#include "math_util.h"
#include "util.h"
/*
 * Some useful math functions. Use with integers only!
 *
 * SQ() expands its argument twice, so do not pass an expression with side
 * effects (e.g. SQ(i++)).
 */
#define SQ(x) ((x) * (x))
/*
 * For cosine lookup table, define the increment and the size of the table.
 * The table covers 0 to 180 degrees inclusive in steps of
 * COSINE_LUT_INCR_DEG, hence the "+ 1" for the final 180 degree entry.
 */
#define COSINE_LUT_INCR_DEG 5
#define COSINE_LUT_SIZE ((180 / COSINE_LUT_INCR_DEG) + 1)
/*
 * Lookup table for the value of cosine from 0 degrees to 180 degrees.
 *
 * Entry i holds cos(i * COSINE_LUT_INCR_DEG degrees) converted with
 * FLOAT_TO_FP(). Values are strictly decreasing from 1.0 down to -1.0, which
 * arc_cos() relies on when it scans for the enclosing segment.
 */
static const fp_t cos_lut[] = {
FLOAT_TO_FP( 1.00000), FLOAT_TO_FP( 0.99619), FLOAT_TO_FP( 0.98481), /*   0,   5,  10 deg */
FLOAT_TO_FP( 0.96593), FLOAT_TO_FP( 0.93969), FLOAT_TO_FP( 0.90631), /*  15,  20,  25 deg */
FLOAT_TO_FP( 0.86603), FLOAT_TO_FP( 0.81915), FLOAT_TO_FP( 0.76604), /*  30,  35,  40 deg */
FLOAT_TO_FP( 0.70711), FLOAT_TO_FP( 0.64279), FLOAT_TO_FP( 0.57358), /*  45,  50,  55 deg */
FLOAT_TO_FP( 0.50000), FLOAT_TO_FP( 0.42262), FLOAT_TO_FP( 0.34202), /*  60,  65,  70 deg */
FLOAT_TO_FP( 0.25882), FLOAT_TO_FP( 0.17365), FLOAT_TO_FP( 0.08716), /*  75,  80,  85 deg */
FLOAT_TO_FP( 0.00000), FLOAT_TO_FP(-0.08716), FLOAT_TO_FP(-0.17365), /*  90,  95, 100 deg */
FLOAT_TO_FP(-0.25882), FLOAT_TO_FP(-0.34202), FLOAT_TO_FP(-0.42262), /* 105, 110, 115 deg */
FLOAT_TO_FP(-0.50000), FLOAT_TO_FP(-0.57358), FLOAT_TO_FP(-0.64279), /* 120, 125, 130 deg */
FLOAT_TO_FP(-0.70711), FLOAT_TO_FP(-0.76604), FLOAT_TO_FP(-0.81915), /* 135, 140, 145 deg */
FLOAT_TO_FP(-0.86603), FLOAT_TO_FP(-0.90631), FLOAT_TO_FP(-0.93969), /* 150, 155, 160 deg */
FLOAT_TO_FP(-0.96593), FLOAT_TO_FP(-0.98481), FLOAT_TO_FP(-0.99619), /* 165, 170, 175 deg */
FLOAT_TO_FP(-1.00000),                                               /* 180 deg */
};
/* Catch the table and COSINE_LUT_SIZE drifting apart at build time. */
BUILD_ASSERT(ARRAY_SIZE(cos_lut) == COSINE_LUT_SIZE);
/**
 * Arc cosine via table lookup with linear interpolation.
 *
 * @param x  Cosine value in fixed point; values outside [-1.0, 1.0] are
 *           clamped to the nearest bound before the lookup.
 * @return The angle in degrees (0 to 180) as a fixed-point value.
 */
fp_t arc_cos(fp_t x)
{
	const fp_t lower_bound = FLOAT_TO_FP(-1.0);
	const fp_t upper_bound = FLOAT_TO_FP(1.0);
	int seg;

	/* Clamp the input into the domain covered by cos_lut[]. */
	if (x < lower_bound)
		x = lower_bound;
	else if (x > upper_bound)
		x = upper_bound;

	/*
	 * cos_lut[] is decreasing, so the first segment whose far end is not
	 * above x is the one that contains x. Linear scan for now.
	 */
	/* TODO(crosbug.com/p/25600): Optimize with binary search. */
	for (seg = 0; seg < COSINE_LUT_SIZE - 1; seg++) {
		fp_t frac;

		if (x < cos_lut[seg + 1])
			continue;

		/* Fraction of the way from entry seg towards entry seg + 1. */
		frac = fp_div(cos_lut[seg] - x,
			      cos_lut[seg] - cos_lut[seg + 1]);

		/* Convert (segment index + fraction) to degrees. */
		return fp_mul(INT_TO_FP(COSINE_LUT_INCR_DEG),
			      INT_TO_FP(seg) + frac);
	}

	/*
	 * Not reachable: x was clamped to [-1, 1] and the last table entry
	 * is -1, so the final segment always matches. Assert if we ever get
	 * here regardless.
	 */
	ASSERT(0);
	return 0;
}
/**
* Integer square root.
*/
#ifdef CONFIG_FPU
/*
 * FPU build: defer to the library sqrtf(), which is usually much faster and
 * smaller than a software search. On Cortex-M4 this becomes a single
 * instruction which takes 14 cycles to execute. The result matches the
 * binary search variant except when the floating point representation of
 * the square root rounds up to an integer.
 *
 * The float result is truncated to int on return.
 */
static inline int int_sqrtf(fp_inter_t x)
{
	return (int)sqrtf(x);
}
/* If the platform supports an FPU, the square root is simply sqrtf(). */
fp_t fp_sqrtf(fp_t x)
{
	const fp_t root = sqrtf(x);

	return root;
}
#else
/*
 * Integer square root, software fallback used when CONFIG_FPU is not set.
 *
 * Binary-searches for the largest r such that r * r <= x. There are better
 * algorithms, but this is called infrequently enough that it doesn't matter.
 *
 * @param x  Value to take the square root of.
 * @return floor(sqrt(x)); 0 when x <= 0 (yes, for imaginary results too);
 *         INT32_MAX when the root would not fit in an int.
 *
 * NOTE(review): assumes fp_inter_t is a 64-bit integer in this configuration
 * so that INT32_MAX * INT32_MAX is representable -- confirm in math_util.h.
 */
static int int_sqrtf(fp_inter_t x)
{
	/*
	 * Smallest integer whose square exceeds INT32_MAX:
	 * 46340^2 = 2147395600 <= INT32_MAX < 2147488281 = 46341^2.
	 */
	const int root_limit_32bit = 46341;
	int rmax = INT32_MAX;
	int rmin = 0;

	if (x <= 0)
		return 0;

	if (x < INT32_MAX) {
		/*
		 * Short cut if x is a 32-bit value: the root is at most
		 * 46340, so start the search with a much tighter (exclusive)
		 * upper bound. A bound of 0x7fff would be too small here and
		 * would clip every x above 0x7fff^2.
		 */
		rmax = root_limit_32bit;
	} else if (x >= (fp_inter_t)rmax * rmax) {
		/* Root does not fit below rmax: saturate. */
		return rmax;
	}

	/*
	 * Invariant: rmin^2 < x (or rmin == 0) and x < rmax^2, so rmax itself
	 * is never a candidate and the loop always terminates.
	 */
	while (1) {
		/* Overflow-safe midpoint; rmax + rmin can exceed INT_MAX. */
		const int r = rmin + (rmax - rmin) / 2;
		const fp_inter_t r2 = (fp_inter_t)r * r;

		if (r2 > x) {
			/* Guessed too high */
			rmax = r;
		} else if (r2 < x) {
			/* Guessed too low. Watch out for rounding! */
			if (rmin == r)
				return r;
			rmin = r;
		} else {
			/* Bullseye! */
			return r;
		}
	}
}
/*
 * Fixed-point square root (software variant).
 *
 * The input is shifted up by FP_BITS before taking the integer root; the
 * root of the shifted value then carries FP_BITS fractional bits again
 * (presumably fp_t is scaled by 2^FP_BITS -- confirm in math_util.h).
 *
 * @param x  Fixed-point value.
 * @return Fixed-point square root of x, or 0 when x <= 0.
 */
fp_t fp_sqrtf(fp_t x)
{
	fp_inter_t preshift_x;

	/*
	 * int_sqrtf() yields 0 for non-positive input anyway; returning early
	 * avoids left-shifting a negative value, which is undefined in C.
	 */
	if (x <= 0)
		return 0;

	preshift_x = (fp_inter_t)x << FP_BITS;
	return int_sqrtf(preshift_x);
}
#endif /* CONFIG_FPU */
int vector_magnitude(const intv3_t v)
{
fp_inter_t sum = (fp_inter_t)v[0] * v[0] +
(fp_inter_t)v[1] * v[1] +
(fp_inter_t)v[2] * v[2];
return int_sqrtf(sum);
}
/*
 * Cross product v = v1 x v2.
 *
 * cross_product only works if the vectors' magnitudes are around 1<<16: the
 * products are formed in fp_inter_t but stored back into v without any
 * rescaling.
 *
 * Components are written one at a time in X, Y, Z order, right after each
 * one is computed.
 */
void cross_product(const intv3_t v1, const intv3_t v2, intv3_t v)
{
	fp_inter_t pos;
	fp_inter_t neg;

	pos = (fp_inter_t)v1[Y] * v2[Z];
	neg = (fp_inter_t)v1[Z] * v2[Y];
	v[X] = pos - neg;

	pos = (fp_inter_t)v1[Z] * v2[X];
	neg = (fp_inter_t)v1[X] * v2[Z];
	v[Y] = pos - neg;

	pos = (fp_inter_t)v1[X] * v2[Y];
	neg = (fp_inter_t)v1[Y] * v2[X];
	v[Z] = pos - neg;
}
fp_inter_t dot_product(const intv3_t v1, const intv3_t v2)
{
return (fp_inter_t)v1[X] * v2[X] +
(fp_inter_t)v1[Y] * v2[Y] +
(fp_inter_t)v1[Z] * v2[Z];
}
/**
 * Scale a vector in place by a fixed-point factor.
 *
 * @param v  Vector to scale; each of the X, Y and Z components is replaced
 *           by fp_mul(component, s).
 * @param s  Fixed-point scale factor.
 */
void vector_scale(intv3_t v, fp_t s)
{
	const int axes[] = { X, Y, Z };
	unsigned int k;

	for (k = 0; k < ARRAY_SIZE(axes); k++)
		v[axes[k]] = fp_mul(v[axes[k]], s);
}
fp_t cosine_of_angle_diff(const intv3_t v1, const intv3_t v2)
{
fp_inter_t dotproduct;
fp_inter_t denominator;
/*
* Angle between two vectors is acos(A dot B / |A|*|B|). To return
* cosine of angle between vectors, then don't do acos operation.
*/
dotproduct = dot_product(v1, v2);
denominator = (fp_inter_t)vector_magnitude(v1) * vector_magnitude(v2);
/* Check for divide by 0 although extremely unlikely. */
if (!denominator)
return 0;
/*
* We must shift the dot product first, so that we can represent
* fractions. The answer is always a number with magnitude < 1.0, so
* if we don't shift, it will always round down to 0.
*
* Note that overflow is possible if the dot product is large (that is,
* if the vector components are of size (31 - FP_BITS/2) bits. If that
* ever becomes a problem, we could detect this by counting the leading
* zeroes of the dot product and shifting the denominator down
* partially instead of shifting the dot product up. With the current
* FP_BITS=16, that happens if the vector components are ~2^23. Which
* we're a long way away from; the vector components used in
* accelerometer calculations are ~2^11.
*/
return fp_div(dotproduct, denominator);
}
/*
 * Rotate vector v by matrix R and store the result in res.
 *
 * Computes res[j] = FP_TO_INT(sum over i of v[i] * R[i][j]).
 * A NULL R means "no rotation": v is copied to res unchanged.
 * v and res may be the same vector.
 */
void rotate(const intv3_t v, const mat33_fp_t R, intv3_t res)
{
	fp_inter_t t[3];
	int col;

	if (R == NULL) {
		if (v != res)
			memcpy(res, v, sizeof(intv3_t));
		return;
	}

	/*
	 * Finish every product before touching res, so that the in-place
	 * case (v == res) still reads the original components.
	 */
	for (col = 0; col < 3; col++) {
		t[col] = (fp_inter_t)v[0] * R[0][col] +
			 (fp_inter_t)v[1] * R[1][col] +
			 (fp_inter_t)v[2] * R[2][col];
	}

	/* Scale by fixed point shift when writing back to result */
	for (col = 0; col < 3; col++)
		res[col] = FP_TO_INT(t[col]);
}
/*
 * Apply the inverse of matrix R to vector v and store the result in res.
 *
 * The inverse is formed as adjugate(R) / det(R); see
 * http://stackoverflow.com/questions/983999/simple-3x3-matrix-inverse-code-c
 * A NULL R means "no rotation": v is copied to res unchanged.
 * v and res may be the same vector.
 *
 * NOTE(review): there is no guard for a zero determinant; callers presumably
 * only pass invertible (rotation) matrices -- confirm.
 */
void rotate_inv(const intv3_t v, const mat33_fp_t R, intv3_t res)
{
	fp_inter_t t[3];
	fp_t deter;
	/* 2x2 minors; mRC is the factor applied to v[C] in output row R. */
	fp_t m00, m01, m02;
	fp_t m10, m11, m12;
	fp_t m20, m21, m22;
	int k;

	if (R == NULL) {
		if (v != res)
			memcpy(res, v, sizeof(intv3_t));
		return;
	}

	m00 = fp_mul(R[1][1], R[2][2]) - fp_mul(R[2][1], R[1][2]);
	m01 = fp_mul(R[1][0], R[2][2]) - fp_mul(R[1][2], R[2][0]);
	m02 = fp_mul(R[1][0], R[2][1]) - fp_mul(R[2][0], R[1][1]);

	m10 = fp_mul(R[0][1], R[2][2]) - fp_mul(R[0][2], R[2][1]);
	m11 = fp_mul(R[0][0], R[2][2]) - fp_mul(R[0][2], R[2][0]);
	m12 = fp_mul(R[0][0], R[2][1]) - fp_mul(R[2][0], R[0][1]);

	m20 = fp_mul(R[0][1], R[1][2]) - fp_mul(R[0][2], R[1][1]);
	m21 = fp_mul(R[0][0], R[1][2]) - fp_mul(R[1][0], R[0][2]);
	m22 = fp_mul(R[0][0], R[1][1]) - fp_mul(R[1][0], R[0][1]);

	/* Determinant by expansion along the first row of R. */
	deter = fp_mul(R[0][0], m00) -
		fp_mul(R[0][1], m01) +
		fp_mul(R[0][2], (fp_mul(R[1][0], R[2][1]) -
				 fp_mul(R[1][1], R[2][0])));

	/* Multiply v by the adjugate; signs alternate in checkerboard order. */
	t[0] = (fp_inter_t)v[0] * m00 -
	       (fp_inter_t)v[1] * m01 +
	       (fp_inter_t)v[2] * m02;
	t[1] = -((fp_inter_t)v[0] * m10) +
	       (fp_inter_t)v[1] * m11 -
	       (fp_inter_t)v[2] * m12;
	t[2] = (fp_inter_t)v[0] * m20 -
	       (fp_inter_t)v[1] * m21 +
	       (fp_inter_t)v[2] * m22;

	/*
	 * Divide by the determinant and scale by the fixed point shift when
	 * writing back to the result.
	 */
	for (k = 0; k < 3; k++)
		res[k] = FP_TO_INT(fp_div(t[k], deter));
}