288 lines
7.8 KiB
C
288 lines
7.8 KiB
C
/* Copyright 2014 The Chromium OS Authors. All rights reserved.
|
|
* Use of this source code is governed by a BSD-style license that can be
|
|
* found in the LICENSE file.
|
|
*/
|
|
|
|
/* Common math functions. */
|
|
|
|
#include "common.h"
|
|
#include "math.h"
|
|
#include "math_util.h"
|
|
#include "util.h"
|
|
|
|
/* Some useful math functions. Use with integers only! */
|
|
#define SQ(x) ((x) * (x))
|
|
|
|
/* For cosine lookup table, define the increment and the size of the table. */
|
|
#define COSINE_LUT_INCR_DEG 5
|
|
#define COSINE_LUT_SIZE ((180 / COSINE_LUT_INCR_DEG) + 1)
|
|
|
|
/* Lookup table for the value of cosine from 0 degrees to 180 degrees. */
|
|
static const fp_t cos_lut[] = {
|
|
FLOAT_TO_FP( 1.00000), FLOAT_TO_FP( 0.99619), FLOAT_TO_FP( 0.98481),
|
|
FLOAT_TO_FP( 0.96593), FLOAT_TO_FP( 0.93969), FLOAT_TO_FP( 0.90631),
|
|
FLOAT_TO_FP( 0.86603), FLOAT_TO_FP( 0.81915), FLOAT_TO_FP( 0.76604),
|
|
FLOAT_TO_FP( 0.70711), FLOAT_TO_FP( 0.64279), FLOAT_TO_FP( 0.57358),
|
|
FLOAT_TO_FP( 0.50000), FLOAT_TO_FP( 0.42262), FLOAT_TO_FP( 0.34202),
|
|
FLOAT_TO_FP( 0.25882), FLOAT_TO_FP( 0.17365), FLOAT_TO_FP( 0.08716),
|
|
FLOAT_TO_FP( 0.00000), FLOAT_TO_FP(-0.08716), FLOAT_TO_FP(-0.17365),
|
|
FLOAT_TO_FP(-0.25882), FLOAT_TO_FP(-0.34202), FLOAT_TO_FP(-0.42262),
|
|
FLOAT_TO_FP(-0.50000), FLOAT_TO_FP(-0.57358), FLOAT_TO_FP(-0.64279),
|
|
FLOAT_TO_FP(-0.70711), FLOAT_TO_FP(-0.76604), FLOAT_TO_FP(-0.81915),
|
|
FLOAT_TO_FP(-0.86603), FLOAT_TO_FP(-0.90631), FLOAT_TO_FP(-0.93969),
|
|
FLOAT_TO_FP(-0.96593), FLOAT_TO_FP(-0.98481), FLOAT_TO_FP(-0.99619),
|
|
FLOAT_TO_FP(-1.00000),
|
|
};
|
|
BUILD_ASSERT(ARRAY_SIZE(cos_lut) == COSINE_LUT_SIZE);
|
|
|
|
|
|
fp_t arc_cos(fp_t x)
|
|
{
|
|
int i;
|
|
|
|
/* Cap x if out of range. */
|
|
if (x < FLOAT_TO_FP(-1.0))
|
|
x = FLOAT_TO_FP(-1.0);
|
|
else if (x > FLOAT_TO_FP(1.0))
|
|
x = FLOAT_TO_FP(1.0);
|
|
|
|
/*
|
|
* Increment through lookup table to find index and then linearly
|
|
* interpolate for precision.
|
|
*/
|
|
/* TODO(crosbug.com/p/25600): Optimize with binary search. */
|
|
for (i = 0; i < COSINE_LUT_SIZE-1; i++) {
|
|
if (x >= cos_lut[i+1]) {
|
|
const fp_t interp = fp_div(cos_lut[i] - x,
|
|
cos_lut[i] - cos_lut[i + 1]);
|
|
|
|
return fp_mul(INT_TO_FP(COSINE_LUT_INCR_DEG),
|
|
INT_TO_FP(i) + interp);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Shouldn't be possible to get here because inputs are clipped to
|
|
* [-1, 1] and the cos_lut[] table goes over the same range. If we
|
|
* are here, throw an assert.
|
|
*/
|
|
ASSERT(0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Integer square root.
|
|
*/
|
|
#ifdef CONFIG_FPU
|
|
/*
|
|
* Use library sqrtf instruction, if available, since it's usually much faster
|
|
* and smaller. On Cortex-M4, this becomes a single instruction which takes
|
|
* 14 cycles to execute. This produces identical results to binary search,
|
|
* except when the floating point representation of the square root rounds up
|
|
* to an integer.
|
|
*/
|
|
static inline int int_sqrtf(fp_inter_t x)
|
|
{
|
|
return sqrtf(x);
|
|
}
|
|
|
|
/* If the platform support FPU, just return sqrtf. */
|
|
fp_t fp_sqrtf(fp_t x)
|
|
{
|
|
return sqrtf(x);
|
|
}
|
|
#else
|
|
static int int_sqrtf(fp_inter_t x)
|
|
{
|
|
int rmax = INT32_MAX;
|
|
int rmin = 0;
|
|
|
|
/* Short cut if x is 32-bit value */
|
|
if (x < rmax)
|
|
rmax = 0x7fff;
|
|
|
|
/*
|
|
* Just binary-search. There are better algorithms, but we call this
|
|
* infrequently enough it doesn't matter.
|
|
*/
|
|
if (x <= 0)
|
|
return 0; /* Yeah, for imaginary numbers too */
|
|
else if (x > (fp_inter_t)rmax * rmax)
|
|
return rmax;
|
|
|
|
while (1) {
|
|
int r = (rmax + rmin) / 2;
|
|
fp_inter_t r2 = (fp_inter_t)r * r;
|
|
|
|
if (r2 > x) {
|
|
/* Guessed too high */
|
|
rmax = r;
|
|
} else if (r2 < x) {
|
|
/* Guessed too low. Watch out for rounding! */
|
|
if (rmin == r)
|
|
return r;
|
|
|
|
rmin = r;
|
|
} else {
|
|
/* Bullseye! */
|
|
return r;
|
|
}
|
|
}
|
|
}
|
|
|
|
fp_t fp_sqrtf(fp_t x)
|
|
{
|
|
fp_inter_t preshift_x = (fp_inter_t)x << FP_BITS;
|
|
|
|
return int_sqrtf(preshift_x);
|
|
}
|
|
#endif /* CONFIG_FPU */
|
|
|
|
int vector_magnitude(const intv3_t v)
|
|
{
|
|
fp_inter_t sum = (fp_inter_t)v[0] * v[0] +
|
|
(fp_inter_t)v[1] * v[1] +
|
|
(fp_inter_t)v[2] * v[2];
|
|
|
|
return int_sqrtf(sum);
|
|
}
|
|
|
|
/* cross_product only works if the vectors magnitudes are around 1<<16. */
|
|
void cross_product(const intv3_t v1, const intv3_t v2, intv3_t v)
|
|
{
|
|
v[X] = (fp_inter_t)v1[Y] * v2[Z] - (fp_inter_t)v1[Z] * v2[Y];
|
|
v[Y] = (fp_inter_t)v1[Z] * v2[X] - (fp_inter_t)v1[X] * v2[Z];
|
|
v[Z] = (fp_inter_t)v1[X] * v2[Y] - (fp_inter_t)v1[Y] * v2[X];
|
|
}
|
|
|
|
fp_inter_t dot_product(const intv3_t v1, const intv3_t v2)
|
|
{
|
|
return (fp_inter_t)v1[X] * v2[X] +
|
|
(fp_inter_t)v1[Y] * v2[Y] +
|
|
(fp_inter_t)v1[Z] * v2[Z];
|
|
}
|
|
|
|
void vector_scale(intv3_t v, fp_t s)
|
|
{
|
|
v[X] = fp_mul(v[X], s);
|
|
v[Y] = fp_mul(v[Y], s);
|
|
v[Z] = fp_mul(v[Z], s);
|
|
}
|
|
|
|
fp_t cosine_of_angle_diff(const intv3_t v1, const intv3_t v2)
|
|
{
|
|
fp_inter_t dotproduct;
|
|
fp_inter_t denominator;
|
|
|
|
/*
|
|
* Angle between two vectors is acos(A dot B / |A|*|B|). To return
|
|
* cosine of angle between vectors, then don't do acos operation.
|
|
*/
|
|
dotproduct = dot_product(v1, v2);
|
|
|
|
denominator = (fp_inter_t)vector_magnitude(v1) * vector_magnitude(v2);
|
|
|
|
/* Check for divide by 0 although extremely unlikely. */
|
|
if (!denominator)
|
|
return 0;
|
|
|
|
/*
|
|
* We must shift the dot product first, so that we can represent
|
|
* fractions. The answer is always a number with magnitude < 1.0, so
|
|
* if we don't shift, it will always round down to 0.
|
|
*
|
|
* Note that overflow is possible if the dot product is large (that is,
|
|
* if the vector components are of size (31 - FP_BITS/2) bits. If that
|
|
* ever becomes a problem, we could detect this by counting the leading
|
|
* zeroes of the dot product and shifting the denominator down
|
|
* partially instead of shifting the dot product up. With the current
|
|
* FP_BITS=16, that happens if the vector components are ~2^23. Which
|
|
* we're a long way away from; the vector components used in
|
|
* accelerometer calculations are ~2^11.
|
|
*/
|
|
return fp_div(dotproduct, denominator);
|
|
}
|
|
|
|
/*
|
|
* rotate a vector v
|
|
* - support input v and output res are the same vector
|
|
*/
|
|
void rotate(const intv3_t v, const mat33_fp_t R, intv3_t res)
|
|
{
|
|
fp_inter_t t[3];
|
|
|
|
if (R == NULL) {
|
|
if (v != res)
|
|
memcpy(res, v, sizeof(intv3_t));
|
|
return;
|
|
}
|
|
|
|
|
|
/* Rotate */
|
|
t[0] = (fp_inter_t)v[0] * R[0][0] +
|
|
(fp_inter_t)v[1] * R[1][0] +
|
|
(fp_inter_t)v[2] * R[2][0];
|
|
t[1] = (fp_inter_t)v[0] * R[0][1] +
|
|
(fp_inter_t)v[1] * R[1][1] +
|
|
(fp_inter_t)v[2] * R[2][1];
|
|
t[2] = (fp_inter_t)v[0] * R[0][2] +
|
|
(fp_inter_t)v[1] * R[1][2] +
|
|
(fp_inter_t)v[2] * R[2][2];
|
|
|
|
/* Scale by fixed point shift when writing back to result */
|
|
res[0] = FP_TO_INT(t[0]);
|
|
res[1] = FP_TO_INT(t[1]);
|
|
res[2] = FP_TO_INT(t[2]);
|
|
}
|
|
|
|
void rotate_inv(const intv3_t v, const mat33_fp_t R, intv3_t res)
|
|
{
|
|
fp_inter_t t[3];
|
|
fp_t deter;
|
|
|
|
if (R == NULL) {
|
|
if (v != res)
|
|
memcpy(res, v, sizeof(intv3_t));
|
|
return;
|
|
}
|
|
|
|
deter = fp_mul(R[0][0], (fp_mul(R[1][1], R[2][2]) -
|
|
fp_mul(R[2][1], R[1][2]))) -
|
|
fp_mul(R[0][1], (fp_mul(R[1][0], R[2][2]) -
|
|
fp_mul(R[1][2], R[2][0]))) +
|
|
fp_mul(R[0][2], (fp_mul(R[1][0], R[2][1]) -
|
|
fp_mul(R[1][1], R[2][0])));
|
|
|
|
/*
|
|
* invert the matrix: from
|
|
* http://stackoverflow.com/questions/983999/
|
|
* simple-3x3-matrix-inverse-code-c
|
|
*/
|
|
t[0] = (fp_inter_t)v[0] * (fp_mul(R[1][1], R[2][2]) -
|
|
fp_mul(R[2][1], R[1][2])) -
|
|
(fp_inter_t)v[1] * (fp_mul(R[1][0], R[2][2]) -
|
|
fp_mul(R[1][2], R[2][0])) +
|
|
(fp_inter_t)v[2] * (fp_mul(R[1][0], R[2][1]) -
|
|
fp_mul(R[2][0], R[1][1]));
|
|
|
|
t[1] = (fp_inter_t)v[0] * (fp_mul(R[0][1], R[2][2]) -
|
|
fp_mul(R[0][2], R[2][1])) * -1 +
|
|
(fp_inter_t)v[1] * (fp_mul(R[0][0], R[2][2]) -
|
|
fp_mul(R[0][2], R[2][0])) -
|
|
(fp_inter_t)v[2] * (fp_mul(R[0][0], R[2][1]) -
|
|
fp_mul(R[2][0], R[0][1]));
|
|
|
|
t[2] = (fp_inter_t)v[0] * (fp_mul(R[0][1], R[1][2]) -
|
|
fp_mul(R[0][2], R[1][1])) -
|
|
(fp_inter_t)v[1] * (fp_mul(R[0][0], R[1][2]) -
|
|
fp_mul(R[1][0], R[0][2])) +
|
|
(fp_inter_t)v[2] * (fp_mul(R[0][0], R[1][1]) -
|
|
fp_mul(R[1][0], R[0][1]));
|
|
|
|
/* Scale by fixed point shift when writing back to result */
|
|
res[0] = FP_TO_INT(fp_div(t[0], deter));
|
|
res[1] = FP_TO_INT(fp_div(t[1], deter));
|
|
res[2] = FP_TO_INT(fp_div(t[2], deter));
|
|
}
|