Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
NE10_physics.neon.c
1 /*
2  * Copyright 2014-15 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : physics/NE10_physics.neon.c
30  */
31 
32 #include "NE10_types.h"
33 
#ifdef ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
/*
 * Externally defined routine, bound to the symbol
 * "ne10_physics_compute_aabb_vertex4_vec2f_neon" through the asm label.
 * It takes the same arguments as ne10_physics_compute_aabb_vec2f_neon; the
 * wrapper in this file only ever passes it a vertex_count that is a non-zero
 * multiple of four.
 */
extern void ne10_physics_compute_aabb_vertex4_vec2f_neon (
    ne10_mat2x2f_t *aabb,
    ne10_vec2f_t *vertices,
    ne10_mat2x2f_t *xf,
    ne10_vec2f_t *radius,
    ne10_uint32_t vertex_count) asm ("ne10_physics_compute_aabb_vertex4_vec2f_neon");
#endif /* ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON */
42 
43 static inline ne10_vec2f_t ne10_mul_matvec_float (ne10_mat2x2f_t T, ne10_vec2f_t v)
44 {
45  ne10_vec2f_t tmp;
46  ne10_float32_t x = (T.c2.r2 * v.x - T.c2.r1 * v.y) + T.c1.r1;
47  ne10_float32_t y = (T.c2.r1 * v.x + T.c2.r2 * v.y) + T.c1.r2;
48  tmp.x = x;
49  tmp.y = y;
50  return tmp;
51 }
52 
53 static inline ne10_float32_t min (float a, ne10_float32_t b)
54 {
55  return a < b ? a : b;
56 }
57 
58 static inline ne10_vec2f_t min_2f (ne10_vec2f_t a, ne10_vec2f_t b)
59 {
60  ne10_vec2f_t tmp = {min (a.x, b.x), min (a.y, b.y) };
61  return tmp;
62 }
63 
64 static inline ne10_float32_t max (float a, ne10_float32_t b)
65 {
66  return a > b ? a : b;
67 }
68 
69 static inline ne10_vec2f_t max_2f (ne10_vec2f_t a, ne10_vec2f_t b)
70 {
71  ne10_vec2f_t tmp = {max (a.x, b.x), max (a.y, b.y) };
72  return tmp;
73 }
74 
#ifdef ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
/**
 * Compute the axis-aligned bounding box of a set of transformed 2D vertices,
 * grown by a per-axis margin.
 *
 * The largest multiple-of-four prefix of the vertices is handed to
 * ne10_physics_compute_aabb_vertex4_vec2f_neon; the remaining 1..3 vertices
 * are folded into the result here in plain C.
 *
 * @param[out] aabb          Result box. c1 receives the lower corner minus
 *                           radius, c2 the upper corner plus radius
 *                           (r1 = x, r2 = y).
 * @param[in]  vertices      Array of at least vertex_count vertices.
 * @param[in]  xf            Transform applied to every vertex
 *                           (see ne10_mul_matvec_float).
 * @param[in]  radius        Margin subtracted from the lower corner and added
 *                           to the upper corner, per axis.
 * @param[in]  vertex_count  Number of vertices. When it is 0 nothing is
 *                           written to aabb.
 */
void ne10_physics_compute_aabb_vec2f_neon (ne10_mat2x2f_t *aabb,
        ne10_vec2f_t *vertices,
        ne10_mat2x2f_t *xf,
        ne10_vec2f_t *radius,
        ne10_uint32_t vertex_count)
{
    /* Counters are unsigned like vertex_count, so that large counts cannot
     * turn into negative indices and the loop comparison below is not a
     * mixed signed/unsigned one. */
    const ne10_uint32_t residual_loops = vertex_count & 0x3u;
    const ne10_uint32_t main_loops = vertex_count - residual_loops;
    ne10_vec2f_t lower;
    ne10_vec2f_t upper;
    ne10_uint32_t i;

    if (main_loops > 0)
    {
        ne10_physics_compute_aabb_vertex4_vec2f_neon (aabb, vertices, xf, radius, main_loops);
    }

    if (residual_loops == 0)
    {
        /* Either the kernel already produced the final box, or there were no
         * vertices at all. */
        return;
    }

    /* Seed the running bounds with the first vertex the kernel did not see. */
    lower = ne10_mul_matvec_float (*xf, vertices[main_loops]);
    upper = lower;

    if (main_loops > 0)
    {
        /* The box left in aabb by the kernel is treated as already grown by
         * radius: strip the margin before merging, it is re-applied once at
         * the end. */
        ne10_vec2f_t kernel_lower;
        ne10_vec2f_t kernel_upper;

        kernel_lower.x = aabb->c1.r1 + radius->x;
        kernel_lower.y = aabb->c1.r2 + radius->y;
        kernel_upper.x = aabb->c2.r1 - radius->x;
        kernel_upper.y = aabb->c2.r2 - radius->y;

        lower = min_2f (lower, kernel_lower);
        upper = max_2f (upper, kernel_upper);
    }

    /* Fold in the remaining residual vertices (at most two more). */
    for (i = main_loops + 1; i < vertex_count; ++i)
    {
        const ne10_vec2f_t v = ne10_mul_matvec_float (*xf, vertices[i]);

        lower = min_2f (lower, v);
        upper = max_2f (upper, v);
    }

    aabb->c1.r1 = lower.x - radius->x;
    aabb->c1.r2 = lower.y - radius->y;
    aabb->c2.r1 = upper.x + radius->x;
    aabb->c2.r2 = upper.y + radius->y;
}
#endif /* ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON */
ne10_vec2f_t: a 2-tuple of ne10_float32_t values.
Definition: NE10_types.h:87