Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
Yuwei Xiao
eigen
Commits
cc5d7ff5
Commit
cc5d7ff5
authored
Feb 10, 2015
by
Benoit Steiner
Browse files
Added vectorized implementation of the exponential function for ARM/NEON
parent
c3f3580b
Changes
3
Hide whitespace changes
Inline
Side-by-side
Eigen/Core
View file @
cc5d7ff5
...
...
@@ -300,6 +300,7 @@ using std::ptrdiff_t;
#include "src/Core/arch/AltiVec/Complex.h"
#elif defined EIGEN_VECTORIZE_NEON
#include "src/Core/arch/NEON/PacketMath.h"
#include "src/Core/arch/NEON/MathFunctions.h"
#include "src/Core/arch/NEON/Complex.h"
#endif
...
...
Eigen/src/Core/arch/NEON/MathFunctions.h
0 → 100644
View file @
cc5d7ff5
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/* The exp function of this file comes from
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
*/
#ifndef EIGEN_MATH_FUNCTIONS_NEON_H
#define EIGEN_MATH_FUNCTIONS_NEON_H
namespace
Eigen
{
namespace
internal
{
// Packet (4 x float) exponential for NEON.
//
// Outline of the computation, per lane:
//   1. clamp the argument to [exp_lo, exp_hi];
//   2. pick n = floor(x * log2(e) + 0.5) so that exp(x) = exp(r) * 2^n;
//   3. form the remainder r = x - n*C1 - n*C2 (C1 + C2 is approximately ln 2);
//   4. evaluate exp(r) as 1 + r + r^2 * P(r) with the cephes_exp_p0..p5
//      coefficients, using Horner's scheme;
//   5. scale by 2^n, which is built directly in the float exponent field.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f pexp<Packet4f>(const Packet4f& _x)
{
  // Each macro below introduces a packet constant named p4f_<NAME> / p4i_<NAME>.
  _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
  _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);

  _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
  _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);

  _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);

  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);

  // Step 1: restrict the argument to [exp_lo, exp_hi] (upper bound first).
  const Packet4f clamped = vmaxq_f32(vminq_f32(_x, p4f_exp_hi), p4f_exp_lo);

  // Step 2: scaled = 0.5 + clamped * log2(e); n is its floor.
  const Packet4f scaled = vmlaq_f32(p4f_half, clamped, p4f_cephes_LOG2EF);

  // The float -> int -> float round trip drops the fractional part. Lanes
  // where that result ended up above `scaled` need 1 subtracted to get the
  // floor: the comparison mask is all-ones in those lanes, so AND-ing it with
  // the bit pattern of 1.0f yields 1.0f there and 0.0f elsewhere.
  const Packet4f  truncated  = vcvtq_f32_s32(vcvtq_s32_f32(scaled));
  const Packet4ui overshoot  = vcgtq_f32(truncated, scaled);
  const Packet4ui one_or_nil = vandq_u32(overshoot, vreinterpretq_u32_f32(p4f_1));
  const Packet4f  n          = vsubq_f32(truncated, vreinterpretq_f32_u32(one_or_nil));

  // Step 3: r = (clamped - n*C1) - n*C2, subtracting the two parts separately.
  const Packet4f n_c1 = vmulq_f32(n, p4f_cephes_exp_C1);
  const Packet4f n_c2 = vmulq_f32(n, p4f_cephes_exp_C2);
  const Packet4f r    = vsubq_f32(vsubq_f32(clamped, n_c1), n_c2);
  const Packet4f r_sq = vmulq_f32(r, r);

  // Step 4: Horner evaluation of ((((p0*r + p1)*r + p2)*r + p3)*r + p4)*r + p5,
  // then multiply by r^2 and add r and 1.
  Packet4f poly = vmulq_f32(p4f_cephes_exp_p0, r);
  poly = vmulq_f32(vaddq_f32(poly, p4f_cephes_exp_p1), r);
  poly = vmulq_f32(vaddq_f32(poly, p4f_cephes_exp_p2), r);
  poly = vmulq_f32(vaddq_f32(poly, p4f_cephes_exp_p3), r);
  poly = vmulq_f32(vaddq_f32(poly, p4f_cephes_exp_p4), r);
  poly = vmulq_f32(vaddq_f32(poly, p4f_cephes_exp_p5), r_sq);
  poly = vaddq_f32(vaddq_f32(poly, r), p4f_1);

  // Step 5: build 2^n by adding the bias 0x7f to n and shifting the sum left
  // by 23 bits, then reinterpreting the integer lanes as floats.
  const int32x4_t biased_exponent = vaddq_s32(vcvtq_s32_f32(n), p4i_0x7f);
  const Packet4f  pow2n           = vreinterpretq_f32_s32(vshlq_n_s32(biased_exponent, 23));

  return vmulq_f32(poly, pow2n);
}
}
// end namespace internal
}
// end namespace Eigen
#endif // EIGEN_MATH_FUNCTIONS_NEON_H
Eigen/src/Core/arch/NEON/PacketMath.h
View file @
cc5d7ff5
...
...
@@ -88,7 +88,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasSin
=
0
,
HasCos
=
0
,
HasLog
=
0
,
HasExp
=
0
,
HasExp
=
1
,
HasSqrt
=
0
};
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment