@@ -17,7 +17,7 @@ pub unsafe fn vabd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
17
17
#[ allow( improper_ctypes) ]
18
18
extern "C" {
19
19
#[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.fabd.v1f64" ) ]
20
- fn vabd_f64_ ( a : float64x1_t , a : float64x1_t ) -> float64x1_t ;
20
+ fn vabd_f64_ ( a : float64x1_t , b : float64x1_t ) -> float64x1_t ;
21
21
}
22
22
vabd_f64_ ( a, b)
23
23
}
@@ -30,7 +30,7 @@ pub unsafe fn vabdq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
30
30
#[ allow( improper_ctypes) ]
31
31
extern "C" {
32
32
#[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.fabd.v2f64" ) ]
33
- fn vabdq_f64_ ( a : float64x2_t , a : float64x2_t ) -> float64x2_t ;
33
+ fn vabdq_f64_ ( a : float64x2_t , b : float64x2_t ) -> float64x2_t ;
34
34
}
35
35
vabdq_f64_ ( a, b)
36
36
}
@@ -1055,7 +1055,7 @@ pub unsafe fn vmax_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
1055
1055
#[ allow( improper_ctypes) ]
1056
1056
extern "C" {
1057
1057
#[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.fmax.v1f64" ) ]
1058
- fn vmax_f64_ ( a : float64x1_t , a : float64x1_t ) -> float64x1_t ;
1058
+ fn vmax_f64_ ( a : float64x1_t , b : float64x1_t ) -> float64x1_t ;
1059
1059
}
1060
1060
vmax_f64_ ( a, b)
1061
1061
}
@@ -1068,7 +1068,7 @@ pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
1068
1068
#[ allow( improper_ctypes) ]
1069
1069
extern "C" {
1070
1070
#[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.fmax.v2f64" ) ]
1071
- fn vmaxq_f64_ ( a : float64x2_t , a : float64x2_t ) -> float64x2_t ;
1071
+ fn vmaxq_f64_ ( a : float64x2_t , b : float64x2_t ) -> float64x2_t ;
1072
1072
}
1073
1073
vmaxq_f64_ ( a, b)
1074
1074
}
@@ -1081,7 +1081,7 @@ pub unsafe fn vmin_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
1081
1081
#[ allow( improper_ctypes) ]
1082
1082
extern "C" {
1083
1083
#[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.fmin.v1f64" ) ]
1084
- fn vmin_f64_ ( a : float64x1_t , a : float64x1_t ) -> float64x1_t ;
1084
+ fn vmin_f64_ ( a : float64x1_t , b : float64x1_t ) -> float64x1_t ;
1085
1085
}
1086
1086
vmin_f64_ ( a, b)
1087
1087
}
@@ -1094,11 +1094,69 @@ pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
1094
1094
#[ allow( improper_ctypes) ]
1095
1095
extern "C" {
1096
1096
#[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.fmin.v2f64" ) ]
1097
- fn vminq_f64_ ( a : float64x2_t , a : float64x2_t ) -> float64x2_t ;
1097
+ fn vminq_f64_ ( a : float64x2_t , b : float64x2_t ) -> float64x2_t ;
1098
1098
}
1099
1099
vminq_f64_ ( a, b)
1100
1100
}
1101
1101
1102
+ /// Calculates the square root of each lane of `a` by delegating to `simd_sqrt`. Declared `unsafe` and compiled with the `neon` target feature enabled. NOTE(review): presumably the caller must guarantee NEON is available on the running CPU — confirm.
1103
+ #[ inline]
1104
+ #[ target_feature( enable = "neon" ) ]
1105
+ #[ cfg_attr( test, assert_instr( fsqrt) ) ] // test builds only. NOTE(review): presumably asserts the body compiles to an `fsqrt` instruction — confirm against `assert_instr`
1106
+ pub unsafe fn vsqrt_f32 ( a : float32x2_t ) -> float32x2_t {
1107
+ simd_sqrt ( a) // tail expression: the result of `simd_sqrt` (defined outside this view) is returned unchanged
1108
+ }
1109
+
1110
+ /// Calculates the square root of each lane of the `float32x4_t` argument `a` by delegating to `simd_sqrt`. Declared `unsafe` and compiled with the `neon` target feature enabled. NOTE(review): presumably the caller must guarantee NEON is available on the running CPU — confirm.
1111
+ #[ inline]
1112
+ #[ target_feature( enable = "neon" ) ]
1113
+ #[ cfg_attr( test, assert_instr( fsqrt) ) ] // test builds only. NOTE(review): presumably asserts the body compiles to an `fsqrt` instruction — confirm against `assert_instr`
1114
+ pub unsafe fn vsqrtq_f32 ( a : float32x4_t ) -> float32x4_t {
1115
+ simd_sqrt ( a) // tail expression: the result of `simd_sqrt` (defined outside this view) is returned unchanged
1116
+ }
1117
+
1118
+ /// Calculates the square root of each lane of the `float64x1_t` argument `a` by delegating to `simd_sqrt`. Declared `unsafe` and compiled with the `neon` target feature enabled. NOTE(review): presumably the caller must guarantee NEON is available on the running CPU — confirm.
1119
+ #[ inline]
1120
+ #[ target_feature( enable = "neon" ) ]
1121
+ #[ cfg_attr( test, assert_instr( fsqrt) ) ] // test builds only. NOTE(review): presumably asserts the body compiles to an `fsqrt` instruction — confirm against `assert_instr`
1122
+ pub unsafe fn vsqrt_f64 ( a : float64x1_t ) -> float64x1_t {
1123
+ simd_sqrt ( a) // tail expression: the result of `simd_sqrt` (defined outside this view) is returned unchanged
1124
+ }
1125
+
1126
+ /// Calculates the square root of each lane of the `float64x2_t` argument `a` by delegating to `simd_sqrt`. Declared `unsafe` and compiled with the `neon` target feature enabled. NOTE(review): presumably the caller must guarantee NEON is available on the running CPU — confirm.
1127
+ #[ inline]
1128
+ #[ target_feature( enable = "neon" ) ]
1129
+ #[ cfg_attr( test, assert_instr( fsqrt) ) ] // test builds only. NOTE(review): presumably asserts the body compiles to an `fsqrt` instruction — confirm against `assert_instr`
1130
+ pub unsafe fn vsqrtq_f64 ( a : float64x2_t ) -> float64x2_t {
1131
+ simd_sqrt ( a) // tail expression: the result of `simd_sqrt` (defined outside this view) is returned unchanged
1132
+ }
1133
+
1134
+ /// Reciprocal square-root estimate of `a`, obtained by forwarding the argument to the foreign function bound to `llvm.aarch64.neon.frsqrte.v1f64`. Declared `unsafe` and compiled with the `neon` target feature enabled. NOTE(review): presumably the caller must guarantee NEON is available on the running CPU — confirm.
1135
+ #[ inline]
1136
+ #[ target_feature( enable = "neon" ) ]
1137
+ #[ cfg_attr( test, assert_instr( frsqrte) ) ] // test builds only. NOTE(review): presumably asserts the body compiles to an `frsqrte` instruction — confirm against `assert_instr`
1138
+ pub unsafe fn vrsqrte_f64 ( a : float64x1_t ) -> float64x1_t {
1139
+ #[ allow( improper_ctypes) ] // silences the `improper_ctypes` lint for the extern block below. NOTE(review): presumably needed because the vector type is not considered FFI-safe — confirm
1140
+ extern "C" {
1141
+ #[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.frsqrte.v1f64" ) ] // on aarch64 targets, binds the declaration below to this link name
1142
+ fn vrsqrte_f64_ ( a : float64x1_t ) -> float64x1_t ; // function-local foreign declaration; the trailing underscore keeps it distinct from the public wrapper
1143
+ }
1144
+ vrsqrte_f64_ ( a) // tail expression: forward `a` unchanged and return the foreign function's result
1145
+ }
1146
+
1147
+ /// Reciprocal square-root estimate of the `float64x2_t` argument `a`, obtained by forwarding it to the foreign function bound to `llvm.aarch64.neon.frsqrte.v2f64`. Declared `unsafe` and compiled with the `neon` target feature enabled. NOTE(review): presumably the caller must guarantee NEON is available on the running CPU — confirm.
1148
+ #[ inline]
1149
+ #[ target_feature( enable = "neon" ) ]
1150
+ #[ cfg_attr( test, assert_instr( frsqrte) ) ] // test builds only. NOTE(review): presumably asserts the body compiles to an `frsqrte` instruction — confirm against `assert_instr`
1151
+ pub unsafe fn vrsqrteq_f64 ( a : float64x2_t ) -> float64x2_t {
1152
+ #[ allow( improper_ctypes) ] // silences the `improper_ctypes` lint for the extern block below. NOTE(review): presumably needed because the vector type is not considered FFI-safe — confirm
1153
+ extern "C" {
1154
+ #[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.frsqrte.v2f64" ) ] // on aarch64 targets, binds the declaration below to this link name
1155
+ fn vrsqrteq_f64_ ( a : float64x2_t ) -> float64x2_t ; // function-local foreign declaration; the trailing underscore keeps it distinct from the public wrapper
1156
+ }
1157
+ vrsqrteq_f64_ ( a) // tail expression: forward `a` unchanged and return the foreign function's result
1158
+ }
1159
+
1102
1160
#[ cfg( test) ]
1103
1161
mod test {
1104
1162
use super :: * ;
@@ -2165,4 +2223,52 @@ mod test {
2165
2223
let r: f64x2 = transmute ( vminq_f64 ( transmute ( a) , transmute ( b) ) ) ;
2166
2224
assert_eq ! ( r, e) ;
2167
2225
}
2226
+
2227
+ #[ simd_test( enable = "neon" ) ] // NOTE(review): presumably runs this test only when NEON is available — confirm against `simd_test`
2228
+ unsafe fn test_vsqrt_f32 ( ) {
2229
+ let a: f32x2 = f32x2:: new ( 4.0 , 9.0 ) ; // inputs are perfect squares, so the expected roots are exactly representable
2230
+ let e: f32x2 = f32x2:: new ( 2.0 , 3.0 ) ; // expected square root of each input lane
2231
+ let r: f32x2 = transmute ( vsqrt_f32 ( transmute ( a) ) ) ; // transmute into the intrinsic's argument type, then transmute the result back to `f32x2`
2232
+ assert_eq ! ( r, e) ; // exact equality, no tolerance
2233
+ }
2234
+
2235
+ #[ simd_test( enable = "neon" ) ] // NOTE(review): presumably runs this test only when NEON is available — confirm against `simd_test`
2236
+ unsafe fn test_vsqrtq_f32 ( ) {
2237
+ let a: f32x4 = f32x4:: new ( 4.0 , 9.0 , 16.0 , 25.0 ) ; // inputs are perfect squares, so the expected roots are exactly representable
2238
+ let e: f32x4 = f32x4:: new ( 2.0 , 3.0 , 4.0 , 5.0 ) ; // expected square root of each input lane
2239
+ let r: f32x4 = transmute ( vsqrtq_f32 ( transmute ( a) ) ) ; // transmute into the intrinsic's argument type, then transmute the result back to `f32x4`
2240
+ assert_eq ! ( r, e) ; // exact equality, no tolerance
2241
+ }
2242
+
2243
+ #[ simd_test( enable = "neon" ) ] // NOTE(review): presumably runs this test only when NEON is available — confirm against `simd_test`
2244
+ unsafe fn test_vsqrt_f64 ( ) {
2245
+ let a: f64 = 4.0 ; // a plain `f64` is used as the input and transmuted into the intrinsic's argument type below
2246
+ let e: f64 = 2.0 ; // expected square root of the input
2247
+ let r: f64 = transmute ( vsqrt_f64 ( transmute ( a) ) ) ; // transmute `f64` into the intrinsic's argument type, then transmute the result back to `f64`
2248
+ assert_eq ! ( r, e) ; // exact equality, no tolerance
2249
+ }
2250
+
2251
+ #[ simd_test( enable = "neon" ) ] // NOTE(review): presumably runs this test only when NEON is available — confirm against `simd_test`
2252
+ unsafe fn test_vsqrtq_f64 ( ) {
2253
+ let a: f64x2 = f64x2:: new ( 4.0 , 9.0 ) ; // inputs are perfect squares, so the expected roots are exactly representable
2254
+ let e: f64x2 = f64x2:: new ( 2.0 , 3.0 ) ; // expected square root of each input lane
2255
+ let r: f64x2 = transmute ( vsqrtq_f64 ( transmute ( a) ) ) ; // transmute into the intrinsic's argument type, then transmute the result back to `f64x2`
2256
+ assert_eq ! ( r, e) ; // exact equality, no tolerance
2257
+ }
2258
+
2259
+ #[ simd_test( enable = "neon" ) ] // NOTE(review): presumably runs this test only when NEON is available — confirm against `simd_test`
2260
+ unsafe fn test_vrsqrte_f64 ( ) {
2261
+ let a: f64 = 1.0 ; // a plain `f64` is used as the input and transmuted into the intrinsic's argument type below
2262
+ let e: f64 = 0.998046875 ; // expected estimate for an input of 1.0; not exactly 1.0 because the operation is an estimate
2263
+ let r: f64 = transmute ( vrsqrte_f64 ( transmute ( a) ) ) ; // transmute `f64` into the intrinsic's argument type, then transmute the result back to `f64`
2264
+ assert_eq ! ( r, e) ; // exact equality, no tolerance: the test pins the precise estimate value
2265
+ }
2266
+
2267
+ #[ simd_test( enable = "neon" ) ] // NOTE(review): presumably runs this test only when NEON is available — confirm against `simd_test`
2268
+ unsafe fn test_vrsqrteq_f64 ( ) {
2269
+ let a: f64x2 = f64x2:: new ( 1.0 , 2.0 ) ; // two input lanes
2270
+ let e: f64x2 = f64x2:: new ( 0.998046875 , 0.705078125 ) ; // expected estimates for 1.0 and 2.0; approximations of 1/sqrt(x), not exact values
2271
+ let r: f64x2 = transmute ( vrsqrteq_f64 ( transmute ( a) ) ) ; // transmute into the intrinsic's argument type, then transmute the result back to `f64x2`
2272
+ assert_eq ! ( r, e) ; // exact equality, no tolerance: the test pins the precise estimate values
2273
+ }
2168
2274
}
0 commit comments