1 | /* |
---|
2 | |
---|
3 | This is the WSM5 microphysics scheme accelerated for the NVIDIA GPU. It is experimental and |
---|
4 | is not supported as part of WRF. There is additional information available |
---|
5 | at http://www.mmm.ucar.edu/people/michalakes. Requests for assistance will be |
---|
6 | considered only on a case by case basis, favoring active collaborators. |
---|
7 | |
---|
8 | Required: a Linux x86 or x86_64 system with a CUDA-enabled NVIDIA GPU installed |
---|
9 | as a co-processor as well as the CUDA libraries on a directory in your system, |
---|
10 | for example: |
---|
11 | |
---|
12 | /usr/local/cuda/lib/libcublas.so |
---|
13 | |
---|
14 | included in the CUDA SDK 1.1 from NVIDIA (see nvidia.com). |
---|
15 | |
---|
16 | To use with WRF: |
---|
17 | |
---|
18 | 1) Compile this file and its companion file as: |
---|
19 | |
---|
20 | gcc -c wsm5.cu.c |
---|
21 | gcc -c wsm5_gpu.cu.c |
---|
22 | |
---|
23 | producing wsm5.cu.o and wsm5_gpu.cu.o |
---|
24 | |
---|
25 | 2) configure WRF, generating a configure.wrf file for your system |
---|
26 | Note that serial and dmpar work with the GPU, but smpar |
---|
27 | and dm+sm may not. |
---|
28 | |
---|
29 | 3) Modify configure.wrf: |
---|
30 | |
---|
31 | a) add -DTEST_ON_GPU_RK -DRUN_ON_GPU to ARCH_LOCAL |
---|
32 | b) add ../phys/wsm5.cu.o and ../phys/wsm5_gpu.cu.o to LIB_LOCAL |
---|
33 | (define LIB_LOCAL if it does not already exist) |
---|
34 | c) add -L/usr/local/cuda/lib -lcuda -lcudart to LIB_LOCAL |
---|
35 | (or wherever the cuda lib is on your system) |
---|
36 | |
---|
37 | 4) Compile WRF as usual. |
---|
38 | |
---|
39 | Note: The GPU code is compiled for a maximum of 41 vertical levels. |
---|
40 | If you need a larger number, contact the author below. |
---|
41 | |
---|
42 | 20080721, JM (michalak@ucar.edu) |
---|
43 | |
---|
44 | */ |
---|
45 | |
---|
46 | # 1 "/tmp/tmpxft_00001ecc_00000000-0.c" |
---|
47 | # 1 "<built-in>" |
---|
48 | # 1 "<command line>" |
---|
49 | # 1 "/tmp/tmpxft_00001ecc_00000000-0.c" |
---|
50 | # 1 "y.cu" |
---|
51 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
52 | struct _Z7textureIcLi1EL19cudaTextureReadMode0EE; |
---|
53 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
54 | struct _Z7textureIaLi1EL19cudaTextureReadMode0EE; |
---|
55 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
56 | struct _Z7textureIhLi1EL19cudaTextureReadMode0EE; |
---|
57 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
58 | struct _Z7textureI5char1Li1EL19cudaTextureReadMode0EE; |
---|
59 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
60 | struct _Z7textureI6uchar1Li1EL19cudaTextureReadMode0EE; |
---|
61 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
62 | struct _Z7textureI5char2Li1EL19cudaTextureReadMode0EE; |
---|
63 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
64 | struct _Z7textureI6uchar2Li1EL19cudaTextureReadMode0EE; |
---|
65 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
66 | struct _Z7textureI5char3Li1EL19cudaTextureReadMode0EE; |
---|
67 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
68 | struct _Z7textureI6uchar3Li1EL19cudaTextureReadMode0EE; |
---|
69 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
70 | struct _Z7textureI5char4Li1EL19cudaTextureReadMode0EE; |
---|
71 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
72 | struct _Z7textureI6uchar4Li1EL19cudaTextureReadMode0EE; |
---|
73 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
74 | struct _Z7textureIsLi1EL19cudaTextureReadMode0EE; |
---|
75 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
76 | struct _Z7textureItLi1EL19cudaTextureReadMode0EE; |
---|
77 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
78 | struct _Z7textureI6short1Li1EL19cudaTextureReadMode0EE; |
---|
79 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
80 | struct _Z7textureI7ushort1Li1EL19cudaTextureReadMode0EE; |
---|
81 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
82 | struct _Z7textureI6short2Li1EL19cudaTextureReadMode0EE; |
---|
83 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
84 | struct _Z7textureI7ushort2Li1EL19cudaTextureReadMode0EE; |
---|
85 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
86 | struct _Z7textureI6short3Li1EL19cudaTextureReadMode0EE; |
---|
87 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
88 | struct _Z7textureI7ushort3Li1EL19cudaTextureReadMode0EE; |
---|
89 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
90 | struct _Z7textureI6short4Li1EL19cudaTextureReadMode0EE; |
---|
91 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
92 | struct _Z7textureI7ushort4Li1EL19cudaTextureReadMode0EE; |
---|
93 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
94 | struct _Z7textureIiLi1EL19cudaTextureReadMode0EE; |
---|
95 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
96 | struct _Z7textureIjLi1EL19cudaTextureReadMode0EE; |
---|
97 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
98 | struct _Z7textureI4int1Li1EL19cudaTextureReadMode0EE; |
---|
99 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
100 | struct _Z7textureI5uint1Li1EL19cudaTextureReadMode0EE; |
---|
101 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
102 | struct _Z7textureI4int2Li1EL19cudaTextureReadMode0EE; |
---|
103 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
104 | struct _Z7textureI5uint2Li1EL19cudaTextureReadMode0EE; |
---|
105 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
106 | struct _Z7textureI4int3Li1EL19cudaTextureReadMode0EE; |
---|
107 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
108 | struct _Z7textureI5uint3Li1EL19cudaTextureReadMode0EE; |
---|
109 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
110 | struct _Z7textureI4int4Li1EL19cudaTextureReadMode0EE; |
---|
111 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
112 | struct _Z7textureI5uint4Li1EL19cudaTextureReadMode0EE; |
---|
113 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
114 | struct _Z7textureIcLi1EL19cudaTextureReadMode1EE; |
---|
115 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
116 | struct _Z7textureIaLi1EL19cudaTextureReadMode1EE; |
---|
117 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
118 | struct _Z7textureIhLi1EL19cudaTextureReadMode1EE; |
---|
119 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
120 | struct _Z7textureI5char1Li1EL19cudaTextureReadMode1EE; |
---|
121 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
122 | struct _Z7textureI6uchar1Li1EL19cudaTextureReadMode1EE; |
---|
123 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
124 | struct _Z7textureI5char2Li1EL19cudaTextureReadMode1EE; |
---|
125 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
126 | struct _Z7textureI6uchar2Li1EL19cudaTextureReadMode1EE; |
---|
127 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
128 | struct _Z7textureI5char3Li1EL19cudaTextureReadMode1EE; |
---|
129 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
130 | struct _Z7textureI6uchar3Li1EL19cudaTextureReadMode1EE; |
---|
131 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
132 | struct _Z7textureI5char4Li1EL19cudaTextureReadMode1EE; |
---|
133 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
134 | struct _Z7textureI6uchar4Li1EL19cudaTextureReadMode1EE; |
---|
135 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
136 | struct _Z7textureIsLi1EL19cudaTextureReadMode1EE; |
---|
137 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
138 | struct _Z7textureItLi1EL19cudaTextureReadMode1EE; |
---|
139 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
140 | struct _Z7textureI6short1Li1EL19cudaTextureReadMode1EE; |
---|
141 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
142 | struct _Z7textureI7ushort1Li1EL19cudaTextureReadMode1EE; |
---|
143 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
144 | struct _Z7textureI6short2Li1EL19cudaTextureReadMode1EE; |
---|
145 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
146 | struct _Z7textureI7ushort2Li1EL19cudaTextureReadMode1EE; |
---|
147 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
148 | struct _Z7textureI6short3Li1EL19cudaTextureReadMode1EE; |
---|
149 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
150 | struct _Z7textureI7ushort3Li1EL19cudaTextureReadMode1EE; |
---|
151 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
152 | struct _Z7textureI6short4Li1EL19cudaTextureReadMode1EE; |
---|
153 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
154 | struct _Z7textureI7ushort4Li1EL19cudaTextureReadMode1EE; |
---|
155 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
156 | struct _Z7textureIcLi2EL19cudaTextureReadMode0EE; |
---|
157 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
158 | struct _Z7textureIaLi2EL19cudaTextureReadMode0EE; |
---|
159 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
160 | struct _Z7textureIhLi2EL19cudaTextureReadMode0EE; |
---|
161 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
162 | struct _Z7textureI5char1Li2EL19cudaTextureReadMode0EE; |
---|
163 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
164 | struct _Z7textureI6uchar1Li2EL19cudaTextureReadMode0EE; |
---|
165 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
166 | struct _Z7textureI5char2Li2EL19cudaTextureReadMode0EE; |
---|
167 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
168 | struct _Z7textureI6uchar2Li2EL19cudaTextureReadMode0EE; |
---|
169 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
170 | struct _Z7textureI5char3Li2EL19cudaTextureReadMode0EE; |
---|
171 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
172 | struct _Z7textureI6uchar3Li2EL19cudaTextureReadMode0EE; |
---|
173 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
174 | struct _Z7textureI5char4Li2EL19cudaTextureReadMode0EE; |
---|
175 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
176 | struct _Z7textureI6uchar4Li2EL19cudaTextureReadMode0EE; |
---|
177 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
178 | struct _Z7textureIsLi2EL19cudaTextureReadMode0EE; |
---|
179 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
180 | struct _Z7textureItLi2EL19cudaTextureReadMode0EE; |
---|
181 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
182 | struct _Z7textureI6short1Li2EL19cudaTextureReadMode0EE; |
---|
183 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
184 | struct _Z7textureI7ushort1Li2EL19cudaTextureReadMode0EE; |
---|
185 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
186 | struct _Z7textureI6short2Li2EL19cudaTextureReadMode0EE; |
---|
187 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
188 | struct _Z7textureI7ushort2Li2EL19cudaTextureReadMode0EE; |
---|
189 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
190 | struct _Z7textureI6short3Li2EL19cudaTextureReadMode0EE; |
---|
191 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
192 | struct _Z7textureI7ushort3Li2EL19cudaTextureReadMode0EE; |
---|
193 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
194 | struct _Z7textureI6short4Li2EL19cudaTextureReadMode0EE; |
---|
195 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
196 | struct _Z7textureI7ushort4Li2EL19cudaTextureReadMode0EE; |
---|
197 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
198 | struct _Z7textureIiLi2EL19cudaTextureReadMode0EE; |
---|
199 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
200 | struct _Z7textureIjLi2EL19cudaTextureReadMode0EE; |
---|
201 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
202 | struct _Z7textureI4int1Li2EL19cudaTextureReadMode0EE; |
---|
203 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
204 | struct _Z7textureI5uint1Li2EL19cudaTextureReadMode0EE; |
---|
205 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
206 | struct _Z7textureI4int2Li2EL19cudaTextureReadMode0EE; |
---|
207 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
208 | struct _Z7textureI5uint2Li2EL19cudaTextureReadMode0EE; |
---|
209 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
210 | struct _Z7textureI4int3Li2EL19cudaTextureReadMode0EE; |
---|
211 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
212 | struct _Z7textureI5uint3Li2EL19cudaTextureReadMode0EE; |
---|
213 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
214 | struct _Z7textureI4int4Li2EL19cudaTextureReadMode0EE; |
---|
215 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
216 | struct _Z7textureI5uint4Li2EL19cudaTextureReadMode0EE; |
---|
217 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
218 | struct _Z7textureIiLi1EL19cudaTextureReadMode1EE; |
---|
219 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
220 | struct _Z7textureIjLi1EL19cudaTextureReadMode1EE; |
---|
221 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
222 | struct _Z7textureI4int1Li1EL19cudaTextureReadMode1EE; |
---|
223 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
224 | struct _Z7textureI5uint1Li1EL19cudaTextureReadMode1EE; |
---|
225 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
226 | struct _Z7textureI4int2Li1EL19cudaTextureReadMode1EE; |
---|
227 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
228 | struct _Z7textureI5uint2Li1EL19cudaTextureReadMode1EE; |
---|
229 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
230 | struct _Z7textureI4int3Li1EL19cudaTextureReadMode1EE; |
---|
231 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
232 | struct _Z7textureI5uint3Li1EL19cudaTextureReadMode1EE; |
---|
233 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
234 | struct _Z7textureI4int4Li1EL19cudaTextureReadMode1EE; |
---|
235 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
236 | struct _Z7textureI5uint4Li1EL19cudaTextureReadMode1EE; |
---|
237 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
238 | struct _Z7textureIcLi2EL19cudaTextureReadMode1EE; |
---|
239 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
240 | struct _Z7textureIaLi2EL19cudaTextureReadMode1EE; |
---|
241 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
242 | struct _Z7textureIhLi2EL19cudaTextureReadMode1EE; |
---|
243 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
244 | struct _Z7textureI5char1Li2EL19cudaTextureReadMode1EE; |
---|
245 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
246 | struct _Z7textureI6uchar1Li2EL19cudaTextureReadMode1EE; |
---|
247 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
248 | struct _Z7textureI5char2Li2EL19cudaTextureReadMode1EE; |
---|
249 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
250 | struct _Z7textureI6uchar2Li2EL19cudaTextureReadMode1EE; |
---|
251 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
252 | struct _Z7textureI5char3Li2EL19cudaTextureReadMode1EE; |
---|
253 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
254 | struct _Z7textureI6uchar3Li2EL19cudaTextureReadMode1EE; |
---|
255 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
256 | struct _Z7textureI5char4Li2EL19cudaTextureReadMode1EE; |
---|
257 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
258 | struct _Z7textureI6uchar4Li2EL19cudaTextureReadMode1EE; |
---|
259 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
260 | struct _Z7textureIsLi2EL19cudaTextureReadMode1EE; |
---|
261 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
262 | struct _Z7textureItLi2EL19cudaTextureReadMode1EE; |
---|
263 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
264 | struct _Z7textureI6short1Li2EL19cudaTextureReadMode1EE; |
---|
265 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
266 | struct _Z7textureI7ushort1Li2EL19cudaTextureReadMode1EE; |
---|
267 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
268 | struct _Z7textureI6short2Li2EL19cudaTextureReadMode1EE; |
---|
269 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
270 | struct _Z7textureI7ushort2Li2EL19cudaTextureReadMode1EE; |
---|
271 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
272 | struct _Z7textureI6short3Li2EL19cudaTextureReadMode1EE; |
---|
273 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
274 | struct _Z7textureI7ushort3Li2EL19cudaTextureReadMode1EE; |
---|
275 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
276 | struct _Z7textureI6short4Li2EL19cudaTextureReadMode1EE; |
---|
277 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
278 | struct _Z7textureI7ushort4Li2EL19cudaTextureReadMode1EE; |
---|
279 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
280 | struct _Z7textureIiLi2EL19cudaTextureReadMode1EE; |
---|
281 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
282 | struct _Z7textureIjLi2EL19cudaTextureReadMode1EE; |
---|
283 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
284 | struct _Z7textureI4int1Li2EL19cudaTextureReadMode1EE; |
---|
285 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
286 | struct _Z7textureI5uint1Li2EL19cudaTextureReadMode1EE; |
---|
287 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
288 | struct _Z7textureI4int2Li2EL19cudaTextureReadMode1EE; |
---|
289 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
290 | struct _Z7textureI5uint2Li2EL19cudaTextureReadMode1EE; |
---|
291 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
292 | struct _Z7textureI4int3Li2EL19cudaTextureReadMode1EE; |
---|
293 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
294 | struct _Z7textureI5uint3Li2EL19cudaTextureReadMode1EE; |
---|
295 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
296 | struct _Z7textureI4int4Li2EL19cudaTextureReadMode1EE; |
---|
297 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
298 | struct _Z7textureI5uint4Li2EL19cudaTextureReadMode1EE; |
---|
299 | # 268 "/usr/include/libio.h" 3 |
---|
300 | struct _IO_FILE; |
---|
301 | # 214 "/usr/lib/gcc/x86_64-redhat-linux/4.1.2/include/stddef.h" 3 |
---|
302 | typedef unsigned long size_t; |
---|
303 | # 1 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 1 3 |
---|
304 | # 56 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 3 |
---|
305 | typedef char bool; |
---|
306 | |
---|
307 | |
---|
308 | |
---|
309 | # 1 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 1 3 |
---|
310 | # 48 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 3 |
---|
311 | # 1 "/usr/local/cuda/bin/../include/host_defines.h" 1 3 |
---|
312 | # 49 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 2 3 |
---|
313 | # 1 "/usr/local/cuda/bin/../include/builtin_types.h" 1 3 |
---|
314 | # 42 "/usr/local/cuda/bin/../include/builtin_types.h" 3 |
---|
315 | # 1 "/usr/local/cuda/bin/../include/device_types.h" 1 3 |
---|
316 | # 46 "/usr/local/cuda/bin/../include/device_types.h" 3 |
---|
/* Rounding-mode selector. Values are spelled out; they equal the implicit
 * 0..3 numbering of the original declaration order. */
enum cudaRoundMode {
    cudaRoundNearest = 0,
    cudaRoundZero    = 1,
    cudaRoundPosInf  = 2,
    cudaRoundMinInf  = 3
};
---|
324 | # 43 "/usr/local/cuda/bin/../include/builtin_types.h" 2 3 |
---|
325 | # 1 "/usr/local/cuda/bin/../include/driver_types.h" 1 3 |
---|
326 | # 60 "/usr/local/cuda/bin/../include/driver_types.h" 3 |
---|
/*
 * Status codes used as the return type of the runtime prototypes
 * (via the cudaError_t typedef). cudaSuccess is 0.
 *
 * Every value is written out explicitly; each equals the value the compiler
 * assigned to the original implicit sequence, so the numbering is unchanged.
 */
enum cudaError {
    cudaSuccess                       = 0,
    cudaErrorMissingConfiguration     = 1,
    cudaErrorMemoryAllocation         = 2,
    cudaErrorInitializationError      = 3,
    cudaErrorLaunchFailure            = 4,
    cudaErrorPriorLaunchFailure       = 5,
    cudaErrorLaunchTimeout            = 6,
    cudaErrorLaunchOutOfResources     = 7,
    cudaErrorInvalidDeviceFunction    = 8,
    cudaErrorInvalidConfiguration     = 9,
    cudaErrorInvalidDevice            = 10,
    cudaErrorInvalidValue             = 11,
    cudaErrorInvalidPitchValue        = 12,
    cudaErrorInvalidSymbol            = 13,
    cudaErrorMapBufferObjectFailed    = 14,
    cudaErrorUnmapBufferObjectFailed  = 15,
    cudaErrorInvalidHostPointer       = 16,
    cudaErrorInvalidDevicePointer     = 17,
    cudaErrorInvalidTexture           = 18,
    cudaErrorInvalidTextureBinding    = 19,
    cudaErrorInvalidChannelDescriptor = 20,
    cudaErrorInvalidMemcpyDirection   = 21,
    cudaErrorAddressOfConstant        = 22,
    cudaErrorTextureFetchFailed       = 23,
    cudaErrorTextureNotBound          = 24,
    cudaErrorSynchronizationError     = 25,
    cudaErrorInvalidFilterSetting     = 26,
    cudaErrorInvalidNormSetting       = 27,
    cudaErrorMixedDeviceExecution     = 28,
    cudaErrorCudartUnloading          = 29,
    cudaErrorUnknown                  = 30,
    cudaErrorNotYetImplemented        = 31,
    cudaErrorMemoryValueTooLarge      = 32,
    cudaErrorInvalidResourceHandle    = 33,
    cudaErrorNotReady                 = 34,
    cudaErrorStartupFailure           = 0x7f,
    cudaErrorApiFailureBase           = 10000
};
---|
367 | |
---|
368 | |
---|
/* Copy-direction selector; this is the type of the `kind` parameter of the
 * cudaMemcpy* prototypes. Values are the original 0..3 numbering. */
enum cudaMemcpyKind {
    cudaMemcpyHostToHost     = 0,
    cudaMemcpyHostToDevice   = 1,
    cudaMemcpyDeviceToHost   = 2,
    cudaMemcpyDeviceToDevice = 3
};
---|
376 | |
---|
377 | |
---|
/*
 * Device property record.
 *
 * NOTE(review): what each field means is only suggested by its name; the code
 * that fills this struct is not visible here -- confirm against the runtime
 * documentation. Member order and types are exactly those of the original,
 * so the in-memory layout is unchanged.
 */
struct cudaDeviceProp {
    char   name[256];
    size_t totalGlobalMem, sharedMemPerBlock;
    int    regsPerBlock, warpSize;
    size_t memPitch;
    int    maxThreadsPerBlock;
    int    maxThreadsDim[3];
    int    maxGridSize[3];
    size_t totalConstMem;
    int    major, minor, clockRate;
    size_t textureAlignment;
};
---|
395 | # 154 "/usr/local/cuda/bin/../include/driver_types.h" 3 |
---|
/* Short aliases: the status enum, plus the stream and event handle types,
 * both of which are plain int in this header version. */
typedef enum cudaError cudaError_t;
typedef int            cudaStream_t;
typedef int            cudaEvent_t;
---|
403 | # 44 "/usr/local/cuda/bin/../include/builtin_types.h" 2 3 |
---|
404 | # 1 "/usr/local/cuda/bin/../include/texture_types.h" 1 3 |
---|
405 | # 46 "/usr/local/cuda/bin/../include/texture_types.h" 3 |
---|
406 | struct cudaArray; |
---|
407 | |
---|
408 | |
---|
/* Channel format kind stored in cudaChannelFormatDesc.f; values are the
 * original 0..2 numbering. */
enum cudaChannelFormatKind {
    cudaChannelFormatKindSigned   = 0,
    cudaChannelFormatKindUnsigned = 1,
    cudaChannelFormatKindFloat    = 2
};
---|
415 | |
---|
416 | |
---|
417 | struct cudaChannelFormatDesc |
---|
418 | { |
---|
419 | int x; |
---|
420 | int y; |
---|
421 | int z; |
---|
422 | int w; |
---|
423 | enum cudaChannelFormatKind f; |
---|
424 | }; |
---|
425 | |
---|
426 | |
---|
/* Texture addressing mode; element type of textureReference.addressMode[]. */
enum cudaTextureAddressMode {
    cudaAddressModeWrap  = 0,
    cudaAddressModeClamp = 1
};
---|
432 | |
---|
433 | |
---|
/* Texture filter mode; type of textureReference.filterMode. */
enum cudaTextureFilterMode {
    cudaFilterModePoint  = 0,
    cudaFilterModeLinear = 1
};
---|
439 | |
---|
440 | |
---|
/* Texture read mode; values are the original 0..1 numbering. */
enum cudaTextureReadMode {
    cudaReadModeElementType     = 0,
    cudaReadModeNormalizedFloat = 1
};
---|
446 | |
---|
447 | |
---|
448 | struct textureReference |
---|
449 | { |
---|
450 | int normalized; |
---|
451 | enum cudaTextureFilterMode filterMode; |
---|
452 | enum cudaTextureAddressMode addressMode[2]; |
---|
453 | struct cudaChannelFormatDesc channelDesc; |
---|
454 | }; |
---|
455 | # 45 "/usr/local/cuda/bin/../include/builtin_types.h" 2 3 |
---|
456 | # 1 "/usr/local/cuda/bin/../include/vector_types.h" 1 3 |
---|
457 | # 54 "/usr/local/cuda/bin/../include/vector_types.h" 3 |
---|
/* One signed char component. */
struct char1 {
    signed char x;
};
---|
462 | |
---|
463 | |
---|
/* One unsigned char component. */
struct uchar1 {
    unsigned char x;
};
---|
468 | |
---|
469 | |
---|
/* Two signed char components. */
struct char2 {
    signed char x;
    signed char y;
};
---|
474 | |
---|
475 | |
---|
/* Two unsigned char components. */
struct uchar2 {
    unsigned char x;
    unsigned char y;
};
---|
480 | |
---|
481 | |
---|
/* Three signed char components. */
struct char3 {
    signed char x;
    signed char y;
    signed char z;
};
---|
486 | |
---|
487 | |
---|
/* Three unsigned char components. */
struct uchar3 {
    unsigned char x;
    unsigned char y;
    unsigned char z;
};
---|
492 | |
---|
493 | |
---|
/* Four signed char components. */
struct char4 {
    signed char x;
    signed char y;
    signed char z;
    signed char w;
};
---|
498 | |
---|
499 | |
---|
/* Four unsigned char components. */
struct uchar4 {
    unsigned char x;
    unsigned char y;
    unsigned char z;
    unsigned char w;
};
---|
504 | |
---|
505 | |
---|
/* One short component. */
struct short1 {
    short x;
};
---|
510 | |
---|
511 | |
---|
/* One unsigned short component. */
struct ushort1 {
    unsigned short x;
};
---|
516 | |
---|
517 | |
---|
/* Two short components. */
struct short2 {
    short x;
    short y;
};
---|
522 | |
---|
523 | |
---|
/* Two unsigned short components. */
struct ushort2 {
    unsigned short x;
    unsigned short y;
};
---|
528 | |
---|
529 | |
---|
/* Three short components. */
struct short3 {
    short x;
    short y;
    short z;
};
---|
534 | |
---|
535 | |
---|
/* Three unsigned short components. */
struct ushort3 {
    unsigned short x;
    unsigned short y;
    unsigned short z;
};
---|
540 | |
---|
541 | |
---|
/* Four short components. */
struct short4 {
    short x;
    short y;
    short z;
    short w;
};
---|
546 | |
---|
547 | |
---|
/* Four unsigned short components. */
struct ushort4 {
    unsigned short x;
    unsigned short y;
    unsigned short z;
    unsigned short w;
};
---|
552 | |
---|
553 | |
---|
/* One int component. */
struct int1 {
    int x;
};
---|
558 | |
---|
559 | |
---|
/* One unsigned int component. */
struct uint1 {
    unsigned int x;
};
---|
564 | |
---|
565 | |
---|
/* Two int components. */
struct int2 {
    int x;
    int y;
};
---|
570 | |
---|
571 | |
---|
/* Two unsigned int components. */
struct uint2 {
    unsigned int x;
    unsigned int y;
};
---|
576 | |
---|
577 | |
---|
/* Three int components. */
struct int3 {
    int x;
    int y;
    int z;
};
---|
582 | |
---|
583 | |
---|
/* Three unsigned int components. */
struct uint3 {
    unsigned int x;
    unsigned int y;
    unsigned int z;
};
---|
588 | |
---|
589 | |
---|
/* Four int components. */
struct int4 {
    int x;
    int y;
    int z;
    int w;
};
---|
594 | |
---|
595 | |
---|
/* Four unsigned int components. */
struct uint4 {
    unsigned int x;
    unsigned int y;
    unsigned int z;
    unsigned int w;
};
---|
600 | |
---|
601 | |
---|
/* One long component. */
struct long1 {
    long x;
};
---|
606 | |
---|
607 | |
---|
/* One unsigned long component. */
struct ulong1 {
    unsigned long x;
};
---|
612 | |
---|
613 | |
---|
/* Two long components. */
struct long2 {
    long x;
    long y;
};
---|
618 | |
---|
619 | |
---|
/* Two unsigned long components. */
struct ulong2 {
    unsigned long x;
    unsigned long y;
};
---|
624 | |
---|
625 | |
---|
/* Three long components. */
struct long3 {
    long x;
    long y;
    long z;
};
---|
630 | |
---|
631 | |
---|
/* Three unsigned long components. */
struct ulong3 {
    unsigned long x;
    unsigned long y;
    unsigned long z;
};
---|
636 | |
---|
637 | |
---|
/* Four long components. */
struct long4 {
    long x;
    long y;
    long z;
    long w;
};
---|
642 | |
---|
643 | |
---|
/* Four unsigned long components. */
struct ulong4 {
    unsigned long x;
    unsigned long y;
    unsigned long z;
    unsigned long w;
};
---|
648 | |
---|
649 | |
---|
/* One float component. */
struct float1 {
    float x;
};
---|
654 | |
---|
655 | |
---|
/* Two float components. */
struct float2 {
    float x;
    float y;
};
---|
660 | |
---|
661 | |
---|
/* Three float components. */
struct float3 {
    float x;
    float y;
    float z;
};
---|
666 | |
---|
667 | |
---|
/* Four float components. */
struct float4 {
    float x;
    float y;
    float z;
    float w;
};
---|
672 | |
---|
673 | |
---|
/* Two double components. */
struct double2 {
    double x;
    double y;
};
---|
678 | # 282 "/usr/local/cuda/bin/../include/vector_types.h" 3 |
---|
/*
 * Tag-free aliases for the vector structs, so C code can write `float4`
 * instead of `struct float4`. One alias per struct, in the original order,
 * grouped here by element type.
 */

/* char / unsigned char */
typedef struct char1   char1;
typedef struct uchar1  uchar1;
typedef struct char2   char2;
typedef struct uchar2  uchar2;
typedef struct char3   char3;
typedef struct uchar3  uchar3;
typedef struct char4   char4;
typedef struct uchar4  uchar4;

/* short / unsigned short */
typedef struct short1  short1;
typedef struct ushort1 ushort1;
typedef struct short2  short2;
typedef struct ushort2 ushort2;
typedef struct short3  short3;
typedef struct ushort3 ushort3;
typedef struct short4  short4;
typedef struct ushort4 ushort4;

/* int / unsigned int */
typedef struct int1    int1;
typedef struct uint1   uint1;
typedef struct int2    int2;
typedef struct uint2   uint2;
typedef struct int3    int3;
typedef struct uint3   uint3;
typedef struct int4    int4;
typedef struct uint4   uint4;

/* long / unsigned long */
typedef struct long1   long1;
typedef struct ulong1  ulong1;
typedef struct long2   long2;
typedef struct ulong2  ulong2;
typedef struct long3   long3;
typedef struct ulong3  ulong3;
typedef struct long4   long4;
typedef struct ulong4  ulong4;

/* float / double */
typedef struct float1  float1;
typedef struct float2  float2;
typedef struct float3  float3;
typedef struct float4  float4;
typedef struct double2 double2;
---|
752 | # 363 "/usr/local/cuda/bin/../include/vector_types.h" 3 |
---|
/* Three unsigned int extents (x, y, z). `dim3` names both the struct tag
 * and its typedef alias. */
typedef struct dim3 dim3;

struct dim3 {
    unsigned int x;
    unsigned int y;
    unsigned int z;
};
---|
765 | # 45 "/usr/local/cuda/bin/../include/builtin_types.h" 2 3 |
---|
766 | # 50 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 2 3 |
---|
767 | # 82 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 3 |
---|
768 | extern cudaError_t cudaMalloc(void **devPtr, size_t size); |
---|
769 | extern cudaError_t cudaMallocHost(void **ptr, size_t size); |
---|
770 | extern cudaError_t cudaMallocPitch(void **devPtr, size_t *pitch, size_t width, size_t height); |
---|
771 | extern cudaError_t cudaMallocArray(struct cudaArray **array, const struct cudaChannelFormatDesc *desc, size_t width, size_t height ); |
---|
772 | extern cudaError_t cudaFree(void *devPtr); |
---|
773 | extern cudaError_t cudaFreeHost(void *ptr); |
---|
774 | extern cudaError_t cudaFreeArray(struct cudaArray *array); |
---|
775 | # 97 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 3 |
---|
776 | extern cudaError_t cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind); |
---|
777 | extern cudaError_t cudaMemcpyToArray(struct cudaArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, enum cudaMemcpyKind kind); |
---|
778 | extern cudaError_t cudaMemcpyFromArray(void *dst, const struct cudaArray *src, size_t wOffset, size_t hOffset, size_t count, enum cudaMemcpyKind kind); |
---|
779 | extern cudaError_t cudaMemcpyArrayToArray(struct cudaArray *dst, size_t wOffsetDst, size_t hOffsetDst, const struct cudaArray *src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, enum cudaMemcpyKind kind ); |
---|
780 | extern cudaError_t cudaMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind); |
---|
781 | extern cudaError_t cudaMemcpy2DToArray(struct cudaArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind); |
---|
782 | extern cudaError_t cudaMemcpy2DFromArray(void *dst, size_t dpitch, const struct cudaArray *src, size_t wOffset, size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind); |
---|
783 | extern cudaError_t cudaMemcpy2DArrayToArray(struct cudaArray *dst, size_t wOffsetDst, size_t hOffsetDst, const struct cudaArray *src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, enum cudaMemcpyKind kind ); |
---|
784 | extern cudaError_t cudaMemcpyToSymbol(const char *symbol, const void *src, size_t count, size_t offset , enum cudaMemcpyKind kind ); |
---|
785 | extern cudaError_t cudaMemcpyFromSymbol(void *dst, const char *symbol, size_t count, size_t offset , enum cudaMemcpyKind kind ); |
---|
786 | |
---|
787 | |
---|
788 | |
---|
789 | |
---|
790 | |
---|
791 | |
---|
792 | |
---|
793 | extern cudaError_t cudaMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream); |
---|
794 | extern cudaError_t cudaMemcpyToArrayAsync(struct cudaArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream); |
---|
795 | extern cudaError_t cudaMemcpyFromArrayAsync(void *dst, const struct cudaArray *src, size_t wOffset, size_t hOffset, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream); |
---|
796 | extern cudaError_t cudaMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream); |
---|
797 | extern cudaError_t cudaMemcpy2DToArrayAsync(struct cudaArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream); |
---|
798 | extern cudaError_t cudaMemcpy2DFromArrayAsync(void *dst, size_t dpitch, const struct cudaArray *src, size_t wOffset, size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream); |
---|
799 | |
---|
800 | |
---|
801 | |
---|
802 | |
---|
803 | |
---|
804 | |
---|
805 | |
---|
806 | extern cudaError_t cudaMemset(void *mem, int c, size_t count); |
---|
807 | extern cudaError_t cudaMemset2D(void *mem, size_t pitch, int c, size_t width, size_t height); |
---|
808 | |
---|
809 | |
---|
810 | |
---|
811 | |
---|
812 | |
---|
813 | |
---|
814 | |
---|
815 | extern cudaError_t cudaGetSymbolAddress(void **devPtr, const char *symbol); |
---|
816 | extern cudaError_t cudaGetSymbolSize(size_t *size, const char *symbol); |
---|
817 | |
---|
818 | |
---|
819 | |
---|
820 | |
---|
821 | |
---|
822 | |
---|
823 | |
---|
824 | extern cudaError_t cudaGetDeviceCount(int *count); |
---|
825 | extern cudaError_t cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device); |
---|
826 | extern cudaError_t cudaChooseDevice(int *device, const struct cudaDeviceProp *prop); |
---|
827 | extern cudaError_t cudaSetDevice(int device); |
---|
828 | extern cudaError_t cudaGetDevice(int *device); |
---|
829 | |
---|
830 | |
---|
831 | |
---|
832 | |
---|
833 | |
---|
834 | |
---|
835 | |
---|
836 | extern cudaError_t cudaBindTexture(size_t *offset, const struct textureReference *texref, const void *devPtr, const struct cudaChannelFormatDesc *desc, size_t size ); |
---|
837 | extern cudaError_t cudaBindTextureToArray(const struct textureReference *texref, const struct cudaArray *array, const struct cudaChannelFormatDesc *desc); |
---|
838 | extern cudaError_t cudaUnbindTexture(const struct textureReference *texref); |
---|
839 | extern cudaError_t cudaGetTextureAlignmentOffset(size_t *offset, const struct textureReference *texref); |
---|
840 | extern cudaError_t cudaGetTextureReference(const struct textureReference **texref, const char *symbol); |
---|
841 | |
---|
842 | |
---|
843 | |
---|
844 | |
---|
845 | |
---|
846 | |
---|
847 | |
---|
848 | extern cudaError_t cudaGetChannelDesc(struct cudaChannelFormatDesc *desc, const struct cudaArray *array); |
---|
849 | extern struct cudaChannelFormatDesc cudaCreateChannelDesc(int x, int y, int z, int w, enum cudaChannelFormatKind f); |
---|
850 | |
---|
851 | |
---|
852 | |
---|
853 | |
---|
854 | |
---|
855 | |
---|
856 | |
---|
857 | extern cudaError_t cudaGetLastError(void); |
---|
858 | extern const char* cudaGetErrorString(cudaError_t error); |
---|
859 | |
---|
860 | |
---|
861 | |
---|
862 | |
---|
863 | |
---|
864 | |
---|
865 | |
---|
866 | extern cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem , cudaStream_t stream ); |
---|
867 | extern cudaError_t cudaSetupArgument(const void *arg, size_t size, size_t offset); |
---|
868 | extern cudaError_t cudaLaunch(const char *symbol); |
---|
869 | |
---|
870 | |
---|
871 | |
---|
872 | |
---|
873 | |
---|
874 | |
---|
875 | |
---|
876 | extern cudaError_t cudaStreamCreate(cudaStream_t *stream); |
---|
877 | extern cudaError_t cudaStreamDestroy(cudaStream_t stream); |
---|
878 | extern cudaError_t cudaStreamSynchronize(cudaStream_t stream); |
---|
879 | extern cudaError_t cudaStreamQuery(cudaStream_t stream); |
---|
880 | |
---|
881 | |
---|
882 | |
---|
883 | |
---|
884 | |
---|
885 | |
---|
886 | |
---|
887 | extern cudaError_t cudaEventCreate(cudaEvent_t *event); |
---|
888 | extern cudaError_t cudaEventRecord(cudaEvent_t event, cudaStream_t stream); |
---|
889 | extern cudaError_t cudaEventQuery(cudaEvent_t event); |
---|
890 | extern cudaError_t cudaEventSynchronize(cudaEvent_t event); |
---|
891 | extern cudaError_t cudaEventDestroy(cudaEvent_t event); |
---|
892 | extern cudaError_t cudaEventElapsedTime(float *ms, cudaEvent_t start, cudaEvent_t end); |
---|
893 | |
---|
894 | |
---|
895 | |
---|
896 | |
---|
897 | |
---|
898 | |
---|
899 | |
---|
900 | extern cudaError_t cudaThreadExit(void); |
---|
901 | extern cudaError_t cudaThreadSynchronize(void); |
---|
902 | # 61 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 2 3 |
---|
903 | # 1 "/usr/local/cuda/bin/../include/crt/storage_class.h" 1 3 |
---|
904 | # 62 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 2 3 |
---|
905 | # 216 "/usr/lib/gcc/x86_64-redhat-linux/4.1.2/include/stddef.h" 2 3 |
---|
906 | # 148 "/usr/include/bits/types.h" 3 |
---|
907 | typedef long __clock_t; |
---|
908 | # 61 "/usr/include/time.h" 3 |
---|
909 | typedef __clock_t clock_t; |
---|
910 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
911 | struct _Z7textureIcLi1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; /* Generated C struct: the tag is an Itanium-mangled name that decodes as texture<char, 1, (cudaTextureReadMode)0>; its sole member is a struct textureReference. The same-shaped structs that follow differ only in the mangled element type, dimension and read mode. NOTE(review): the __b_ member prefix suggests a flattened C++ base class - confirm against texture_types.h. Do not rename: tags are compiler-generated. */ |
---|
912 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
913 | struct _Z7textureIaLi1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
914 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
915 | struct _Z7textureIhLi1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
916 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
917 | struct _Z7textureI5char1Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
918 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
919 | struct _Z7textureI6uchar1Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
920 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
921 | struct _Z7textureI5char2Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
922 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
923 | struct _Z7textureI6uchar2Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
924 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
925 | struct _Z7textureI5char3Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
926 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
927 | struct _Z7textureI6uchar3Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
928 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
929 | struct _Z7textureI5char4Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
930 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
931 | struct _Z7textureI6uchar4Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
932 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
933 | struct _Z7textureIsLi1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
934 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
935 | struct _Z7textureItLi1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
936 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
937 | struct _Z7textureI6short1Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
938 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
939 | struct _Z7textureI7ushort1Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
940 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
941 | struct _Z7textureI6short2Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
942 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
943 | struct _Z7textureI7ushort2Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
944 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
945 | struct _Z7textureI6short3Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
946 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
947 | struct _Z7textureI7ushort3Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
948 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
949 | struct _Z7textureI6short4Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
950 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
951 | struct _Z7textureI7ushort4Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
952 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
953 | struct _Z7textureIiLi1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
954 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
955 | struct _Z7textureIjLi1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
956 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
957 | struct _Z7textureI4int1Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
958 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
959 | struct _Z7textureI5uint1Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
960 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
961 | struct _Z7textureI4int2Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
962 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
963 | struct _Z7textureI5uint2Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
964 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
965 | struct _Z7textureI4int3Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
966 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
967 | struct _Z7textureI5uint3Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
968 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
969 | struct _Z7textureI4int4Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
970 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
971 | struct _Z7textureI5uint4Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
972 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
973 | struct _Z7textureIcLi1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
974 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
975 | struct _Z7textureIaLi1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
976 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
977 | struct _Z7textureIhLi1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
978 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
979 | struct _Z7textureI5char1Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
980 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
981 | struct _Z7textureI6uchar1Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
982 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
983 | struct _Z7textureI5char2Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
984 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
985 | struct _Z7textureI6uchar2Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
986 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
987 | struct _Z7textureI5char3Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
988 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
989 | struct _Z7textureI6uchar3Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
990 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
991 | struct _Z7textureI5char4Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
992 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
993 | struct _Z7textureI6uchar4Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
994 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
995 | struct _Z7textureIsLi1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
996 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
997 | struct _Z7textureItLi1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
998 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
999 | struct _Z7textureI6short1Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1000 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1001 | struct _Z7textureI7ushort1Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1002 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1003 | struct _Z7textureI6short2Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1004 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1005 | struct _Z7textureI7ushort2Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1006 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1007 | struct _Z7textureI6short3Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1008 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1009 | struct _Z7textureI7ushort3Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1010 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1011 | struct _Z7textureI6short4Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1012 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1013 | struct _Z7textureI7ushort4Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1014 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1015 | struct _Z7textureIcLi2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1016 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1017 | struct _Z7textureIaLi2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1018 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1019 | struct _Z7textureIhLi2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1020 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1021 | struct _Z7textureI5char1Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1022 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1023 | struct _Z7textureI6uchar1Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1024 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1025 | struct _Z7textureI5char2Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1026 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1027 | struct _Z7textureI6uchar2Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1028 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1029 | struct _Z7textureI5char3Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1030 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1031 | struct _Z7textureI6uchar3Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1032 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1033 | struct _Z7textureI5char4Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1034 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1035 | struct _Z7textureI6uchar4Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1036 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1037 | struct _Z7textureIsLi2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1038 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1039 | struct _Z7textureItLi2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1040 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1041 | struct _Z7textureI6short1Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1042 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1043 | struct _Z7textureI7ushort1Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1044 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1045 | struct _Z7textureI6short2Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1046 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1047 | struct _Z7textureI7ushort2Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1048 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1049 | struct _Z7textureI6short3Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1050 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1051 | struct _Z7textureI7ushort3Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1052 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1053 | struct _Z7textureI6short4Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1054 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1055 | struct _Z7textureI7ushort4Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1056 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1057 | struct _Z7textureIiLi2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1058 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1059 | struct _Z7textureIjLi2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1060 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1061 | struct _Z7textureI4int1Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1062 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1063 | struct _Z7textureI5uint1Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1064 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1065 | struct _Z7textureI4int2Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1066 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1067 | struct _Z7textureI5uint2Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1068 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1069 | struct _Z7textureI4int3Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1070 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1071 | struct _Z7textureI5uint3Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1072 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1073 | struct _Z7textureI4int4Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1074 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1075 | struct _Z7textureI5uint4Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;}; |
---|
1076 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1077 | struct _Z7textureIiLi1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1078 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1079 | struct _Z7textureIjLi1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1080 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1081 | struct _Z7textureI4int1Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1082 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1083 | struct _Z7textureI5uint1Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1084 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1085 | struct _Z7textureI4int2Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1086 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1087 | struct _Z7textureI5uint2Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1088 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1089 | struct _Z7textureI4int3Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1090 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1091 | struct _Z7textureI5uint3Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1092 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1093 | struct _Z7textureI4int4Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1094 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1095 | struct _Z7textureI5uint4Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1096 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1097 | struct _Z7textureIcLi2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1098 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1099 | struct _Z7textureIaLi2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1100 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1101 | struct _Z7textureIhLi2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1102 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1103 | struct _Z7textureI5char1Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1104 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1105 | struct _Z7textureI6uchar1Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1106 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1107 | struct _Z7textureI5char2Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1108 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1109 | struct _Z7textureI6uchar2Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1110 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1111 | struct _Z7textureI5char3Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1112 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1113 | struct _Z7textureI6uchar3Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1114 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1115 | struct _Z7textureI5char4Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1116 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1117 | struct _Z7textureI6uchar4Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1118 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1119 | struct _Z7textureIsLi2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1120 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1121 | struct _Z7textureItLi2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1122 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1123 | struct _Z7textureI6short1Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1124 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1125 | struct _Z7textureI7ushort1Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1126 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1127 | struct _Z7textureI6short2Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1128 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1129 | struct _Z7textureI7ushort2Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1130 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1131 | struct _Z7textureI6short3Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1132 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1133 | struct _Z7textureI7ushort3Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1134 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1135 | struct _Z7textureI6short4Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1136 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1137 | struct _Z7textureI7ushort4Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1138 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1139 | struct _Z7textureIiLi2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1140 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1141 | struct _Z7textureIjLi2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1142 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1143 | struct _Z7textureI4int1Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1144 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1145 | struct _Z7textureI5uint1Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1146 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1147 | struct _Z7textureI4int2Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1148 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1149 | struct _Z7textureI5uint2Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1150 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1151 | struct _Z7textureI4int3Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1152 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1153 | struct _Z7textureI5uint3Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1154 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1155 | struct _Z7textureI4int4Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1156 | # 115 "/usr/local/cuda/bin/../include/texture_types.h" |
---|
1157 | struct _Z7textureI5uint4Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;}; |
---|
1158 | # 46 "/usr/include/stdio.h" 3 |
---|
1159 | typedef struct _IO_FILE FILE; |
---|
1160 | void *memcpy(void*, const void*, size_t); void *memset(void*, int, size_t); |
---|
1161 | # 82 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" |
---|
1162 | extern cudaError_t cudaMalloc(void **, size_t); |
---|
1163 | |
---|
1164 | |
---|
1165 | |
---|
1166 | extern cudaError_t cudaFree(void *); |
---|
1167 | # 97 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" |
---|
1168 | extern cudaError_t cudaMemcpy(void *, const void *, size_t, enum cudaMemcpyKind); |
---|
1169 | # 145 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" |
---|
1170 | extern cudaError_t cudaGetDeviceCount(int *); |
---|
1171 | extern cudaError_t cudaGetDeviceProperties(struct cudaDeviceProp *, int); |
---|
1172 | |
---|
1173 | extern cudaError_t cudaSetDevice(int); |
---|
1174 | # 187 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" |
---|
1175 | extern cudaError_t cudaConfigureCall(dim3, dim3, size_t, cudaStream_t); |
---|
1176 | # 222 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" |
---|
1177 | extern cudaError_t cudaThreadSynchronize(void); |
---|
1178 | # 184 "/usr/include/time.h" 3 |
---|
1179 | extern __attribute__((__weak__)) clock_t clock(void); |
---|
1180 | # 58 "/usr/local/cuda/bin/../include/common_functions.h" |
---|
1181 | extern __attribute__((__weak__)) void *memset(void *, int, size_t); |
---|
1182 | # 65 "/usr/local/cuda/bin/../include/math_functions.h" |
---|
1183 | extern __attribute__((__weak__)) int abs(int) __attribute__((__const__)); |
---|
1184 | |
---|
1185 | extern __attribute__((__weak__)) long labs(long) __attribute__((__const__)); |
---|
1186 | |
---|
1187 | extern __attribute__((__weak__)) long long llabs(long long) __attribute__((__const__)); |
---|
1188 | |
---|
1189 | extern __attribute__((__weak__)) double fabs(double) __attribute__((__const__)); |
---|
1190 | |
---|
1191 | extern __attribute__((__weak__)) float fabsf(float) __attribute__((__const__)); |
---|
1192 | |
---|
1193 | |
---|
1194 | extern __attribute__((__weak__)) int min(int, int); |
---|
1195 | |
---|
1196 | extern __attribute__((__weak__)) unsigned umin(unsigned, unsigned); |
---|
1197 | |
---|
1198 | extern __attribute__((__weak__)) float fminf(float, float); |
---|
1199 | |
---|
1200 | extern __attribute__((__weak__)) double fmin(double, double); |
---|
1201 | |
---|
1202 | |
---|
1203 | extern __attribute__((__weak__)) int max(int, int); |
---|
1204 | |
---|
1205 | extern __attribute__((__weak__)) unsigned umax(unsigned, unsigned); |
---|
1206 | |
---|
1207 | extern __attribute__((__weak__)) float fmaxf(float, float); |
---|
1208 | |
---|
1209 | extern __attribute__((__weak__)) double fmax(double, double); |
---|
1210 | |
---|
1211 | |
---|
1212 | extern __attribute__((__weak__)) double sin(double); |
---|
1213 | |
---|
1214 | extern __attribute__((__weak__)) float sinf(float); |
---|
1215 | |
---|
1216 | |
---|
1217 | extern __attribute__((__weak__)) double cos(double); |
---|
1218 | |
---|
1219 | extern __attribute__((__weak__)) float cosf(float); |
---|
1220 | |
---|
1221 | |
---|
1222 | extern __attribute__((__weak__)) void sincos(double, double *, double *); |
---|
1223 | |
---|
1224 | extern __attribute__((__weak__)) void sincosf(float, float *, float *); |
---|
1225 | |
---|
1226 | |
---|
1227 | extern __attribute__((__weak__)) double tan(double); |
---|
1228 | |
---|
1229 | extern __attribute__((__weak__)) float tanf(float); |
---|
1230 | |
---|
1231 | |
---|
1232 | extern __attribute__((__weak__)) double sqrt(double); |
---|
1233 | |
---|
1234 | extern __attribute__((__weak__)) float sqrtf(float); |
---|
1235 | |
---|
1236 | |
---|
1237 | extern __attribute__((__weak__)) double rsqrt(double); |
---|
1238 | |
---|
1239 | extern __attribute__((__weak__)) float rsqrtf(float); |
---|
1240 | |
---|
1241 | |
---|
1242 | extern __attribute__((__weak__)) double exp2(double); |
---|
1243 | |
---|
1244 | extern __attribute__((__weak__)) float exp2f(float); |
---|
1245 | |
---|
1246 | |
---|
1247 | extern __attribute__((__weak__)) double exp10(double); |
---|
1248 | |
---|
1249 | extern __attribute__((__weak__)) float exp10f(float); |
---|
1250 | |
---|
1251 | |
---|
1252 | extern __attribute__((__weak__)) double expm1(double); |
---|
1253 | |
---|
1254 | extern __attribute__((__weak__)) float expm1f(float); |
---|
1255 | |
---|
1256 | |
---|
1257 | extern __attribute__((__weak__)) double log2(double); |
---|
1258 | |
---|
1259 | extern __attribute__((__weak__)) float log2f(float); |
---|
1260 | |
---|
1261 | |
---|
1262 | extern __attribute__((__weak__)) double log10(double); |
---|
1263 | |
---|
1264 | extern __attribute__((__weak__)) float log10f(float); |
---|
1265 | |
---|
1266 | |
---|
1267 | extern __attribute__((__weak__)) double log(double); |
---|
1268 | |
---|
1269 | extern __attribute__((__weak__)) float logf(float); |
---|
1270 | |
---|
1271 | |
---|
1272 | extern __attribute__((__weak__)) double log1p(double); |
---|
1273 | |
---|
1274 | extern __attribute__((__weak__)) float log1pf(float); |
---|
1275 | |
---|
1276 | |
---|
1277 | extern __attribute__((__weak__)) double floor(double) __attribute__((__const__)); |
---|
1278 | |
---|
1279 | extern __attribute__((__weak__)) float floorf(float) __attribute__((__const__)); |
---|
1280 | |
---|
1281 | |
---|
1282 | extern __attribute__((__weak__)) double exp(double); |
---|
1283 | |
---|
1284 | extern __attribute__((__weak__)) float expf(float); |
---|
1285 | |
---|
1286 | |
---|
1287 | extern __attribute__((__weak__)) double cosh(double); |
---|
1288 | |
---|
1289 | extern __attribute__((__weak__)) float coshf(float); |
---|
1290 | |
---|
1291 | |
---|
1292 | extern __attribute__((__weak__)) double sinh(double); |
---|
1293 | |
---|
1294 | extern __attribute__((__weak__)) float sinhf(float); |
---|
1295 | |
---|
1296 | |
---|
1297 | extern __attribute__((__weak__)) double tanh(double); |
---|
1298 | |
---|
1299 | extern __attribute__((__weak__)) float tanhf(float); |
---|
1300 | |
---|
1301 | |
---|
1302 | extern __attribute__((__weak__)) double acosh(double); |
---|
1303 | |
---|
1304 | extern __attribute__((__weak__)) float acoshf(float); |
---|
1305 | |
---|
1306 | |
---|
1307 | extern __attribute__((__weak__)) double asinh(double); |
---|
1308 | |
---|
1309 | extern __attribute__((__weak__)) float asinhf(float); |
---|
1310 | |
---|
1311 | |
---|
1312 | extern __attribute__((__weak__)) double atanh(double); |
---|
1313 | |
---|
1314 | extern __attribute__((__weak__)) float atanhf(float); |
---|
1315 | |
---|
1316 | |
---|
1317 | extern __attribute__((__weak__)) double ldexp(double, int); |
---|
1318 | |
---|
1319 | extern __attribute__((__weak__)) float ldexpf(float, int); |
---|
1320 | |
---|
1321 | |
---|
1322 | extern __attribute__((__weak__)) double logb(double); |
---|
1323 | |
---|
1324 | extern __attribute__((__weak__)) float logbf(float); |
---|
1325 | |
---|
1326 | |
---|
1327 | extern __attribute__((__weak__)) int ilogb(double); |
---|
1328 | |
---|
1329 | extern __attribute__((__weak__)) int ilogbf(float); |
---|
1330 | |
---|
1331 | |
---|
1332 | extern __attribute__((__weak__)) double scalbn(double, int); |
---|
1333 | |
---|
1334 | extern __attribute__((__weak__)) float scalbnf(float, int); |
---|
1335 | |
---|
1336 | |
---|
1337 | extern __attribute__((__weak__)) double scalbln(double, long); |
---|
1338 | |
---|
1339 | extern __attribute__((__weak__)) float scalblnf(float, long); |
---|
1340 | |
---|
1341 | |
---|
1342 | extern __attribute__((__weak__)) double frexp(double, int *); |
---|
1343 | |
---|
1344 | extern __attribute__((__weak__)) float frexpf(float, int *); |
---|
1345 | |
---|
1346 | |
---|
1347 | extern __attribute__((__weak__)) double round(double) __attribute__((__const__)); |
---|
1348 | |
---|
1349 | extern __attribute__((__weak__)) float roundf(float) __attribute__((__const__)); |
---|
1350 | |
---|
1351 | |
---|
1352 | extern __attribute__((__weak__)) long lround(double); |
---|
1353 | |
---|
1354 | extern __attribute__((__weak__)) long lroundf(float); |
---|
1355 | |
---|
1356 | |
---|
1357 | extern __attribute__((__weak__)) long long llround(double); |
---|
1358 | |
---|
1359 | extern __attribute__((__weak__)) long long llroundf(float); |
---|
1360 | |
---|
1361 | |
---|
1362 | extern __attribute__((__weak__)) double rint(double); |
---|
1363 | |
---|
1364 | extern __attribute__((__weak__)) float rintf(float); |
---|
1365 | |
---|
1366 | |
---|
1367 | extern __attribute__((__weak__)) long lrint(double); |
---|
1368 | |
---|
1369 | extern __attribute__((__weak__)) long lrintf(float); |
---|
1370 | |
---|
1371 | |
---|
1372 | extern __attribute__((__weak__)) long long llrint(double); |
---|
1373 | |
---|
1374 | extern __attribute__((__weak__)) long long llrintf(float); |
---|
1375 | |
---|
1376 | |
---|
1377 | extern __attribute__((__weak__)) double nearbyint(double); |
---|
1378 | |
---|
1379 | extern __attribute__((__weak__)) float nearbyintf(float); |
---|
1380 | |
---|
1381 | |
---|
1382 | extern __attribute__((__weak__)) double ceil(double) __attribute__((__const__)); |
---|
1383 | |
---|
1384 | extern __attribute__((__weak__)) float ceilf(float) __attribute__((__const__)); |
---|
1385 | |
---|
1386 | |
---|
1387 | extern __attribute__((__weak__)) double trunc(double) __attribute__((__const__)); |
---|
1388 | |
---|
1389 | extern __attribute__((__weak__)) float truncf(float) __attribute__((__const__)); |
---|
1390 | |
---|
1391 | |
---|
1392 | extern __attribute__((__weak__)) double fdim(double, double); |
---|
1393 | |
---|
1394 | extern __attribute__((__weak__)) float fdimf(float, float); |
---|
1395 | |
---|
1396 | |
---|
1397 | extern __attribute__((__weak__)) double atan2(double, double); |
---|
1398 | |
---|
1399 | extern __attribute__((__weak__)) float atan2f(float, float); |
---|
1400 | |
---|
1401 | |
---|
1402 | extern __attribute__((__weak__)) double atan(double); |
---|
1403 | |
---|
1404 | extern __attribute__((__weak__)) float atanf(float); |
---|
1405 | |
---|
1406 | |
---|
1407 | extern __attribute__((__weak__)) double asin(double); |
---|
1408 | |
---|
1409 | extern __attribute__((__weak__)) float asinf(float); |
---|
1410 | |
---|
1411 | |
---|
1412 | extern __attribute__((__weak__)) double acos(double); |
---|
1413 | |
---|
1414 | extern __attribute__((__weak__)) float acosf(float); |
---|
1415 | |
---|
1416 | |
---|
1417 | extern __attribute__((__weak__)) double hypot(double, double); |
---|
1418 | |
---|
1419 | extern __attribute__((__weak__)) float hypotf(float, float); |
---|
1420 | |
---|
1421 | |
---|
1422 | extern __attribute__((__weak__)) double cbrt(double); |
---|
1423 | |
---|
1424 | extern __attribute__((__weak__)) float cbrtf(float); |
---|
1425 | |
---|
1426 | |
---|
1427 | extern __attribute__((__weak__)) double pow(double, double); |
---|
1428 | |
---|
1429 | extern __attribute__((__weak__)) float powf(float, float); |
---|
1430 | |
---|
1431 | |
---|
1432 | extern __attribute__((__weak__)) double modf(double, double *); |
---|
1433 | |
---|
1434 | extern __attribute__((__weak__)) float modff(float, float *); |
---|
1435 | |
---|
1436 | |
---|
1437 | extern __attribute__((__weak__)) double fmod(double, double); |
---|
1438 | |
---|
1439 | extern __attribute__((__weak__)) float fmodf(float, float); |
---|
1440 | |
---|
1441 | |
---|
1442 | extern __attribute__((__weak__)) double remainder(double, double); |
---|
1443 | |
---|
1444 | extern __attribute__((__weak__)) float remainderf(float, float); |
---|
1445 | |
---|
1446 | |
---|
1447 | extern __attribute__((__weak__)) double remquo(double, double, int *); |
---|
1448 | |
---|
1449 | extern __attribute__((__weak__)) float remquof(float, float, int *); |
---|
1450 | |
---|
1451 | |
---|
1452 | extern __attribute__((__weak__)) double erf(double); |
---|
1453 | |
---|
1454 | extern __attribute__((__weak__)) float erff(float); |
---|
1455 | |
---|
1456 | |
---|
1457 | extern __attribute__((__weak__)) double erfc(double); |
---|
1458 | |
---|
1459 | extern __attribute__((__weak__)) float erfcf(float); |
---|
1460 | |
---|
1461 | |
---|
1462 | extern __attribute__((__weak__)) double lgamma(double); |
---|
1463 | |
---|
1464 | extern __attribute__((__weak__)) float lgammaf(float); |
---|
1465 | |
---|
1466 | |
---|
1467 | extern __attribute__((__weak__)) double tgamma(double); |
---|
1468 | |
---|
1469 | extern __attribute__((__weak__)) float tgammaf(float); |
---|
1470 | |
---|
1471 | |
---|
1472 | extern __attribute__((__weak__)) double copysign(double, double) __attribute__((__const__)); |
---|
1473 | |
---|
1474 | extern __attribute__((__weak__)) float copysignf(float, float) __attribute__((__const__)); |
---|
1475 | |
---|
1476 | |
---|
1477 | extern __attribute__((__weak__)) double nextafter(double, double) __attribute__((__const__)); |
---|
1478 | |
---|
1479 | extern __attribute__((__weak__)) float nextafterf(float, float) __attribute__((__const__)); |
---|
1480 | |
---|
1481 | |
---|
1482 | extern __attribute__((__weak__)) double nan(const char *) __attribute__((__const__)); |
---|
1483 | |
---|
1484 | extern __attribute__((__weak__)) float nanf(const char *) __attribute__((__const__)); |
---|
1485 | |
---|
1486 | |
---|
1487 | extern __attribute__((__weak__)) int __signbit(double) __attribute__((__const__)); |
---|
1488 | |
---|
1489 | extern __attribute__((__weak__)) int __signbitf(float) __attribute__((__const__)); |
---|
1490 | |
---|
1491 | |
---|
1492 | extern __attribute__((__weak__)) int __isinf(double) __attribute__((__const__)); |
---|
1493 | |
---|
1494 | extern __attribute__((__weak__)) int __isinff(float) __attribute__((__const__)); |
---|
1495 | |
---|
1496 | |
---|
1497 | extern __attribute__((__weak__)) int __isnan(double) __attribute__((__const__)); |
---|
1498 | |
---|
1499 | extern __attribute__((__weak__)) int __isnanf(float) __attribute__((__const__)); |
---|
1500 | |
---|
1501 | |
---|
1502 | extern __attribute__((__weak__)) int __finite(double) __attribute__((__const__)); |
---|
1503 | |
---|
1504 | extern __attribute__((__weak__)) int __finitef(float) __attribute__((__const__)); |
---|
1505 | |
---|
1506 | |
---|
1507 | extern __attribute__((__weak__)) double fma(double, double, double); |
---|
1508 | |
---|
1509 | extern __attribute__((__weak__)) float fmaf(float, float, float); |
---|
1510 | # 193 "/usr/include/bits/mathcalls.h" 3 |
---|
1511 | extern __attribute__((__weak__)) int __isinfl(long double) __attribute__((__const__)); |
---|
1512 | |
---|
1513 | |
---|
1514 | extern __attribute__((__weak__)) int __finitel(long double) __attribute__((__const__)); |
---|
1515 | # 231 "/usr/include/bits/mathcalls.h" 3 |
---|
1516 | extern __attribute__((__weak__)) int __isnanl(long double) __attribute__((__const__)); |
---|
1517 | # 350 "/usr/include/bits/mathcalls.h" 3 |
---|
1518 | extern __attribute__((__weak__)) int __signbitl(long double) __attribute__((__const__)); |
---|
1519 | # 589 "/usr/include/stdlib.h" 3 |
---|
1520 | extern void *malloc(size_t) __attribute__((__malloc__)); |
---|
1521 | # 327 "/usr/include/stdio.h" 3 |
---|
1522 | extern int fprintf(FILE *, const char *, ...); |
---|
1523 | # 113 "y.cu" |
---|
1524 | extern int rsl_internal_microclock_(void); |
---|
1525 | # 135 "y.cu" |
---|
1526 | extern int gethostname(char *, size_t); |
---|
1527 | # 142 "y.cu" |
---|
1528 | extern int wsm5_gpu_init_(int *, int *, int *); |
---|
1529 | # 199 "y.cu" |
---|
1530 | extern int wsm5_host_(float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *); |
---|
1531 | # 470 "y.cu" |
---|
1532 | extern int get_wsm5_gpu_levels_(int *); |
---|
1533 | extern void __sti___29_tmpxft_00001ecc_00000000_2_ii_91788a12(void) __attribute__((__constructor__)); |
---|
1534 | # 144 "/usr/include/stdio.h" 3 |
---|
1535 | extern struct _IO_FILE *stderr; |
---|
1536 | # 1 "/tmp/tmpxft_00001ecc_00000000-0.stub.h" 1 3 |
---|
1537 | |
---|
1538 | |
---|
1539 | |
---|
1540 | |
---|
1541 | extern void __device_stub__Z8wsm5_gpuPfS_S_S_S_S_S_S_S_S_S_S_S_S_S_fS_iiiiiiiiiiiiiiiiii(float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float, float *, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int); |
---|
1542 | # 146 "/usr/include/stdio.h" 2 3 |
---|
1543 | # 142 "y.cu" |
---|
1544 | int wsm5_gpu_init_( int *myproc, int *nproc, int *mydevice) |
---|
1545 | { |
---|
1546 | auto float x; |
---|
1547 | # 144 "y.cu" |
---|
1548 | auto float *x_d; |
---|
1549 | auto int s; |
---|
1550 | # 145 "y.cu" |
---|
1551 | auto int e; |
---|
1552 | auto int i; |
---|
1553 | # 146 "y.cu" |
---|
1554 | auto int dc; |
---|
1555 | auto cudaError_t cerr; |
---|
1556 | auto char hostname[64]; |
---|
1557 | auto struct cudaDeviceProp dp; |
---|
1558 | |
---|
1559 | cudaGetDeviceCount((&dc)); |
---|
1560 | if (dc > 4) |
---|
1561 | { fprintf(stderr, "warning: more than %d devices on node (%d)\n", 4, dc); dc = 4; } |
---|
1562 | fprintf(stderr, "Number of devices on this node: %d\n", dc); |
---|
1563 | |
---|
1564 | |
---|
1565 | |
---|
1566 | i = ((*mydevice)); |
---|
1567 | if (dc > 0) |
---|
1568 | { |
---|
1569 | if ((int)(cerr = (cudaSetDevice(i)))) { |
---|
1570 | fprintf(stderr, " non-zero cerr %d\n", ((int)cerr)); |
---|
1571 | } |
---|
1572 | } |
---|
1573 | gethostname(((char *)hostname), 64UL); |
---|
1574 | fprintf(stderr, "Setting device %02d for task %03d on host %s\n", i, ((*myproc)), ((char *)hostname)); |
---|
1575 | |
---|
1576 | if ((int)(cerr = (cudaGetDeviceProperties((&dp), i)))) { |
---|
1577 | fprintf(stderr, "Device %02d: cerr = %d\n", ((int)cerr)); |
---|
1578 | } else { |
---|
1579 | fprintf(stderr, "Device %02d: name %s\n", i, ((char *)(&dp.name))); |
---|
1580 | fprintf(stderr, "Device %02d: mem %d\n", i, ((dp.totalGlobalMem))); |
---|
1581 | fprintf(stderr, "Device %02d: smem %d\n", i, ((dp.sharedMemPerBlock))); |
---|
1582 | fprintf(stderr, "Device %02d: nreg %d\n", i, ((dp.regsPerBlock))); |
---|
1583 | fprintf(stderr, "Device %02d: warp %d\n", i, ((dp.warpSize))); |
---|
1584 | fprintf(stderr, "Device %02d: pitch %d\n", i, ((dp.memPitch))); |
---|
1585 | fprintf(stderr, "Device %02d: maxthrds %d\n", i, ((dp.maxThreadsPerBlock))); |
---|
1586 | fprintf(stderr, "Device %02d: maxtdim %d %d %d\n", i, (((int *)(&dp.maxThreadsDim))[0]), (((int *)(&dp.maxThreadsDim))[1]), (((int *)(&dp.maxThreadsDim))[2])); |
---|
1587 | |
---|
1588 | |
---|
1589 | fprintf(stderr, "Device %02d: maxgdim %d %d %d\n", i, (((int *)(&dp.maxGridSize))[0]), (((int *)(&dp.maxGridSize))[1]), (((int *)(&dp.maxGridSize))[2])); |
---|
1590 | |
---|
1591 | |
---|
1592 | fprintf(stderr, "Device %02d: clock %d\n", i, ((dp.clockRate))); |
---|
1593 | fprintf(stderr, "Device %02d: talign %d\n", i, ((dp.textureAlignment))); |
---|
1594 | } |
---|
1595 | |
---|
1596 | |
---|
1597 | s = (rsl_internal_microclock_()); |
---|
1598 | cudaMalloc(((void **)(&x_d)), 4UL); |
---|
1599 | cudaMemcpy(((void *)x_d), ((const void *)(&x)), 4UL, cudaMemcpyHostToDevice); |
---|
1600 | cudaFree(((void *)x_d)); |
---|
1601 | e = (rsl_internal_microclock_()); |
---|
1602 | fprintf(stderr, "wsm5_init: %d\n", (e - s)); |
---|
1603 | return 0; |
---|
1604 | } |
---|
1605 | |
---|
1606 | |
---|
1607 | int wsm5_host_( |
---|
1608 | float *th, float *pii, |
---|
1609 | float *q, |
---|
1610 | float *qc, float *qi, float *qr, float *qs, |
---|
1611 | float *den, float *p, float *delz, |
---|
1612 | |
---|
1613 | |
---|
1614 | |
---|
1615 | float *delt, |
---|
1616 | float *rain, float *rainncv, |
---|
1617 | float *sr, |
---|
1618 | float *snow, float *snowncv, |
---|
1619 | int *ids, int *ide, int *jds, int *jde, int *kds, int *kde, |
---|
1620 | int *ims, int *ime, int *jms, int *jme, int *kms, int *kme, |
---|
1621 | int *ips, int *ipe, int *jps, int *jpe, int *kps, int *kpe) |
---|
1622 | |
---|
1623 | { auto unsigned __T20; |
---|
1624 | auto unsigned __T21; |
---|
1625 | auto float *bigbuf; |
---|
1626 | auto int s; |
---|
1627 | # 218 "y.cu" |
---|
1628 | auto int e; |
---|
1629 | # 218 "y.cu" |
---|
1630 | auto int s2; |
---|
1631 | # 218 "y.cu" |
---|
1632 | auto int e2; |
---|
1633 | auto int d3; |
---|
1634 | auto int d2; |
---|
1635 | # 229 "y.cu" |
---|
1636 | auto int dips; |
---|
1637 | # 229 "y.cu" |
---|
1638 | auto int dipe; |
---|
1639 | |
---|
1640 | auto int djps; |
---|
1641 | # 231 "y.cu" |
---|
1642 | auto int djpe; |
---|
1643 | auto int dkps; |
---|
1644 | # 232 "y.cu" |
---|
1645 | auto int dkpe; |
---|
1646 | # 242 "y.cu" |
---|
1647 | auto float *th_d; |
---|
1648 | auto float *pii_d; |
---|
1649 | auto float *q_d; |
---|
1650 | auto float *qc_d; |
---|
1651 | auto float *qi_d; |
---|
1652 | auto float *qr_d; |
---|
1653 | auto float *qs_d; |
---|
1654 | auto float *den_d; |
---|
1655 | auto float *p_d; |
---|
1656 | auto float *delz_d; |
---|
1657 | |
---|
1658 | |
---|
1659 | |
---|
1660 | auto float *rain_d; |
---|
1661 | auto float *rainncv_d; |
---|
1662 | auto float *sr_d; |
---|
1663 | auto float *snow_d; |
---|
1664 | auto float *snowncv_d; |
---|
1665 | auto float retvals[100]; |
---|
1666 | |
---|
1667 | |
---|
1668 | |
---|
1669 | auto float *retvals_d; |
---|
1670 | |
---|
1671 | auto int remx; |
---|
1672 | # 266 "y.cu" |
---|
1673 | auto int remy; |
---|
1674 | |
---|
1675 | |
---|
1676 | |
---|
1677 | |
---|
1678 | auto dim3 dimBlock; |
---|
1679 | |
---|
1680 | auto dim3 dimGrid; |
---|
1681 | # 219 "y.cu" |
---|
1682 | d3 = ((((((*ime)) - ((*ims))) + 1) * ((((*jme)) - ((*jms))) + 1)) * ((((*kme)) - ((*kms))) + 1)); |
---|
1683 | d2 = (((((*ime)) - ((*ims))) + 1) * ((((*jme)) - ((*jms))) + 1)); |
---|
1684 | # 229 "y.cu" |
---|
1685 | dips = 0; dipe = ((((*ipe)) - ((*ips))) + 1); |
---|
1686 | |
---|
1687 | djps = 0; djpe = ((((*jpe)) - ((*jps))) + 1); |
---|
1688 | dkps = 0; dkpe = ((((*kpe)) - ((*kps))) + 1); |
---|
1689 | |
---|
1690 | bigbuf = ((float *)(malloc((((unsigned long)((dipe * djpe) * dkpe)) * 4UL)))); |
---|
1691 | # 241 "y.cu" |
---|
1692 | s = (rsl_internal_microclock_()); |
---|
1693 | cudaMalloc(((void **)(&th_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)th_d), ((const void *)th), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice); |
---|
1694 | cudaMalloc(((void **)(&pii_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)pii_d), ((const void *)pii), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice); |
---|
1695 | cudaMalloc(((void **)(&q_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)q_d), ((const void *)q), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice); |
---|
1696 | cudaMalloc(((void **)(&qc_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)qc_d), ((const void *)qc), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice); |
---|
1697 | cudaMalloc(((void **)(&qi_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)qi_d), ((const void *)qi), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice); |
---|
1698 | cudaMalloc(((void **)(&qr_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)qr_d), ((const void *)qr), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice); |
---|
1699 | cudaMalloc(((void **)(&qs_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)qs_d), ((const void *)qs), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice); |
---|
1700 | cudaMalloc(((void **)(&den_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)den_d), ((const void *)den), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice); |
---|
1701 | cudaMalloc(((void **)(&p_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)p_d), ((const void *)p), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice); |
---|
1702 | cudaMalloc(((void **)(&delz_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)delz_d), ((const void *)delz), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice); |
---|
1703 | |
---|
1704 | |
---|
1705 | |
---|
1706 | cudaMalloc(((void **)(&rain_d)), (((unsigned long)d2) * 4UL)); cudaMemcpy(((void *)rain_d), ((const void *)rain), (((unsigned long)d2) * 4UL), cudaMemcpyHostToDevice); |
---|
1707 | cudaMalloc(((void **)(&rainncv_d)), (((unsigned long)d2) * 4UL)); cudaMemcpy(((void *)rainncv_d), ((const void *)rainncv), (((unsigned long)d2) * 4UL), cudaMemcpyHostToDevice); |
---|
1708 | cudaMalloc(((void **)(&sr_d)), (((unsigned long)d2) * 4UL)); cudaMemcpy(((void *)sr_d), ((const void *)sr), (((unsigned long)d2) * 4UL), cudaMemcpyHostToDevice); |
---|
1709 | cudaMalloc(((void **)(&snow_d)), (((unsigned long)d2) * 4UL)); cudaMemcpy(((void *)snow_d), ((const void *)snow), (((unsigned long)d2) * 4UL), cudaMemcpyHostToDevice); |
---|
1710 | cudaMalloc(((void **)(&snowncv_d)), (((unsigned long)d2) * 4UL)); cudaMemcpy(((void *)snowncv_d), ((const void *)snowncv), (((unsigned long)d2) * 4UL), cudaMemcpyHostToDevice); |
---|
1711 | |
---|
1712 | { auto int k; |
---|
1713 | for (k = 0; (k < ((((*kme)) - ((*kms))) + 1)); k++) { (((float *)retvals)[k]) = (0.0F); } |
---|
1714 | } |
---|
1715 | cudaMalloc(((void **)(&retvals_d)), (((unsigned long)((((*kme)) - ((*kms))) + 1)) * 4UL)); cudaMemcpy(((void *)retvals_d), ((const void *)((float *)retvals)), (((unsigned long)((((*kme)) - ((*kms))) + 1)) * 4UL), cudaMemcpyHostToDevice); |
---|
1716 | |
---|
1717 | |
---|
1718 | |
---|
1719 | remx = ((((((*ipe)) - ((*ips))) + 1) % 16) ? 1 : 0); |
---|
1720 | remy = ((((((*jpe)) - ((*jps))) + 1) % 8) ? 1 : 0); |
---|
1721 | |
---|
1722 | { (dimBlock.x) = 16U; (dimBlock.y) = 8U; (dimBlock.z) = 1U; } |
---|
1723 | |
---|
1724 | { __T20 = ((unsigned)((((((*ipe)) - ((*ips))) + 1) / 16) + remx)); __T21 = ((unsigned)((((((*jpe)) - ((*jps))) + 1) / 8) + remy)); { (dimGrid.x) = __T20; (dimGrid.y) = __T21; (dimGrid.z) = 1U; } } |
---|
1725 | |
---|
1726 | fprintf(stderr, "Call to wsm5_gpu: block dims %d %d\n", ((dimBlock.x)), ((dimBlock.y))); |
---|
1727 | fprintf(stderr, "Call to wsm5_gpu: grid dims %d %d\n", ((dimGrid.x)), ((dimGrid.y))); |
---|
1728 | # 284 "y.cu" |
---|
1729 | s2 = (rsl_internal_microclock_()); |
---|
1730 | ((int)(cudaConfigureCall(dimGrid, dimBlock, 0UL, 0))) ? ((void)0) : (__device_stub__Z8wsm5_gpuPfS_S_S_S_S_S_S_S_S_S_S_S_S_S_fS_iiiiiiiiiiiiiiiiii(th_d, pii_d, q_d, qc_d, qi_d, qr_d, qs_d, den_d, p_d, delz_d, rain_d, rainncv_d, sr_d, snow_d, snowncv_d, ((*delt)), retvals_d, (dips + 1), ((((*ipe)) - ((*ips))) + 1), (djps + 1), ((((*jpe)) - ((*jps))) + 1), (dkps + 1), ((((*kpe)) - ((*kps))) + 1), (dips + 1), dipe, (djps + 1), djpe, (dkps + 1), dkpe, (dips + 1), dipe, (djps + 1), djpe, (dkps + 1), dkpe)); |
---|
1731 | # 299 "y.cu" |
---|
1732 | cudaThreadSynchronize(); |
---|
1733 | e2 = (rsl_internal_microclock_()); |
---|
1734 | fprintf(stderr, "Call to wsm5_gpu (not including data xfer): %d microseconds\n", (e2 - s2)); |
---|
1735 | |
---|
1736 | |
---|
1737 | cudaMemcpy(((void *)th), ((const void *)th_d), (((unsigned long)d3) * 4UL), cudaMemcpyDeviceToHost); |
---|
1738 | cudaMemcpy(((void *)pii), ((const void *)pii_d), (((unsigned long)d3) * 4UL), cudaMemcpyDeviceToHost); |
---|
1739 | cudaMemcpy(((void *)q), ((const void *)q_d), (((unsigned long)d3) * 4UL), cudaMemcpyDeviceToHost); |
---|
1740 | cudaMemcpy(((void *)qc), ((const void *)qc_d), (((unsigned long)d3) * 4UL), cudaMemcpyDeviceToHost); |
---|
1741 | cudaMemcpy(((void *)qi), ((const void *)qi_d), (((unsigned long)d3) * 4UL), cudaMemcpyDeviceToHost); |
---|
1742 | cudaMemcpy(((void *)qr), ((const void *)qr_d), (((unsigned long)d3) * 4UL), cudaMemcpyDeviceToHost); |
---|
1743 | cudaMemcpy(((void *)qs), ((const void *)qs_d), (((unsigned long)d3) * 4UL), cudaMemcpyDeviceToHost); |
---|
1744 | |
---|
1745 | |
---|
1746 | |
---|
1747 | cudaMemcpy(((void *)rain), ((const void *)rain_d), (((unsigned long)d2) * 4UL), cudaMemcpyDeviceToHost); |
---|
1748 | cudaMemcpy(((void *)rainncv), ((const void *)rainncv_d), (((unsigned long)d2) * 4UL), cudaMemcpyDeviceToHost); |
---|
1749 | cudaMemcpy(((void *)sr), ((const void *)sr_d), (((unsigned long)d2) * 4UL), cudaMemcpyDeviceToHost); |
---|
1750 | cudaMemcpy(((void *)snow), ((const void *)snow_d), (((unsigned long)d2) * 4UL), cudaMemcpyDeviceToHost); |
---|
1751 | cudaMemcpy(((void *)snowncv), ((const void *)snowncv_d), (((unsigned long)d2) * 4UL), cudaMemcpyDeviceToHost); |
---|
1752 | e = (rsl_internal_microclock_()); |
---|
1753 | |
---|
1754 | cudaMemcpy(((void *)((float *)retvals)), ((const void *)retvals_d), (((unsigned long)((((*kme)) - ((*kms))) + 1)) * 4UL), cudaMemcpyDeviceToHost); |
---|
1755 | fprintf(stderr, "Call to wsm5_gpu (including data xfer): %d microseconds\n", (e - s)); |
---|
1756 | |
---|
1757 | { |
---|
1758 | |
---|
1759 | |
---|
1760 | } |
---|
1761 | |
---|
1762 | cudaFree(((void *)th_d)); |
---|
1763 | cudaFree(((void *)pii_d)); |
---|
1764 | cudaFree(((void *)q_d)); |
---|
1765 | cudaFree(((void *)qc_d)); |
---|
1766 | cudaFree(((void *)qi_d)); |
---|
1767 | cudaFree(((void *)qr_d)); |
---|
1768 | cudaFree(((void *)qs_d)); |
---|
1769 | cudaFree(((void *)den_d)); |
---|
1770 | cudaFree(((void *)p_d)); |
---|
1771 | cudaFree(((void *)delz_d)); |
---|
1772 | |
---|
1773 | |
---|
1774 | |
---|
1775 | cudaFree(((void *)rain_d)); |
---|
1776 | cudaFree(((void *)rainncv_d)); |
---|
1777 | cudaFree(((void *)sr_d)); |
---|
1778 | cudaFree(((void *)snow_d)); |
---|
1779 | cudaFree(((void *)snowncv_d)); |
---|
1780 | cudaFree(((void *)retvals_d)); |
---|
1781 | |
---|
1782 | return 0; |
---|
1783 | } |
---|
1784 | # 470 "y.cu" |
---|
/*
 * get_wsm5_gpu_levels_ -- report the vertical-level limit through *retval.
 *
 * Stores 41 in *retval (the file header states the GPU code is compiled for
 * at most 41 vertical levels).  Returns 0 on success, 1 if retval is null.
 * The function is declared to return int, so it must return a value on
 * every path.
 */
int get_wsm5_gpu_levels_( int *retval)
{
  if (!retval) {
    return 1;
  }
  (*retval) = 41;
  return 0;
}
---|
/* Generated static-initialisation hook for this translation unit.
 * It has nothing to initialise, so the body is intentionally empty. */
void __sti___29_tmpxft_00001ecc_00000000_2_ii_91788a12(void)
{
  return;
}
---|
1790 | # 1 "/tmp/tmpxft_00001ecc_00000000-0.stub.c" 1 |
---|
1791 | |
---|
1792 | |
---|
1793 | |
---|
1794 | # 1 "/tmp/tmpxft_00001ecc_00000000-1.c" 1 |
---|
1795 | # 1 "/usr/local/cuda/bin/../include/__cudaFatFormat.h" 1 |
---|
1796 | # 97 "/usr/local/cuda/bin/../include/__cudaFatFormat.h" |
---|
/* One (profile name, cubin text) pair in a fat-binary cubin table. */
typedef struct
{
  char *gpuProfileName;   /* e.g. "sm_10" in this file's table */
  char *cubin;            /* cubin image for that profile */
} __cudaFatCubinEntry;
---|
1801 | # 113 "/usr/local/cuda/bin/../include/__cudaFatFormat.h" |
---|
/* One (profile name, PTX text) pair in a fat-binary PTX table. */
typedef struct
{
  char *gpuProfileName;
  char *ptx;
} __cudaFatPtxEntry;
---|
1806 | # 125 "/usr/local/cuda/bin/../include/__cudaFatFormat.h" |
---|
/* One (profile name, debug text) pair in a fat-binary debug table. */
typedef struct
{
  char *gpuProfileName;
  char *debug;
} __cudaFatDebugEntry;
---|
1811 | |
---|
1812 | |
---|
/* Bit flags for the `flags` word of a fat-binary descriptor. */
typedef enum
{
  __cudaFatDontSearchFlag = 0x1,   /* bit 0 */
  __cudaFatDontCacheFlag  = 0x2    /* bit 1 */
} __cudaFatCudaBinaryFlag;
---|
1817 | # 145 "/usr/local/cuda/bin/../include/__cudaFatFormat.h" |
---|
1818 | typedef struct { |
---|
1819 | unsigned long magic; |
---|
1820 | unsigned long version; |
---|
1821 | unsigned long gpuInfoVersion; |
---|
1822 | char* key; |
---|
1823 | char* ident; |
---|
1824 | char* usageMode; |
---|
1825 | __cudaFatPtxEntry *ptx; |
---|
1826 | __cudaFatCubinEntry *cubin; |
---|
1827 | __cudaFatDebugEntry *debug; |
---|
1828 | void* debugInfo; |
---|
1829 | unsigned int flags; |
---|
1830 | } __cudaFatCudaBinary; |
---|
1831 | # 189 "/usr/local/cuda/bin/../include/__cudaFatFormat.h" |
---|
1832 | void fatGetCubinForGpu( __cudaFatCudaBinary *binary, char* gpuName, char* *cubin, char* *dbgInfoFile ); |
---|
1833 | # 2 "/tmp/tmpxft_00001ecc_00000000-1.c" 2 |
---|
1834 | |
---|
1835 | |
---|
1836 | |
---|
1837 | |
---|
1838 | |
---|
1839 | |
---|
1840 | |
---|
1841 | static const unsigned char __deviceText[] = { |
---|
1842 | 0x61,0x72,0x63,0x68,0x69,0x74,0x65,0x63,0x74,0x75,0x72,0x65,0x20,0x7b,0x73,0x6d, |
---|
1843 | 0x5f,0x31,0x30,0x7d,0x0a,0x61,0x62,0x69,0x76,0x65,0x72,0x73,0x69,0x6f,0x6e,0x20, |
---|
1844 | 0x7b,0x30,0x7d,0x0a,0x63,0x6f,0x64,0x65,0x20,0x20,0x7b,0x0a,0x09,0x6e,0x61,0x6d, |
---|
1845 | 0x65,0x20,0x3d,0x20,0x5f,0x5f,0x64,0x75,0x6d,0x6d,0x79,0x5f,0x65,0x6e,0x74,0x72, |
---|
1846 | 0x79,0x5f,0x5f,0x0a,0x09,0x6c,0x6d,0x65,0x6d,0x20,0x3d,0x20,0x30,0x0a,0x09,0x73, |
---|
1847 | 0x6d,0x65,0x6d,0x20,0x3d,0x20,0x30,0x0a,0x09,0x72,0x65,0x67,0x20,0x3d,0x20,0x30, |
---|
1848 | 0x0a,0x09,0x62,0x61,0x72,0x20,0x3d,0x20,0x30,0x0a,0x09,0x62,0x69,0x6e,0x63,0x6f, |
---|
1849 | 0x64,0x65,0x20,0x20,0x7b,0x0a,0x09,0x09,0x30,0x78,0x66,0x30,0x30,0x30,0x30,0x30, |
---|
1850 | 0x30,0x31,0x20,0x30,0x78,0x65,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x20,0x0a,0x09, |
---|
1851 | 0x7d,0x0a,0x7d,0x0a,0x00 |
---|
1852 | }; |
---|
1853 | |
---|
1854 | |
---|
1855 | |
---|
1856 | |
---|
1857 | |
---|
1858 | static __cudaFatPtxEntry __ptxEntries [] = {{0,0}}; |
---|
1859 | static __cudaFatCubinEntry __cubinEntries[] = {{"sm_10",(char*)__deviceText},{0,0}}; |
---|
1860 | static __cudaFatDebugEntry __debugEntries[] = {{0,0}}; |
---|
1861 | |
---|
1862 | static __cudaFatCudaBinary __fatDeviceText __attribute__ ((section (".nvFatBinSegment")))= {0x1ee55a01,0x00000002,0x840b5bca,"81bb892378501d16","y.cu"," ",__ptxEntries,__cubinEntries,__debugEntries,0,0}; |
---|
1863 | # 5 "/tmp/tmpxft_00001ecc_00000000-0.stub.c" 2 |
---|
1864 | # 1 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 1 |
---|
1865 | # 65 "/usr/local/cuda/bin/../include/crt/host_runtime.h" |
---|
1866 | # 1 "/usr/local/cuda/bin/../include/host_defines.h" 1 |
---|
1867 | # 66 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 2 |
---|
1868 | # 88 "/usr/local/cuda/bin/../include/crt/host_runtime.h" |
---|
1869 | extern void** __cudaRegisterFatBinary( |
---|
1870 | void *fatCubin |
---|
1871 | ); |
---|
1872 | |
---|
1873 | extern void __cudaUnregisterFatBinary( |
---|
1874 | void **fatCubinHandle |
---|
1875 | ); |
---|
1876 | |
---|
1877 | extern void __cudaRegisterVar( |
---|
1878 | void **fatCubinHandle, |
---|
1879 | char *hostVar, |
---|
1880 | char *deviceAddress, |
---|
1881 | const char *deviceName, |
---|
1882 | int ext, |
---|
1883 | int size, |
---|
1884 | int constant, |
---|
1885 | int global |
---|
1886 | ); |
---|
1887 | |
---|
1888 | extern void __cudaRegisterTexture( |
---|
1889 | void **fatCubinHandle, |
---|
1890 | const struct textureReference *hostVar, |
---|
1891 | const void **deviceAddress, |
---|
1892 | const char *deviceName, |
---|
1893 | int dim, |
---|
1894 | int norm, |
---|
1895 | int ext |
---|
1896 | ); |
---|
1897 | |
---|
1898 | extern void __cudaRegisterShared( |
---|
1899 | void **fatCubinHandle, |
---|
1900 | void **devicePtr |
---|
1901 | ); |
---|
1902 | |
---|
1903 | extern void __cudaRegisterFunction( |
---|
1904 | void **fatCubinHandle, |
---|
1905 | const char *hostFun, |
---|
1906 | char *deviceFun, |
---|
1907 | const char *deviceName, |
---|
1908 | int thread_limit, |
---|
1909 | uint3 *tid, |
---|
1910 | uint3 *bid, |
---|
1911 | dim3 *bDim, |
---|
1912 | dim3 *gDim |
---|
1913 | ); |
---|
1914 | |
---|
1915 | |
---|
1916 | |
---|
1917 | |
---|
1918 | |
---|
/* Handle for this translation unit's fat binary; it is the argument handed to
   __cudaUnregisterFatBinary() below.  NOTE(review): presumably assigned from
   __cudaRegisterFatBinary() elsewhere in the stub -- not visible here. */
static void **__cudaFatCubinHandle;

/* Unregisters this translation unit's fat binary by passing the stored handle
   to __cudaUnregisterFatBinary(). */
static void __cudaUnregisterBinaryUtil(void)
{
  __cudaUnregisterFatBinary(__cudaFatCubinHandle);
}
---|
1925 | |
---|
1926 | |
---|
1927 | |
---|
/* Tear-down hook.  The GCC `destructor` attribute requests that this function
   be run automatically at program/library tear-down; it only forwards to
   __cudaUnregisterBinaryUtil(). */
__attribute__((destructor)) static void __cudaUnregisterBinary(void)
{
  __cudaUnregisterBinaryUtil();
}
---|
1932 | # 195 "/usr/local/cuda/bin/../include/crt/host_runtime.h" |
---|
1933 | # 1 "/usr/local/cuda/bin/../include/common_functions.h" 1 |
---|
1934 | # 64 "/usr/local/cuda/bin/../include/common_functions.h" |
---|
1935 | # 1 "/usr/local/cuda/bin/../include/crt/func_macro.h" 1 3 |
---|
1936 | # 65 "/usr/local/cuda/bin/../include/common_functions.h" 2 |
---|
1937 | |
---|
/* Host stand-in for the device clock: returns the C library clock() reading
   converted to long.  Declared weak so that identical copies in other
   translation units can coexist at link time. */
extern __attribute__((weak)) long __cuda_clock(void);
long __cuda_clock(void)
{
  const long ticks = clock();

  return ticks;
}
---|
1942 | |
---|
/* Byte-wise fill: writes (char)c into the first n bytes at s and returns s.
   Declared weak so that identical copies in other translation units can
   coexist at link time. */
extern __attribute__((weak)) void *__cuda_memset(void *s, int c, size_t n);
void *__cuda_memset(void *s, int c, size_t n)
{
  char *const base = (char*)s;
  const char fill = (char)c;
  size_t idx;

  for (idx = 0; idx < n; ++idx) {
    base[idx] = fill;
  }

  return s;
}
---|
1951 | # 88 "/usr/local/cuda/bin/../include/common_functions.h" |
---|
1952 | # 1 "/usr/local/cuda/bin/../include/math_functions.h" 1 3 |
---|
1953 | # 794 "/usr/local/cuda/bin/../include/math_functions.h" 3 |
---|
1954 | extern __attribute__((weak)) int __cuda___signbitl(long double a); int __cuda___signbitl(long double a); |
---|
1955 | extern __attribute__((weak)) int __cuda___isinfl(long double a); int __cuda___isinfl(long double a); |
---|
1956 | extern __attribute__((weak)) int __cuda___isnanl(long double a); int __cuda___isnanl(long double a); |
---|
1957 | extern __attribute__((weak)) int __cuda___finitel(long double a); int __cuda___finitel(long double a); |
---|
1958 | # 834 "/usr/local/cuda/bin/../include/math_functions.h" 3 |
---|
/* Integer absolute value; forwards to the C library abs(). */
extern __attribute__((weak)) int __cuda_abs(int a);
int __cuda_abs(int a)
{
  const int magnitude = abs(a);

  return magnitude;
}
---|
1963 | |
---|
/* Single-precision absolute value; forwards to the C library fabsf(). */
extern __attribute__((weak)) float __cuda_fabsf(float a);
float __cuda_fabsf(float a)
{
  const float magnitude = fabsf(a);

  return magnitude;
}
---|
1968 | |
---|
/* 64-bit integer absolute value; forwards to the C library llabs(). */
extern __attribute__((weak)) long long int __cuda_llabs(long long int a);
long long int __cuda_llabs(long long int a)
{
  const long long int magnitude = llabs(a);

  return magnitude;
}
---|
1977 | |
---|
1978 | extern __attribute__((weak)) float __cuda_exp2f(float a); float __cuda_exp2f(float a) |
---|
1979 | { |
---|
1980 | return exp2f(a); |
---|
1981 | } |
---|
1982 | |
---|
1983 | # 1 "/usr/local/cuda/bin/../include/device_functions.h" 1 3 |
---|
1984 | # 322 "/usr/local/cuda/bin/../include/device_functions.h" 3 |
---|
1985 | # 1 "/usr/local/cuda/bin/../include/math_constants.h" 1 3 |
---|
1986 | # 323 "/usr/local/cuda/bin/../include/device_functions.h" 2 3 |
---|
1987 | |
---|
1988 | |
---|
1989 | |
---|
1990 | extern __attribute__((weak)) int __cuda___isnan(double a); int __cuda___isnan(double a); |
---|
1991 | extern __attribute__((weak)) int __cuda___isnanf(float a); int __cuda___isnanf(float a); |
---|
1992 | static int __double2int_rz(double); |
---|
1993 | static unsigned int __double2uint_rz(double); |
---|
1994 | static long long int __double2ll_rz(double); |
---|
1995 | static unsigned long long int __double2ull_rz(double); |
---|
1996 | # 345 "/usr/local/cuda/bin/../include/device_functions.h" 3 |
---|
/* Returns the upper 32 bits of the signed 64-bit product a * b. */
static int __mulhi(int a, int b)
{
  const long long int wide = (long long int)a * (long long int)b;
  const long long int upper = wide >> 32;

  return (int)upper;
}
---|
2003 | |
---|
/* Returns the upper 32 bits of the unsigned 64-bit product a * b. */
static unsigned int __umulhi(unsigned int a, unsigned int b)
{
  const unsigned long long int wide =
      (unsigned long long int)a * (unsigned long long int)b;
  const unsigned long long int upper = wide >> 32;

  return (unsigned int)upper;
}
---|
2010 | |
---|
/* Returns the upper 64 bits of the unsigned 128-bit product a * b.
   The operands are split into 32-bit halves; the partial products are
   combined and the carry out of the middle 32-bit column is propagated. */
static unsigned long long int __umul64hi(unsigned long long int a, unsigned long long int b)
{
  const unsigned int lo_a = (unsigned int)a;
  const unsigned int lo_b = (unsigned int)b;
  const unsigned long long int hi_a = a >> 32;
  const unsigned long long int hi_b = b >> 32;
  const unsigned long long int cross_ab = lo_a * hi_b;   /* lo(a) * hi(b) */
  const unsigned long long int cross_ba = hi_a * lo_b;   /* hi(a) * lo(b) */
  unsigned long long int column;
  unsigned int carry;

  /* Sum of everything that lands in bits 32..63 of the full product. */
  column = 0ULL;
  column += __umulhi(lo_a, lo_b);
  column += (unsigned int)cross_ab;
  column += (unsigned int)cross_ba;
  carry = (unsigned int)(column >> 32);

  return hi_a * hi_b + (cross_ab >> 32) + (cross_ba >> 32) + carry;
}
---|
2025 | |
---|
/* Returns the upper 64 bits of the signed 128-bit product a * b.
   Starts from the unsigned high product and subtracts the other operand for
   each negative input; the corrections are done in unsigned arithmetic. */
static long long int __mul64hi(long long int a, long long int b)
{
  unsigned long long int upper = __umul64hi(a, b);

  if (a < 0LL) {
    upper -= (unsigned long long int)b;
  }
  if (b < 0LL) {
    upper -= (unsigned long long int)a;
  }
  return (long long int)upper;
}
---|
2030 | |
---|
/* Clamps a to the range [+0.0, 1.0].
   Returns 1.0f for a >= 1, +0.0f for a <= 0 (including -0.0f), and a itself
   otherwise.  A NaN input returns +0.0f: the previous comparison chain let
   NaN fall through unchanged, which disagrees with the documented behaviour
   of the device intrinsic. */
static float __saturatef(float a)
{
  /* !(a > 0) is true for negatives, both zeros and NaN. */
  if (!(a > 0.0f)) {
    return 0.0f;
  }
  if (a >= 1.0f) {
    return 1.0f;
  }
  return a;
}
---|
2035 | |
---|
/* Sum of absolute difference: |a - b| + c, computed in 64-bit arithmetic and
   truncated to unsigned int. */
static unsigned int __sad(int a, int b, unsigned int c)
{
  const long long int gap = (long long int)a - (long long int)b;
  const long long int total = __cuda_llabs(gap) + (long long int)c;

  return (unsigned int)total;
}
---|
2042 | |
---|
/* Unsigned sum of absolute difference: |a - b| + c, computed in 64-bit
   arithmetic and truncated to unsigned int. */
static unsigned int __usad(unsigned int a, unsigned int b, unsigned int c)
{
  const long long int gap = (long long int)a - (long long int)b;
  const long long int total = __cuda_llabs(gap) + (long long int)c;

  return (unsigned int)total;
}
---|
2049 | |
---|
/* Signed 24-bit multiply: each operand is reduced to its low 24 bits and
   sign-extended from bit 23, then the low 32 bits of the product are returned.
   The multiplication is carried out in unsigned arithmetic: the product of two
   sign-extended 24-bit values can exceed the range of int, and signed overflow
   is undefined behaviour in C.  The low 32 bits are the same either way. */
static int __mul24(int a, int b)
{
  unsigned int ua = (unsigned int)a & 0xffffffu;
  unsigned int ub = (unsigned int)b & 0xffffffu;

  /* Sign-extend from bit 23. */
  if (ua & 0x800000u) {
    ua |= ~0xffffffu;
  }
  if (ub & 0x800000u) {
    ub |= ~0xffffffu;
  }

  return (int)(ua * ub);
}
---|
2059 | |
---|
/* Unsigned 24-bit multiply: masks both operands to their low 24 bits and
   returns the low 32 bits of the product. */
static unsigned int __umul24(unsigned int a, unsigned int b)
{
  const unsigned int low_a = a & 0xffffff;
  const unsigned int low_b = b & 0xffffff;

  return low_a * low_b;
}
---|
2067 | |
---|
/* Reinterprets the bit pattern of an int as a float (no numeric conversion),
   using a union for the type pun. */
static float __int_as_float(int a)
{
  union {
    int bits;
    float value;
  } pun;

  pun.bits = a;
  return pun.value;
}
---|
2076 | |
---|
/* Reinterprets the bit pattern of a float as an int (no numeric conversion),
   using a union for the type pun. */
static int __float_as_int(float a)
{
  union {
    float value;
    int bits;
  } pun;

  pun.value = a;
  return pun.bits;
}
---|
2085 | |
---|
/*
 * Software float -> signed 64-bit integer conversion with caller-supplied
 * saturation bounds and rounding mode.
 *
 *   a        value to convert
 *   max/min  results returned when a >= max / a <= min (the comparison is done
 *            after the usual conversion of the bound to floating point)
 *   nan      result returned when a is NaN
 *   rndMode  cudaRoundNearest, cudaRoundMinInf or cudaRoundPosInf add a
 *            rounding step; any other mode truncates toward zero
 *
 * Returns the converted value as a long long int.
 */
static long long int __internal_float2ll_kernel(float a, long long int max, long long int min, long long int nan, enum cudaRoundMode rndMode)
{
  unsigned long long int res, t = 0ULL;
  int shift;
  unsigned int ia;

  /* Expanded NaN test (the sizeof comparisons pick the double or the float
     NaN predicate), followed by saturation against max and min. */
  if (sizeof(a) == sizeof(double) && __cuda___isnan((double)a)) return nan; if (sizeof(a) == sizeof(float) && __cuda___isnanf((float)a)) return nan; if (a >= max) return max; if (a <= min) return min;
  ia = __float_as_int(a);
  /* Right shift needed to turn the mantissa below into an integer; derived
     from the biased exponent in bits 23..30. */
  shift = 189 - ((ia >> 23) & 0xff);
  /* Mantissa with the implicit leading one, positioned so that the leading
     one sits at bit 62 of res. */
  res = (unsigned long long int)(((ia << 8) | 0x80000000) >> 1) << 32;
  /* t receives the bits shifted out of res (the fraction), left-aligned. */
  if (shift >= 64) {
    t = res;
    res = 0;
  } else if (shift) {
    t = res << (64 - shift);
    res = res >> shift;
  }
  /* Nearest: fraction >= 1/2 (top bit of t set); an exact tie goes to even. */
  if (rndMode == cudaRoundNearest && (long long int)t < 0LL) {
    res += t == 0x8000000000000000ULL ? res & 1ULL : 1ULL;
  }
  /* Toward -inf: a negative, non-zero input with a fraction grows in magnitude. */
  else if (rndMode == cudaRoundMinInf && t != 0ULL && ia > 0x80000000) {
    res++;
  }
  /* Toward +inf: a positive input with a fraction grows in magnitude. */
  else if (rndMode == cudaRoundPosInf && t != 0ULL && (int)ia > 0) {
    res++;
  }
  /* Apply the sign: the sign bit of the float is the sign bit of ia. */
  if ((int)ia < 0) res = (unsigned long long int)-(long long int)res;
  return (long long int)res;
}
---|
2115 | |
---|
2116 | static int __internal_float2int(float a, enum cudaRoundMode rndMode) |
---|
2117 | { |
---|
2118 | return (int)__internal_float2ll_kernel(a, 2147483647LL, -2147483648LL, 0LL, rndMode); |
---|
2119 | } |
---|
2120 | |
---|
2121 | static int __float2int_rz(float a) |
---|
2122 | { |
---|
2123 | return __internal_float2int(a, cudaRoundZero); |
---|
2124 | } |
---|
2125 | |
---|
2126 | static int __float2int_ru(float a) |
---|
2127 | { |
---|
2128 | return __internal_float2int(a, cudaRoundPosInf); |
---|
2129 | } |
---|
2130 | |
---|
2131 | static int __float2int_rd(float a) |
---|
2132 | { |
---|
2133 | return __internal_float2int(a, cudaRoundMinInf); |
---|
2134 | } |
---|
2135 | |
---|
2136 | static int __float2int_rn(float a) |
---|
2137 | { |
---|
2138 | return __internal_float2int(a, cudaRoundNearest); |
---|
2139 | } |
---|
2140 | |
---|
2141 | static long long int __internal_float2ll(float a, enum cudaRoundMode rndMode) |
---|
2142 | { |
---|
2143 | return __internal_float2ll_kernel(a, 9223372036854775807LL, -9223372036854775807LL -1LL, -9223372036854775807LL -1LL, rndMode); |
---|
2144 | } |
---|
2145 | |
---|
2146 | static long long int __float2ll_rz(float a) |
---|
2147 | { |
---|
2148 | return __internal_float2ll(a, cudaRoundZero); |
---|
2149 | } |
---|
2150 | |
---|
2151 | static long long int __float2ll_ru(float a) |
---|
2152 | { |
---|
2153 | return __internal_float2ll(a, cudaRoundPosInf); |
---|
2154 | } |
---|
2155 | |
---|
2156 | static long long int __float2ll_rd(float a) |
---|
2157 | { |
---|
2158 | return __internal_float2ll(a, cudaRoundMinInf); |
---|
2159 | } |
---|
2160 | |
---|
2161 | static long long int __float2ll_rn(float a) |
---|
2162 | { |
---|
2163 | return __internal_float2ll(a, cudaRoundNearest); |
---|
2164 | } |
---|
2165 | |
---|
/*
 * Software float -> unsigned 64-bit integer conversion with a caller-supplied
 * upper bound and rounding mode.
 *
 *   a        value to convert
 *   max      result returned when a >= max (compared after the usual
 *            conversion of the bound to floating point)
 *   nan      result returned when a is NaN
 *   rndMode  cudaRoundNearest and cudaRoundPosInf add a rounding step; any
 *            other mode truncates
 *
 * Inputs <= 0 return 0.
 */
static unsigned long long int __internal_float2ull_kernel(float a, unsigned long long int max, unsigned long long int nan, enum cudaRoundMode rndMode)
{
  unsigned long long int res, t = 0ULL;
  int shift;
  unsigned int ia;

  /* Expanded NaN test, then saturation against max and against zero. */
  if (sizeof(a) == sizeof(double) && __cuda___isnan((double)a)) return nan; if (sizeof(a) == sizeof(float) && __cuda___isnanf((float)a)) return nan; if (a >= max) return max; if (a <= 0LL) return 0LL;
  ia = __float_as_int(a);
  /* Right shift needed to turn the mantissa below into an integer; derived
     from the biased exponent in bits 23..30. */
  shift = 190 - ((ia >> 23) & 0xff);
  /* Mantissa with the implicit leading one, leading one at bit 63 of res. */
  res = (unsigned long long int)((ia << 8) | 0x80000000) << 32;
  /* t receives the bits shifted out of res (the fraction), left-aligned.
     For shift > 64 the fraction is moved down one place so that its top bit,
     which the nearest-rounding test below inspects, is clear. */
  if (shift >= 64) {
    t = res >> (int)(shift > 64);
    res = 0;
  } else if (shift) {
    t = res << (64 - shift);
    res = res >> shift;
  }
  /* Nearest: fraction >= 1/2 (top bit of t set); an exact tie goes to even. */
  if (rndMode == cudaRoundNearest && (long long int)t < 0LL) {
    res += t == 0x8000000000000000ULL ? res & 1ULL : 1ULL;
  }
  /* Toward +inf: any non-zero fraction rounds up. */
  else if (rndMode == cudaRoundPosInf && t != 0ULL) {
    res++;
  }
  return res;
}
---|
2191 | |
---|
2192 | static unsigned int __internal_float2uint(float a, enum cudaRoundMode rndMode) |
---|
2193 | { |
---|
2194 | return (unsigned int)__internal_float2ull_kernel(a, 4294967295U, 0U, rndMode); |
---|
2195 | } |
---|
2196 | |
---|
2197 | static unsigned int __float2uint_rz(float a) |
---|
2198 | { |
---|
2199 | return __internal_float2uint(a, cudaRoundZero); |
---|
2200 | } |
---|
2201 | |
---|
2202 | static unsigned int __float2uint_ru(float a) |
---|
2203 | { |
---|
2204 | return __internal_float2uint(a, cudaRoundPosInf); |
---|
2205 | } |
---|
2206 | |
---|
2207 | static unsigned int __float2uint_rd(float a) |
---|
2208 | { |
---|
2209 | return __internal_float2uint(a, cudaRoundMinInf); |
---|
2210 | } |
---|
2211 | |
---|
2212 | static unsigned int __float2uint_rn(float a) |
---|
2213 | { |
---|
2214 | return __internal_float2uint(a, cudaRoundNearest); |
---|
2215 | } |
---|
2216 | |
---|
2217 | static unsigned long long int __internal_float2ull(float a, enum cudaRoundMode rndMode) |
---|
2218 | { |
---|
2219 | return __internal_float2ull_kernel(a, 18446744073709551615ULL, 9223372036854775808ULL, rndMode); |
---|
2220 | } |
---|
2221 | |
---|
2222 | static unsigned long long int __float2ull_rz(float a) |
---|
2223 | { |
---|
2224 | return __internal_float2ull(a, cudaRoundZero); |
---|
2225 | } |
---|
2226 | |
---|
2227 | static unsigned long long int __float2ull_ru(float a) |
---|
2228 | { |
---|
2229 | return __internal_float2ull(a, cudaRoundPosInf); |
---|
2230 | } |
---|
2231 | |
---|
2232 | static unsigned long long int __float2ull_rd(float a) |
---|
2233 | { |
---|
2234 | return __internal_float2ull(a, cudaRoundMinInf); |
---|
2235 | } |
---|
2236 | |
---|
2237 | static unsigned long long int __float2ull_rn(float a) |
---|
2238 | { |
---|
2239 | return __internal_float2ull(a, cudaRoundNearest); |
---|
2240 | } |
---|
2241 | |
---|
/* Shifts *a left in steps of 32, 16, 8, 4, 2 and 1 bits, taking each step
   only when the corresponding number of top bits is all zero, and returns the
   total shift applied.  For a non-zero input this leaves bit 63 set and the
   return value is the number of leading zero bits; for *a == 0 the value stays
   0 and 63 is returned. */
static int __internal_normalize64(unsigned long long int *a)
{
  static const int step[6] = { 32, 16, 8, 4, 2, 1 };
  int lz = 0;
  int k;

  for (k = 0; k < 6; k++) {
    /* The top step[k] bits are clear exactly when this shift yields zero. */
    if ((*a >> (64 - step[k])) == 0ULL) {
      *a <<= step[k];
      lz += step[k];
    }
  }
  return lz;
}
---|
2272 | |
---|
/* 32-bit counterpart of __internal_normalize64: widens *a to 64 bits,
   normalizes it there, stores the upper 32 bits of the result back into *a and
   returns the 64-bit shift count minus 32. */
static int __internal_normalize(unsigned int *a)
{
  unsigned long long int wide = (unsigned long long int)*a;
  const int shift64 = __internal_normalize64(&wide);

  *a = (unsigned int)(wide >> 32);

  return shift64 - 32;
}
---|
2282 | |
---|
2283 | static float __internal_int2float_kernel(int a, enum cudaRoundMode rndMode) |
---|
2284 | { |
---|
2285 | volatile union { |
---|
2286 | float f; |
---|
2287 | unsigned int i; |
---|
2288 | } res; |
---|
2289 | int shift; |
---|
2290 | unsigned int t; |
---|
2291 | res.i = a; |
---|
2292 | if (a == 0) return res.f; |
---|
2293 | if (a < 0) res.i = (unsigned int)-a; |
---|
2294 | shift = __internal_normalize((unsigned int*)&res.i); |
---|
2295 | t = res.i << 24; |
---|
2296 | res.i = (res.i >> 8); |
---|
2297 | res.i += (127 + 30 - shift) << 23; |
---|
2298 | if (a < 0) res.i |= 0x80000000; |
---|
2299 | if ((rndMode == cudaRoundNearest) && (t >= 0x80000000)) { |
---|
2300 | res.i += (t == 0x80000000) ? (res.i & 1) : (t >> 31); |
---|
2301 | } |
---|
2302 | else if ((rndMode == cudaRoundMinInf) && t && (a < 0)) { |
---|
2303 | res.i++; |
---|
2304 | } |
---|
2305 | else if ((rndMode == cudaRoundPosInf) && t && (a > 0)) { |
---|
2306 | res.i++; |
---|
2307 | } |
---|
2308 | return res.f; |
---|
2309 | } |
---|
2310 | |
---|
2311 | static float __int2float_rz(int a) |
---|
2312 | { |
---|
2313 | return __internal_int2float_kernel(a, cudaRoundZero); |
---|
2314 | } |
---|
2315 | |
---|
2316 | static float __int2float_ru(int a) |
---|
2317 | { |
---|
2318 | return __internal_int2float_kernel(a, cudaRoundPosInf); |
---|
2319 | } |
---|
2320 | |
---|
2321 | static float __int2float_rd(int a) |
---|
2322 | { |
---|
2323 | return __internal_int2float_kernel(a, cudaRoundMinInf); |
---|
2324 | } |
---|
2325 | |
---|
2326 | static float __int2float_rn(int a) |
---|
2327 | { |
---|
2328 | return __internal_int2float_kernel(a, cudaRoundNearest); |
---|
2329 | } |
---|
2330 | |
---|
2331 | static float __internal_uint2float_kernel(unsigned int a, enum cudaRoundMode rndMode) |
---|
2332 | { |
---|
2333 | volatile union { |
---|
2334 | float f; |
---|
2335 | unsigned int i; |
---|
2336 | } res; |
---|
2337 | int shift; |
---|
2338 | unsigned int t; |
---|
2339 | res.i = a; |
---|
2340 | if (a == 0) return res.f; |
---|
2341 | shift = __internal_normalize((unsigned int*)&res.i); |
---|
2342 | t = res.i << 24; |
---|
2343 | res.i = (res.i >> 8); |
---|
2344 | res.i += (127 + 30 - shift) << 23; |
---|
2345 | if ((rndMode == cudaRoundNearest) && (t >= 0x80000000)) { |
---|
2346 | res.i += (t == 0x80000000) ? (res.i & 1) : (t >> 31); |
---|
2347 | } |
---|
2348 | else if ((rndMode == cudaRoundPosInf) && t) { |
---|
2349 | res.i++; |
---|
2350 | } |
---|
2351 | return res.f; |
---|
2352 | } |
---|
2353 | |
---|
2354 | static float __uint2float_rz(unsigned int a) |
---|
2355 | { |
---|
2356 | return __internal_uint2float_kernel(a, cudaRoundZero); |
---|
2357 | } |
---|
2358 | |
---|
2359 | static float __uint2float_ru(unsigned int a) |
---|
2360 | { |
---|
2361 | return __internal_uint2float_kernel(a, cudaRoundPosInf); |
---|
2362 | } |
---|
2363 | |
---|
2364 | static float __uint2float_rd(unsigned int a) |
---|
2365 | { |
---|
2366 | return __internal_uint2float_kernel(a, cudaRoundMinInf); |
---|
2367 | } |
---|
2368 | |
---|
2369 | static float __uint2float_rn(unsigned int a) |
---|
2370 | { |
---|
2371 | return __internal_uint2float_kernel(a, cudaRoundNearest); |
---|
2372 | } |
---|
2373 | |
---|
/* long long -> float using the compiler's own integer-to-float cast. */
static float __ll2float_rn(long long int a)
{
  const float converted = (float)a;

  return converted;
}
---|
2378 | |
---|
/* Software unsigned long long -> float conversion, rounding to nearest with
   ties to even.  Zero is returned directly. */
static float __ull2float_rn(unsigned long long int a)
{
  unsigned long long int wide;
  unsigned long long int sticky;
  unsigned int bits;
  unsigned int tail;
  int lz;

  if (a == 0ULL) {
    return 0.0f;
  }

  wide = a;
  lz = __internal_normalize64(&wide);
  /* Drop the low 8 bits, folding any non-zero ones into a sticky bit. */
  sticky = (wide & 0xffULL) ? 1ULL : 0ULL;
  wide = (wide >> 8) | sticky;
  bits = (unsigned int)(wide >> 32);   /* mantissa with the leading one at bit 23 */
  tail = (unsigned int)wide;           /* remaining bits, used for rounding */
  bits += (127 + 62 - lz) << 23;
  bits += tail == 0x80000000 ? bits & 1 : tail >> 31;
  return __int_as_float(bits);
}
---|
2394 | |
---|
/*
 * Software single-precision multiply that works on the raw bit patterns of
 * its operands.
 *
 *   a, b        operands
 *   rndNearest  non-zero: the rounding increments below are applied and
 *               overflow produces the infinity bit pattern; zero: no
 *               increment is applied and overflow produces the bit pattern
 *               just below infinity
 *
 * Ordinary operands take the `multiply` path.  Zeros, denormals, infinities
 * and NaNs are handled in the else branch; denormals are normalized there and
 * then jump back to `multiply`.
 */
static float __internal_fmul_kernel(float a, float b, int rndNearest)
{
  unsigned long long product;
  volatile union {
    float f;
    unsigned int i;
  } xx, yy;
  unsigned expo_x, expo_y;

  xx.f = a;
  yy.f = b;

  /* Biased exponents minus one.  The unsigned test `<= 0xFD` below then
     rejects exponent 0 (wraps to 0xFFFFFFFF) and exponent 0xFF (becomes 0xFE)
     in one comparison. */
  expo_y = 0xFF;
  expo_x = expo_y & (xx.i >> 23);
  expo_x = expo_x - 1;
  expo_y = expo_y & (yy.i >> 23);
  expo_y = expo_y - 1;

  if ((expo_x <= 0xFD) &&
      (expo_y <= 0xFD)) {
multiply:
    expo_x = expo_x + expo_y;
    /* expo_y is reused: it now carries the XOR of the operand bit patterns,
       of which only the sign bit is kept further down. */
    expo_y = xx.i ^ yy.i;
    /* 24-bit significand of x (bit 23 forced to one) and left-justified
       significand of y (bit 31 forced to one). */
    xx.i = xx.i & 0x00ffffff;
    yy.i = yy.i << 8;
    xx.i = xx.i | 0x00800000;
    yy.i = yy.i | 0x80000000;

    product = ((unsigned long long)xx.i) * yy.i;
    expo_x = expo_x - 127 + 2;
    expo_y = expo_y & 0x80000000;
    /* High word: result significand; low word: bits used for rounding. */
    xx.i = (unsigned int)(product >> 32);
    yy.i = (unsigned int)(product & 0xffffffff);

    /* Leading one not yet at bit 23: shift left by one and adjust exponent. */
    if (xx.i < 0x00800000) {
      xx.i = (xx.i << 1) | (yy.i >> 31);
      yy.i = (yy.i << 1);
      expo_x--;
    }
    if (expo_x <= 0xFD) {
      /* Result exponent in range: merge sign and exponent, then round. */
      xx.i = xx.i | expo_y;
      xx.i = xx.i + (expo_x << 23);

      if (yy.i < 0x80000000) return xx.f;
      xx.i += (((yy.i == 0x80000000) ? (xx.i & 1) : (yy.i >> 31))
               && rndNearest);
      return xx.f;
    } else if ((int)expo_x >= 254) {

      /* Overflow: signed infinity pattern, minus one when !rndNearest. */
      xx.i = (expo_y | 0x7F800000) - (!rndNearest);
      return xx.f;
    } else {

      /* Underflow: expo_x becomes the right shift needed for a denormal. */
      expo_x = ((unsigned int)-((int)expo_x));
      if (expo_x > 25) {

        /* Too small to represent: only the sign bit is returned. */
        xx.i = expo_y;
        return xx.f;
      } else {
        /* Shift into denormal position; yy.i keeps the shifted-out bits plus
           a sticky bit from the old low word. */
        yy.i = (xx.i << (32 - expo_x)) | ((yy.i) ? 1 : 0);
        xx.i = expo_y + (xx.i >> expo_x);
        xx.i += (((yy.i == 0x80000000) ? (xx.i & 1) : (yy.i >> 31))
                 && rndNearest);
        return xx.f;
      }
    }
  } else {
    /* product is reused to hold the sign bit of the result. */
    product = xx.i ^ yy.i;
    product = product & 0x80000000;
    /* x is +/-0. */
    if (!(xx.i & 0x7fffffff)) {
      if (expo_y != 254) {
        /* y has an exponent field other than 0xFF: return signed zero. */
        xx.i = (unsigned int)product;
        return xx.f;
      }
      expo_y = yy.i << 1;
      if (expo_y == 0xFF000000) {
        /* y is an infinity: result pattern 0xFFC00000. */
        xx.i = expo_y | 0x00C00000;
      } else {
        /* y is a NaN: return it with bit 22 set. */
        xx.i = yy.i | 0x00400000;
      }
      return xx.f;
    }
    /* y is +/-0: mirror image of the case above. */
    if (!(yy.i & 0x7fffffff)) {
      if (expo_x != 254) {
        xx.i = (unsigned int)product;
        return xx.f;
      }
      expo_x = xx.i << 1;
      if (expo_x == 0xFF000000) {
        xx.i = expo_x | 0x00C00000;
      } else {
        xx.i = xx.i | 0x00400000;
      }
      return xx.f;
    }
    /* Neither exponent field is 0xFF, so at least one operand is a denormal:
       normalize it and restart at `multiply`. */
    if ((expo_y != 254) && (expo_x != 254)) {
      expo_y++;
      expo_x++;
      if (expo_x == 0) {
        /* Park the sign of x in the top bit of expo_y while xx.i is shifted. */
        expo_y |= xx.i & 0x80000000;




        xx.i = xx.i << 8;
        while (!(xx.i & 0x80000000)) {
          xx.i <<= 1;
          expo_x--;
        }
        xx.i = (xx.i >> 8) | (expo_y & 0x80000000);
        expo_y &= ~0x80000000;
        expo_y--;
        goto multiply;
      }
      if (expo_y == 0) {
        /* Same normalization for y, with the sign parked in expo_x. */
        expo_x |= yy.i & 0x80000000;
        yy.i = yy.i << 8;
        while (!(yy.i & 0x80000000)) {
          yy.i <<= 1;
          expo_y--;
        }
        yy.i = (yy.i >> 8) | (expo_x & 0x80000000);
        expo_x &= ~0x80000000;
        expo_x--;
        goto multiply;
      }
    }
    /* Bit patterns with the sign shifted out: a value above 0xFF000000 has an
       all-ones exponent and a non-zero mantissa (NaN). */
    expo_x = xx.i << 1;
    expo_y = yy.i << 1;

    if (expo_x > 0xFF000000) {

      /* x is a NaN: return it with bit 22 set. */
      xx.i = xx.i | 0x00400000;
      return xx.f;
    }

    if (expo_y > 0xFF000000) {

      /* y is a NaN: return it with bit 22 set. */
      xx.i = yy.i | 0x00400000;
      return xx.f;
    }
    /* Remaining case: return the infinity pattern with the product's sign. */
    xx.i = (unsigned int)product | 0x7f800000;
    return xx.f;
  }
}
---|
2540 | |
---|
/*
 * Software single-precision add that works on the raw bit patterns of its
 * operands.
 *
 *   a, b        operands
 *   rndNearest  non-zero: the rounding increments below are applied and
 *               overflow produces the infinity bit pattern; zero: no
 *               increment is applied and overflow produces the bit pattern
 *               just below infinity
 *
 * Ordinary operands take the `add` path.  Zeros, denormals, infinities and
 * NaNs are handled in the else branch; denormals are normalized there and
 * then jump back to `add`.
 */
static float __internal_fadd_kernel(float a, float b, int rndNearest)
{
  volatile union {
    float f;
    unsigned int i;
  } xx, yy;
  unsigned int expo_x;
  unsigned int expo_y;
  unsigned int temp;

  xx.f = a;
  yy.f = b;


  /* Order the operands so that xx holds the one whose bit pattern, with the
     sign shifted out, is not smaller. */
  expo_y = yy.i << 1;
  if (expo_y > (xx.i << 1)) {
    expo_y = xx.i;
    xx.i = yy.i;
    yy.i = expo_y;
  }

  /* Biased exponents minus one; `<= 0xFD` (unsigned) then rejects exponent
     fields 0 and 0xFF in one comparison. */
  temp = 0xff;
  expo_x = temp & (xx.i >> 23);
  expo_x = expo_x - 1;
  expo_y = temp & (yy.i >> 23);
  expo_y = expo_y - 1;

  if ((expo_x <= 0xFD) &&
      (expo_y <= 0xFD)) {

add:
    /* expo_y becomes the alignment shift for y; differences above 25 are
       replaced by 31. */
    expo_y = expo_x - expo_y;
    if (expo_y > 25) {
      expo_y = 31;
    }
    /* Top bit of temp is set when the operand signs differ. */
    temp = xx.i ^ yy.i;
    /* Keep the sign and significand of x (bit 23 forced to one); keep only
       the significand of y (bit 23 forced to one). */
    xx.i = xx.i & ~0x7f000000;
    xx.i = xx.i | 0x00800000;
    yy.i = yy.i & ~0xff000000;
    yy.i = yy.i | 0x00800000;

    if ((int)temp < 0) {

      /* Signs differ: subtract the aligned y.  temp first collects the bits
         of y shifted out by the alignment, then is negated so that it holds
         the low word of the difference; a non-zero value means a borrow. */
      temp = 32 - expo_y;
      temp = (expo_y) ? (yy.i << temp) : 0;
      temp = (unsigned int)(-((int)temp));
      xx.i = xx.i - (yy.i >> expo_y) - (temp ? 1 : 0);
      if (xx.i & 0x00800000) {
        /* Leading one still at bit 23: no renormalization needed. */
        if (expo_x <= 0xFD) {
          xx.i = xx.i & ~0x00800000;
          xx.i = (xx.i + (expo_x << 23)) + 0x00800000;
          if (temp < 0x80000000) return xx.f;
          xx.i += (((temp == 0x80000000) ? (xx.i & 1) : (temp >> 31))
                   && rndNearest);
          return xx.f;
        }
      } else {
        if ((temp | (xx.i << 1)) == 0) {

          /* Complete cancellation: return the all-zero pattern. */
          xx.i = 0;
          return xx.f;
        }

        /* Renormalize: shift left, pulling bits in from temp, until bit 23
           is set.  yy.i holds the sign meanwhile. */
        yy.i = xx.i & 0x80000000;
        do {
          xx.i = (xx.i << 1) | (temp >> 31);
          temp <<= 1;
          expo_x--;
        } while (!(xx.i & 0x00800000));
        xx.i = xx.i | yy.i;
      }
    } else {

      /* Signs agree: add the aligned y; temp keeps the shifted-out bits. */
      temp = 32 - expo_y;
      temp = (expo_y) ? (yy.i << temp) : 0;
      xx.i = xx.i + (yy.i >> expo_y);
      if (!(xx.i & 0x01000000)) {
        /* No carry out of the significand. */
        if (expo_x <= 0xFD) {
          expo_y = xx.i & 1;
          xx.i = xx.i + (expo_x << 23);
          if (temp < 0x80000000) return xx.f;
          xx.i += (((temp == 0x80000000) ? expo_y : (temp >> 31))
                   && rndNearest);
          return xx.f;
        }
      } else {

        /* Carry out: shift right by one, moving the dropped bit into temp. */
        temp = (xx.i << 31) | (temp >> 1);

        xx.i = ((xx.i & 0x80000000) | (xx.i >> 1)) & ~0x40000000;
        expo_x++;
      }
    }
    /* Common exit for results whose exponent changed. */
    if (expo_x <= 0xFD) {
      expo_y = xx.i & 1;
      xx.i += (((temp == 0x80000000) ? expo_y : (temp >> 31))
               && rndNearest);
      xx.i = xx.i + (expo_x << 23);
      return xx.f;
    }
    if ((int)expo_x >= 254) {

      /* Overflow: signed infinity pattern, minus one when !rndNearest. */
      xx.i = ((xx.i & 0x80000000) | 0x7f800000) - (!rndNearest);
      return xx.f;
    }

    /* Underflow: shift the significand right into denormal position. */
    expo_y = expo_x + 32;
    yy.i = xx.i & 0x80000000;
    xx.i = xx.i & ~0xff000000;

    expo_x = (unsigned int)(-((int)expo_x));
    temp = xx.i << expo_y | ((temp) ? 1 : 0);
    xx.i = yy.i | (xx.i >> expo_x);
    xx.i += (((temp == 0x80000000) ? (xx.i & 1) : (temp >> 31))
             && rndNearest);
    return xx.f;
  } else {

    /* y is +/-0: the result is x, except that x == -0 yields y. */
    if (!(yy.i << 1)) {
      if (xx.i == 0x80000000) {
        xx.i = yy.i;
      }
      return xx.f;
    }
    /* Neither exponent field is 0xFF: normalize any denormal operand and
       restart at `add`. */
    if ((expo_y != 254) && (expo_x != 254)) {

      if (expo_x == (unsigned int) -1) {
        temp = xx.i & 0x80000000;
        xx.i = xx.i << 8;
        while (!(xx.i & 0x80000000)) {
          xx.i <<= 1;
          expo_x--;
        }
        expo_x++;
        xx.i = (xx.i >> 8) | temp;
      }
      if (expo_y == (unsigned int) -1) {
        temp = yy.i & 0x80000000;
        yy.i = yy.i << 8;
        while (!(yy.i & 0x80000000)) {
          yy.i <<= 1;
          expo_y--;
        }
        expo_y++;
        yy.i = (yy.i >> 8) | temp;
      }
      goto add;
    }
    /* Bit patterns with the sign shifted out: above 0xff000000 means NaN,
       equal to 0xff000000 means infinity. */
    expo_x = xx.i << 1;
    expo_y = yy.i << 1;

    if (expo_x > 0xff000000) {

      /* x is a NaN: return it with bit 22 set. */
      xx.i = xx.i | 0x00400000;
      return xx.f;
    }

    if (expo_y > 0xff000000) {

      /* y is a NaN: return it with bit 22 set. */
      xx.i = yy.i | 0x00400000;
      return xx.f;
    }
    if ((expo_x == 0xff000000) && (expo_y == 0xff000000)) {




      /* Both infinite: differing patterns (opposite signs) give 0xffc00000
         OR-ed into x; identical patterns return x unchanged. */
      expo_x = xx.i ^ yy.i;
      xx.i = xx.i | ((expo_x) ? 0xffc00000 : 0);
      return xx.f;
    }

    /* Exactly one operand is infinite: return that one. */
    if (expo_y == 0xff000000) {
      xx.i = yy.i;
    }
    return xx.f;
  }
}
---|
2719 | |
---|
/* Addition through the software kernel with rndNearest = 0. */
static float __fadd_rz(float a, float b)
{
  const float sum = __internal_fadd_kernel(a, b, 0);

  return sum;
}
---|
2724 | |
---|
/* Multiplication through the software kernel with rndNearest = 0. */
static float __fmul_rz(float a, float b)
{
  const float prod = __internal_fmul_kernel(a, b, 0);

  return prod;
}
---|
2729 | |
---|
/* Host emulation of the fast divide intrinsic.
   When |b| is not above 8.507059173e37f the result is the plain quotient a / b.
   For larger |b|: if |a| <= 3.402823466e38f the quotient is divided twice more
   by 3.402823466e38f; otherwise the bit pattern 0x7fffffff is returned. */
static float __fdividef(float a, float b)
{
  /* Written as !(x > y) so that a NaN divisor takes the plain-quotient path,
     exactly as a failed `>` comparison does. */
  if (!(__cuda_fabsf(b) > 8.507059173e37f)) {
    return a / b;
  }
  if (__cuda_fabsf(a) <= 3.402823466e38f) {
    return ((a / b) / 3.402823466e38f) / 3.402823466e38f;
  }
  return __int_as_float(0x7fffffff);
}
---|
2743 | |
---|
/* Host stand-in for the breakpoint intrinsic: the body is empty, so the call
   does nothing and the argument c is ignored. */
static void __brkpt(int c)
{

}
---|
2748 | |
---|
2749 | extern int __cudaSynchronizeThreads(void**, void*); |
---|
2750 | |
---|
2751 | |
---|
2752 | |
---|
/* Host emulation of the block barrier.  Calls __cudaSynchronizeThreads() with
   the address of the local label L (GCC labels-as-values extension, `&&L`) and
   the address of a local volatile int, and jumps back to L for as long as the
   call returns non-zero.  Forced inline via always_inline, so the label and
   the local live in the caller's frame.
   NOTE(review): how the emulation runtime uses the two addresses is not
   visible in this file. */
static inline __attribute__((always_inline)) void __syncthreads(void)
{
  volatile int _ = 0;
  L: if (__cudaSynchronizeThreads((void**)&&L, (void*)&_)) goto L;
}
---|
2758 | |
---|
/* Host stand-in for the trap intrinsic: invokes the compiler builtin
   __builtin_trap(), which terminates the program abnormally. */
static void __trap(void)
{
  __builtin_trap();
}
---|
2763 | # 1139 "/usr/local/cuda/bin/../include/device_functions.h" 3 |
---|
2764 | static float __sinf(float a) |
---|
2765 | { |
---|
2766 | return sinf(a); |
---|
2767 | } |
---|
2768 | |
---|
2769 | static float __cosf(float a) |
---|
2770 | { |
---|
2771 | return cosf(a); |
---|
2772 | } |
---|
2773 | |
---|
2774 | static float __log2f(float a) |
---|
2775 | { |
---|
2776 | return log2f(a); |
---|
2777 | } |
---|
2778 | |
---|
2779 | |
---|
2780 | |
---|
2781 | |
---|
2782 | |
---|
2783 | |
---|
2784 | |
---|
/* Division helper: when |b| exceeds 8.507059173e37f both operands are scaled
   by 0.25 first, then the quotient is computed by __fdividef(). */
static float __internal_accurate_fdividef(float a, float b)
{
  float num = a;
  float den = b;

  if (__cuda_fabsf(den) > 8.507059173e37f) {
    num = a * .25f;
    den = b * .25f;
  }
  return __fdividef(num, den);
}
---|
2793 | |
---|
/* Tangent computed as the quotient __sinf(a) / __cosf(a). */
static float __tanf(float a)
{
  const float sine = __sinf(a);
  const float cosine = __cosf(a);

  return sine / cosine;
}
---|
2798 | |
---|
/* Stores __sinf(a) through sptr and then __cosf(a) through cptr.
   The sine is written first, so if both pointers alias the cosine wins. */
static void __sincosf(float a, float *sptr, float *cptr)
{
  const float sine = __sinf(a);
  const float cosine = __cosf(a);

  *sptr = sine;
  *cptr = cosine;
}
---|
2804 | |
---|
/* Fast e^a computed as 2^(a * 1.442695041f), where the constant is log2(e). */
static float __expf(float a)
{
  const float scaled = a * 1.442695041f;

  return __cuda_exp2f(scaled);
}
---|
2809 | |
---|
/* Fast 10^a computed as 2^(a * 3.321928094f), where the constant is
   log2(10). */
static float __exp10f(float a)
{
  const float scaled = a * 3.321928094f;

  return __cuda_exp2f(scaled);
}
---|
2814 | |
---|
/* Fast log10(a): __log2f(a) scaled by 0.301029996f, i.e. log10(2). */
static float __log10f(float a)
{
  const float base2 = __log2f(a);

  return 0.301029996f * base2;
}
---|
2819 | |
---|
/* Fast natural log: __log2f(a) scaled by 0.693147181f, i.e. ln(2). */
static float __logf(float a)
{
  const float base2 = __log2f(a);

  return 0.693147181f * base2;
}
---|
2824 | |
---|
/* Fast a^b computed as 2^(b * log2(a)). */
static float __powf(float a, float b)
{
  const float base2 = __log2f(a);

  return __cuda_exp2f(b * base2);
}
---|
2829 | |
---|
/* Public single-precision divide; in this build it always takes the
   __internal_accurate_fdividef() path. */
static float fdividef(float a, float b)
{
  const float quotient = __internal_accurate_fdividef(a, b);

  return quotient;
}
---|
2838 | |
---|
/* Count of leading zero bits in the 32-bit value a; 32 when a is zero.
   For non-zero a the value is converted to float via __uint2float_rz and
   the result is 158 minus the float's bits shifted right by 23 (its
   exponent field). */
static int __clz(int a)
{
  if (!a) {
    return 32;
  }
  return 158 - (__float_as_int(__uint2float_rz((unsigned int)a)) >> 23);
}
---|
2843 | |
---|
/* Find-first-set: returns 32 - __clz() of the lowest set bit of a.
   With __clz(0) == 32 this yields 0 when a is 0.
   The lowest set bit is isolated in unsigned arithmetic: the signed form
   "a & -a" overflows (undefined behaviour) when a is INT_MIN. */
static int __ffs(int a)
{
  const unsigned int bits = (unsigned int)a;
  const unsigned int lowest = bits & (0u - bits);

  return 32 - __clz((int)lowest);
}
---|
2848 | |
---|
/* Count of leading zero bits in the 64-bit value a, built from __clz():
   if the upper 32 bits are non-zero the answer is their __clz(), otherwise
   it is 32 plus the __clz() of the lower 32 bits. */
static int __clzll(long long int a)
{
  const int upper = ((int)(a >> 32));
  const int lower = ((int)(a & 0xffffffffULL));

  if (upper) {
    return 0 + __clz(upper);
  }
  return 32 + __clz(lower);
}
---|
2863 | |
---|
/* 64-bit find-first-set: returns 64 - __clzll() of the lowest set bit of a.
   The lowest set bit is isolated in unsigned arithmetic: the signed form
   "a & -a" overflows (undefined behaviour) when a is LLONG_MIN. */
static int __ffsll(long long int a)
{
  const unsigned long long int bits = (unsigned long long int)a;
  const unsigned long long int lowest = bits & (0ULL - bits);

  return 64 - __clzll((long long int)lowest);
}
---|
2868 | # 1252 "/usr/local/cuda/bin/../include/device_functions.h" 3 |
---|
/* Double-precision divide implemented by narrowing both operands to float,
   calling fdividef(), and widening the result back to double. */
static double fdivide(double a, double b)
{
  const float quotient = fdividef((float)a, (float)b);

  return (double)quotient;
}
---|
2873 | |
---|
2874 | |
---|
2875 | |
---|
/* double -> int conversion done by narrowing to float first and then
   calling __float2int_rz(). */
static int __double2int_rz(double a)
{
  const float narrowed = (float)a;

  return __float2int_rz(narrowed);
}
---|
2880 | |
---|
/* double -> unsigned int conversion done by narrowing to float first and
   then calling __float2uint_rz(). */
static unsigned int __double2uint_rz(double a)
{
  const float narrowed = (float)a;

  return __float2uint_rz(narrowed);
}
---|
2885 | |
---|
/* double -> long long conversion done by narrowing to float first and then
   calling __float2ll_rz(). */
static long long int __double2ll_rz(double a)
{
  const float narrowed = (float)a;

  return __float2ll_rz(narrowed);
}
---|
2890 | |
---|
/* double -> unsigned long long conversion done by narrowing to float first
   and then calling __float2ull_rz(). */
static unsigned long long int __double2ull_rz(double a)
{
  const float narrowed = (float)a;

  return __float2ull_rz(narrowed);
}
---|
2895 | # 1291 "/usr/local/cuda/bin/../include/device_functions.h" 3 |
---|
2896 | # 1 "/usr/local/cuda/bin/../include/sm_11_atomic_functions.h" 1 3 |
---|
2897 | # 214 "/usr/local/cuda/bin/../include/sm_11_atomic_functions.h" 3 |
---|
/* Adds val to *address and returns the value held before the add.
   Implemented as a plain load followed by a plain store (no locking). */
static int __iAtomicAdd(int *address, int val)
{
  const int previous = *address;
  const int updated = previous + val;

  *address = updated;
  return previous;
}
---|
2906 | |
---|
/* Adds val to *address (modulo 2^32) and returns the value held before the
   add. Implemented as a plain load followed by a plain store. */
static unsigned int __uAtomicAdd(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;
  const unsigned int updated = previous + val;

  *address = updated;
  return previous;
}
---|
2915 | |
---|
/* Stores val into *address and returns the value it replaced.
   Implemented as a plain load followed by a plain store. */
static int __iAtomicExch(int *address, int val)
{
  const int previous = *address;

  *address = val;
  return previous;
}
---|
2924 | |
---|
/* Stores val into *address and returns the value it replaced.
   Implemented as a plain load followed by a plain store. */
static unsigned int __uAtomicExch(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;

  *address = val;
  return previous;
}
---|
2933 | |
---|
/* Stores val into *address and returns the float it replaced.
   Implemented as a plain load followed by a plain store. */
static float __fAtomicExch(float *address, float val)
{
  const float previous = *address;

  *address = val;
  return previous;
}
---|
2942 | |
---|
/* Replaces *address with the smaller of its current value and val, and
   returns the value held before the update. The store always happens,
   even when the value does not change. */
static int __iAtomicMin(int *address, int val)
{
  const int previous = *address;
  int smaller = val;

  if (previous < val) {
    smaller = previous;
  }
  *address = smaller;
  return previous;
}
---|
2951 | |
---|
/* Replaces *address with the smaller of its current value and val
   (unsigned comparison), and returns the value held before the update. */
static unsigned int __uAtomicMin(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;
  unsigned int smaller = val;

  if (previous < val) {
    smaller = previous;
  }
  *address = smaller;
  return previous;
}
---|
2960 | |
---|
/* Replaces *address with the larger of its current value and val, and
   returns the value held before the update. */
static int __iAtomicMax(int *address, int val)
{
  const int previous = *address;
  int larger = val;

  if (previous > val) {
    larger = previous;
  }
  *address = larger;
  return previous;
}
---|
2969 | |
---|
/* Replaces *address with the larger of its current value and val
   (unsigned comparison), and returns the value held before the update. */
static unsigned int __uAtomicMax(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;
  unsigned int larger = val;

  if (previous > val) {
    larger = previous;
  }
  *address = larger;
  return previous;
}
---|
2978 | |
---|
/* Wrapping increment: if the current value is >= val the cell is reset to
   0, otherwise it is incremented by one. Returns the value held before the
   update. */
static unsigned int __uAtomicInc(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;
  unsigned int next;

  if (previous >= val) {
    next = 0;
  } else {
    next = previous + 1;
  }
  *address = next;
  return previous;
}
---|
2987 | |
---|
/* Wrapping decrement: if the current value is 0 or greater than val the
   cell is set to val, otherwise it is decremented by one. Returns the value
   held before the update. */
static unsigned int __uAtomicDec(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;
  unsigned int next;

  if ((previous == 0) || (previous > val)) {
    next = val;
  } else {
    next = previous - 1;
  }
  *address = next;
  return previous;
}
---|
2996 | |
---|
/* Bitwise-ANDs val into *address and returns the value held before. */
static int __iAtomicAnd(int *address, int val)
{
  const int previous = *address;
  const int masked = previous & val;

  *address = masked;
  return previous;
}
---|
3005 | |
---|
/* Bitwise-ANDs val into *address and returns the value held before. */
static unsigned int __uAtomicAnd(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;
  const unsigned int masked = previous & val;

  *address = masked;
  return previous;
}
---|
3014 | |
---|
/* Bitwise-ORs val into *address and returns the value held before. */
static int __iAtomicOr(int *address, int val)
{
  const int previous = *address;
  const int merged = previous | val;

  *address = merged;
  return previous;
}
---|
3023 | |
---|
/* Bitwise-ORs val into *address and returns the value held before. */
static unsigned int __uAtomicOr(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;
  const unsigned int merged = previous | val;

  *address = merged;
  return previous;
}
---|
3032 | |
---|
/* Bitwise-XORs val into *address and returns the value held before. */
static int __iAtomicXor(int *address, int val)
{
  const int previous = *address;
  const int toggled = previous ^ val;

  *address = toggled;
  return previous;
}
---|
3041 | |
---|
/* Bitwise-XORs val into *address and returns the value held before. */
static unsigned int __uAtomicXor(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;
  const unsigned int toggled = previous ^ val;

  *address = toggled;
  return previous;
}
---|
3050 | |
---|
/* Compare-and-swap: if *address equals compare it becomes val, otherwise
   its current value is written back unchanged. Returns the value held
   before the call. */
static int __iAtomicCAS(int *address, int compare, int val)
{
  const int previous = *address;
  int next = previous;

  if (previous == compare) {
    next = val;
  }
  *address = next;
  return previous;
}
---|
3059 | |
---|
/* Compare-and-swap: if *address equals compare it becomes val, otherwise
   its current value is written back unchanged. Returns the value held
   before the call. */
static unsigned int __uAtomicCAS(unsigned int *address, unsigned int compare, unsigned int val)
{
  const unsigned int previous = *address;
  unsigned int next = previous;

  if (previous == compare) {
    next = val;
  }
  *address = next;
  return previous;
}
---|
3068 | # 1292 "/usr/local/cuda/bin/../include/device_functions.h" 2 3 |
---|
3069 | # 1 "/usr/local/cuda/bin/../include/texture_fetch_functions.h" 1 3 |
---|
3070 | # 2007 "/usr/local/cuda/bin/../include/texture_fetch_functions.h" 3 |
---|
3071 | extern void __cudaTextureFetch(const void *tex, void *index, int integer, void *val); |
---|
3072 | |
---|
3073 | static int4 __itexfetchi(const void *tex, int4 index) |
---|
3074 | { |
---|
3075 | int4 val; |
---|
3076 | |
---|
3077 | __cudaTextureFetch(tex, &index, 1, &val); |
---|
3078 | |
---|
3079 | return val; |
---|
3080 | } |
---|
3081 | |
---|
3082 | static uint4 __utexfetchi(const void *tex, int4 index) |
---|
3083 | { |
---|
3084 | uint4 val; |
---|
3085 | |
---|
3086 | __cudaTextureFetch(tex, &index, 1, &val); |
---|
3087 | |
---|
3088 | return val; |
---|
3089 | } |
---|
3090 | |
---|
3091 | static float4 __ftexfetchi(const void *tex, int4 index) |
---|
3092 | { |
---|
3093 | float4 val; |
---|
3094 | |
---|
3095 | __cudaTextureFetch(tex, &index, 1, &val); |
---|
3096 | |
---|
3097 | return val; |
---|
3098 | } |
---|
3099 | |
---|
3100 | static int4 __itexfetch(const void *tex, float4 index, int dim) |
---|
3101 | { |
---|
3102 | int4 val; |
---|
3103 | |
---|
3104 | __cudaTextureFetch(tex, &index, 0, &val); |
---|
3105 | |
---|
3106 | return val; |
---|
3107 | } |
---|
3108 | |
---|
3109 | static uint4 __utexfetch(const void *tex, float4 index, int dim) |
---|
3110 | { |
---|
3111 | uint4 val; |
---|
3112 | |
---|
3113 | __cudaTextureFetch(tex, &index, 0, &val); |
---|
3114 | |
---|
3115 | return val; |
---|
3116 | } |
---|
3117 | |
---|
3118 | static float4 __ftexfetch(const void *tex, float4 index, int dim) |
---|
3119 | { |
---|
3120 | float4 val; |
---|
3121 | |
---|
3122 | __cudaTextureFetch(tex, &index, 0, &val); |
---|
3123 | |
---|
3124 | return val; |
---|
3125 | } |
---|
3126 | # 1293 "/usr/local/cuda/bin/../include/device_functions.h" 2 3 |
---|
3127 | # 859 "/usr/local/cuda/bin/../include/math_functions.h" 2 3 |
---|
3128 | |
---|
3129 | |
---|
/* Returns the top (sign) bit of a's bit pattern: 1 if set, 0 otherwise. */
extern __attribute__((weak)) int __cuda___signbitf(float a);
int __cuda___signbitf(float a)
{
  const unsigned int pattern = (unsigned int)__float_as_int(a);

  return (int)(pattern >> 31);
}
---|
3134 | |
---|
3135 | |
---|
3136 | |
---|
3137 | |
---|
/* Returns a value with the magnitude bits of a and the sign bit of b,
   assembled directly on the 32-bit patterns. */
extern __attribute__((weak)) float __cuda_copysignf(float a, float b);
float __cuda_copysignf(float a, float b)
{
  const unsigned int sign_bit = __float_as_int(b) & 0x80000000;
  const unsigned int magnitude = __float_as_int(a) & ~0x80000000;

  return __int_as_float(sign_bit | magnitude);
}
---|
3143 | # 883 "/usr/local/cuda/bin/../include/math_functions.h" 3 |
---|
/* Smaller of two signed ints (b when they are equal). */
extern __attribute__((weak)) int min(int a, int b);
int min(int a, int b)
{
  if (a < b) {
    return a;
  }
  return b;
}
---|
3148 | |
---|
/* Smaller of two unsigned ints (b when they are equal). */
extern __attribute__((weak)) unsigned int umin(unsigned int a, unsigned int b);
unsigned int umin(unsigned int a, unsigned int b)
{
  if (a < b) {
    return a;
  }
  return b;
}
---|
3153 | |
---|
/* Larger of two signed ints (b when they are equal). */
extern __attribute__((weak)) int max(int a, int b);
int max(int a, int b)
{
  if (a > b) {
    return a;
  }
  return b;
}
---|
3158 | |
---|
/* Larger of two unsigned ints (b when they are equal). */
extern __attribute__((weak)) unsigned int umax(unsigned int a, unsigned int b);
unsigned int umax(unsigned int a, unsigned int b)
{
  if (a > b) {
    return a;
  }
  return b;
}
---|
3163 | # 967 "/usr/local/cuda/bin/../include/math_functions.h" 3 |
---|
/* Rounds a to an integral value using the add-and-subtract-2^23 trick.
   Inputs with |a| >= 8388608.0f (2^23) are returned unchanged. For smaller
   magnitudes 2^23 is added to |a| and subtracted again; the intermediate is
   kept in a volatile so each step is actually stored as a float. The sign
   of a is then copied back onto the result. */
extern __attribute__((weak)) float __internal_nearbyintf(float a);
float __internal_nearbyintf(float a)
{
  const float magnitude = fabsf(a);
  volatile float shifted;

  if (magnitude >= 8388608.0f) {
    return a;
  }
  shifted = 8388608.0f + magnitude;
  shifted = shifted - 8388608.0f;
  return copysignf(shifted, a);
}
---|
3177 | |
---|
/* Minimum of a and b with explicit special-case handling on the raw bits:
   - if a is NaN (bits << 1 above 0xff000000) return b;
   - else if b is NaN return a;
   - if the OR of both bit patterns is exactly 0x80000000 (both operands are
     zeros and at least one is negative) return the pattern 0x80000000;
   - otherwise the ordinary "a < b ? a : b". */
extern __attribute__((weak)) float __internal_fminf(float a, float b);
float __internal_fminf(float a, float b)
{
  volatile union {
    float f;
    unsigned int i;
  } lhs, rhs;

  lhs.f = a;
  rhs.f = b;
  if ((lhs.i << 1) > 0xff000000) {
    return b;
  }
  if ((rhs.i << 1) > 0xff000000) {
    return a;
  }
  if ((lhs.i | rhs.i) == 0x80000000) {
    return __int_as_float(0x80000000);
  }
  if (a < b) {
    return a;
  }
  return b;
}
---|
3194 | |
---|
/* Maximum of a and b with explicit special-case handling on the raw bits:
   - if a is NaN (bits << 1 above 0xff000000) return b;
   - else if b is NaN return a;
   - if both compare equal to zero, AND the two bit patterns so the result
     is negative zero only when both inputs are;
   - otherwise the ordinary "a > b ? a : b". */
extern __attribute__((weak)) float __internal_fmaxf(float a, float b);
float __internal_fmaxf(float a, float b)
{
  volatile union {
    float f;
    unsigned int i;
  } lhs, rhs;

  lhs.f = a;
  rhs.f = b;
  if ((lhs.i << 1) > 0xff000000) {
    return b;
  }
  if ((rhs.i << 1) > 0xff000000) {
    return a;
  }
  if ((lhs.f == 0.0f) && (rhs.f == 0.0f)) {
    lhs.i = lhs.i & rhs.i;
    return lhs.f;
  }
  if (a > b) {
    return a;
  }
  return b;
}
---|
3212 | # 1055 "/usr/local/cuda/bin/../include/math_functions.h" 3 |
---|
/* Absolute value of a long; forwards to the C library labs(). */
extern __attribute__((weak)) long int __cuda_labs(long int a);
long int __cuda_labs(long int a)
{
  const long int magnitude = labs(a);

  return magnitude;
}
---|
3217 | |
---|
3218 | extern __attribute__((weak)) float __cuda_ceilf(float a); float __cuda_ceilf(float a) |
---|
3219 | { |
---|
3220 | return ceilf(a); |
---|
3221 | } |
---|
3222 | |
---|
3223 | extern __attribute__((weak)) float __cuda_floorf(float a); float __cuda_floorf(float a) |
---|
3224 | { |
---|
3225 | return floorf(a); |
---|
3226 | } |
---|
3227 | |
---|
3228 | extern __attribute__((weak)) float __cuda_sqrtf(float a); float __cuda_sqrtf(float a) |
---|
3229 | { |
---|
3230 | return sqrtf(a); |
---|
3231 | } |
---|
3232 | |
---|
3233 | extern __attribute__((weak)) float __cuda_rsqrtf(float a); float __cuda_rsqrtf(float a) |
---|
3234 | { |
---|
3235 | return 1.0f / sqrtf(a); |
---|
3236 | } |
---|
3237 | |
---|
3238 | extern __attribute__((weak)) float __cuda_truncf(float a); float __cuda_truncf(float a) |
---|
3239 | { |
---|
3240 | return truncf(a); |
---|
3241 | } |
---|
3242 | |
---|
/* Signed integer maximum; forwards to max(). */
extern __attribute__((weak)) int __cuda_max(int a, int b);
int __cuda_max(int a, int b)
{
  const int larger = max(a, b);

  return larger;
}
---|
3247 | |
---|
/* Signed integer minimum; forwards to min(). */
extern __attribute__((weak)) int __cuda_min(int a, int b);
int __cuda_min(int a, int b)
{
  const int smaller = min(a, b);

  return smaller;
}
---|
3252 | |
---|
/* Unsigned integer maximum; forwards to umax(). */
extern __attribute__((weak)) unsigned int __cuda_umax(unsigned int a, unsigned int b);
unsigned int __cuda_umax(unsigned int a, unsigned int b)
{
  const unsigned int larger = umax(a, b);

  return larger;
}
---|
3257 | |
---|
/* Unsigned integer minimum; forwards to umin(). */
extern __attribute__((weak)) unsigned int __cuda_umin(unsigned int a, unsigned int b);
unsigned int __cuda_umin(unsigned int a, unsigned int b)
{
  const unsigned int smaller = umin(a, b);

  return smaller;
}
---|
3262 | |
---|
/* float -> long long conversion; forwards to __float2ll_rn(). */
extern __attribute__((weak)) long long int __cuda_llrintf(float a);
long long int __cuda_llrintf(float a)
{
  const long long int rounded = __float2ll_rn(a);

  return rounded;
}
---|
3267 | |
---|
/* float -> long conversion: takes the __cuda_llrintf() result and casts it
   down to long int. */
extern __attribute__((weak)) long int __cuda_lrintf(float a);
long int __cuda_lrintf(float a)
{
  const long long int wide = __cuda_llrintf(a);

  return (long int)wide;
}
---|
3276 | |
---|
/* Round to an integral float; in this build it always takes the
   __internal_nearbyintf() path. */
extern __attribute__((weak)) float __cuda_nearbyintf(float a);
float __cuda_nearbyintf(float a)
{
  const float rounded = __internal_nearbyintf(a);

  return rounded;
}
---|
3285 | |
---|
/* Float maximum; in this build it always takes the __internal_fmaxf()
   path. */
extern __attribute__((weak)) float __cuda_fmaxf(float a, float b);
float __cuda_fmaxf(float a, float b)
{
  const float larger = __internal_fmaxf(a, b);

  return larger;
}
---|
3294 | |
---|
/* Float minimum; in this build it always takes the __internal_fminf()
   path. */
extern __attribute__((weak)) float __cuda_fminf(float a, float b);
float __cuda_fminf(float a, float b)
{
  const float smaller = __internal_fminf(a, b);

  return smaller;
}
---|
3303 | # 1162 "/usr/local/cuda/bin/../include/math_functions.h" 3 |
---|
/* Non-zero when |a| is strictly below the value with bit pattern
   0x7f800000 (+infinity); zero for infinities and NaN. */
extern __attribute__((weak)) int __cuda___finitef(float a);
int __cuda___finitef(float a)
{
  const int below_infinity = __cuda_fabsf(a) < __int_as_float(0x7f800000);

  return below_infinity;
}
---|
3308 | |
---|
/* Non-zero when |a| equals the value with bit pattern 0x7f800000
   (+infinity). */
extern __attribute__((weak)) int __cuda___isinff(float a);
int __cuda___isinff(float a)
{
  const int is_infinite = __cuda_fabsf(a) == __int_as_float(0x7f800000);

  return is_infinite;
}
---|
3313 | |
---|
/* Non-zero when a is NaN: the test "|a| <= +infinity" holds for every
   ordered value and fails only for NaN, so its negation detects NaN. */
extern __attribute__((weak)) int __cuda___isnanf(float a);
int __cuda___isnanf(float a)
{
  const int is_ordered = __cuda_fabsf(a) <= __int_as_float(0x7f800000);

  return !is_ordered;
}
---|
3318 | |
---|
/* Next representable float after a in the direction of b, computed by
   stepping a's bit pattern.
   - Either operand NaN: returns a + b.
   - Both operands zero (of either sign): returns b.
   - a zero, b non-zero: returns the pattern 0x00000001 with b's sign.
   - Otherwise the bit pattern of a is incremented when moving away from
     zero and decremented when moving towards zero; a == b returns a. */
extern __attribute__((weak)) float __cuda_nextafterf(float a, float b);
float __cuda_nextafterf(float a, float b)
{
  unsigned int bits_a;
  unsigned int bits_b;

  bits_a = __float_as_int(a);
  bits_b = __float_as_int(b);

  if (__cuda___isnanf(a) || __cuda___isnanf(b)) {
    return a + b;
  }
  if (__int_as_float(bits_a | bits_b) == 0.0f) {
    return b;
  }
  if (__int_as_float(bits_a) == 0.0f) {
    return __cuda_copysignf(__int_as_float(0x00000001), b);
  }

  /* a is non-zero and not NaN here, so exactly one of (a < 0), (a > 0)
     holds; the comparisons use the original a, never the stepped bits. */
  if (a < b) {
    if (a < 0.0f) {
      bits_a--;
    } else if (a > 0.0f) {
      bits_a++;
    }
  } else if (a > b) {
    if (a < 0.0f) {
      bits_a++;
    } else if (a > 0.0f) {
      bits_a--;
    }
  }
  return __int_as_float(bits_a);
}
---|
3352 | |
---|
/* Returns the NaN with bit pattern 0x7fffffff; tagp is ignored. */
extern __attribute__((weak)) float __cuda_nanf(const char *tagp);
float __cuda_nanf(const char *tagp)
{
  (void)tagp;
  return __int_as_float(0x7fffffff);
}
---|
3358 | |
---|
3359 | |
---|
/* Polynomial kernel used by __internal_accurate_logf().
   With x = a_1 + a_2 it evaluates, by Horner's rule in x*x,
     x^3 * (c2 + c1*x^2 + c0*x^4)
   and then adds a_2 and a_1 separately (low part first). The coefficients
   are approximately 1/3, 1/5 and 0.157, pre-divided by 4, 16 and 64. */
extern __attribute__((weak)) float __internal_atanhf_kernel(float a_1, float a_2);
float __internal_atanhf_kernel(float a_1, float a_2)
{
  float x, x_sq, acc;

  x = a_1 + a_2;
  x_sq = x * x;
  acc = 1.566305595598990E-001f/64.0f;
  acc = acc * x_sq + 1.995081856004762E-001f/16.0f;
  acc = acc * x_sq + 3.333382699617026E-001f/4.0f;
  acc = acc * x_sq;
  acc = acc * x + a_2;
  acc = acc + a_1;
  return acc;
}
---|
3374 | |
---|
3375 | |
---|
3376 | |
---|
3377 | |
---|
/* Rational approximation kernel: returns a + a^3 * P(a^2) / Q(a^2), where
   P is a quadratic and Q a monic cubic in a^2, both evaluated by Horner's
   rule with the constants below. */
extern __attribute__((weak)) float __internal_atanf_kernel(float a);
float __internal_atanf_kernel(float a)
{
  float sq, numer, denom;

  sq = a * a;
  numer = - 5.674867153f;
  numer = sq * - 0.823362947f + numer;
  numer = numer * sq - 6.565555096f;
  numer = numer * sq;
  numer = numer * a;
  denom = sq + 11.33538818f;
  denom = denom * sq + 28.84246826f;
  denom = denom * sq + 19.69667053f;
  denom = 1.0f / denom;
  a = numer * denom + a;
  return a;
}
---|
3395 | |
---|
3396 | |
---|
/* Rational approximation kernel: returns a + a^3 * P(a^2) / Q(a^2), where
   P and Q are both linear in a^2 (Q monic), using the constants below. */
extern __attribute__((weak)) float __internal_tan_kernel(float a);
float __internal_tan_kernel(float a)
{
  float sq, recip, acc;

  sq = a * a;
  acc = 4.114678393115178E-003f * sq - 8.231194034909670E-001f;
  recip = sq - 2.469348886157666E+000f;
  recip = 1.0f / recip;
  acc = acc * recip;
  acc = acc * sq;
  acc = acc * a + a;
  return acc;
}
---|
3410 | |
---|
/* Natural logarithm for positive normal floats.
   Inputs whose bit pattern (as a signed int) lies outside
   [0x00800000, 0x7f7fffff] - negatives, zeros, denormals, infinities and
   NaNs - are handed to __logf(). Otherwise a is split into a mantissa m,
   rebuilt with a biased exponent of 127, and an exponent e; m is halved
   (and e bumped) when it exceeds 1.414213562f. The result is
   e * 0.693147181f plus __internal_atanhf_kernel() applied to a high/low
   pair derived from m - 1 and (m - 1)/(m + 1). */
extern __attribute__((weak)) float __internal_accurate_logf(float a);
float __internal_accurate_logf(float a)
{
  float delta;
  float acc;
  float mant;
  int bits, expo;

  bits = __float_as_int(a);
  if (!((bits >= 0x00800000) && (bits <= 0x7f7fffff))) {
    return __logf(a);
  }

  mant = __int_as_float((bits & 0x807fffff) | 0x3f800000);
  expo = ((unsigned)bits >> 23) - 127;
  if (mant > 1.414213562f) {
    mant = mant * 0.5f;
    expo = expo + 1;
  }
  delta = mant - 1.0f;
  acc = mant + 1.0f;
  acc = delta / acc;
  acc = -delta * acc;
  acc = __internal_atanhf_kernel(delta, acc);
  acc = (float)expo * 0.693147181f + acc;
  return acc;
}
---|
3437 | |
---|
/* Base-2 logarithm: __internal_accurate_logf(a) scaled by 1.442695041f,
   i.e. log2(e). */
extern __attribute__((weak)) float __internal_accurate_log2f(float a);
float __internal_accurate_log2f(float a)
{
  const float natural = __internal_accurate_logf(a);

  return 1.442695041f * natural;
}
---|
3442 | |
---|
3443 | |
---|
/* Six 32-bit words used as the multi-word multiplier by
   __internal_trig_reduction_kernel(). The last word is the most
   significant one: 0xa2f9836e / 2^32 is approximately 0.63662 (2/pi). */
static unsigned int __cudart_i2opi_f [] = {
  0x3c439041, 0xdb629599, 0xf534ddc0,
  0xfc2757d1, 0x4e441529, 0xa2f9836e,
};
---|
3452 | |
---|
3453 | |
---|
3454 | extern __attribute__((weak)) float __internal_trig_reduction_kernel(float a, int *quadrant); float __internal_trig_reduction_kernel(float a, int *quadrant) |
---|
3455 | { |
---|
3456 | float j; |
---|
3457 | int q; |
---|
3458 | if (__cuda_fabsf(a) > 48039.0f) { |
---|
3459 | |
---|
3460 | unsigned int ia = __float_as_int(a); |
---|
3461 | unsigned int s = ia & 0x80000000; |
---|
3462 | unsigned int result[7]; |
---|
3463 | unsigned int phi, plo; |
---|
3464 | unsigned int hi, lo; |
---|
3465 | unsigned int e; |
---|
3466 | int idx; |
---|
3467 | e = ((ia >> 23) & 0xff) - 128; |
---|
3468 | ia = (ia << 8) | 0x80000000; |
---|
3469 | |
---|
3470 | idx = 4 - (e >> 5); |
---|
3471 | hi = 0; |
---|
3472 | |
---|
3473 | |
---|
3474 | |
---|
3475 | for (q = 0; q < 6; q++) { |
---|
3476 | plo = __cudart_i2opi_f[q] * ia; |
---|
3477 | phi = __umulhi (__cudart_i2opi_f[q], ia); |
---|
3478 | lo = hi + plo; |
---|
3479 | hi = phi + (lo < plo); |
---|
3480 | result[q] = lo; |
---|
3481 | } |
---|
3482 | result[q] = hi; |
---|
3483 | e = e & 31; |
---|
3484 | |
---|
3485 | |
---|
3486 | |
---|
3487 | hi = result[idx+2]; |
---|
3488 | lo = result[idx+1]; |
---|
3489 | if (e) { |
---|
3490 | q = 32 - e; |
---|
3491 | hi = (hi << e) | (lo >> q); |
---|
3492 | lo = (lo << e) | (result[idx] >> q); |
---|
3493 | } |
---|
3494 | q = hi >> 30; |
---|
3495 | |
---|
3496 | hi = (hi << 2) | (lo >> 30); |
---|
3497 | lo = (lo << 2); |
---|
3498 | e = (hi + (lo > 0)) > 0x80000000; |
---|
3499 | q += e; |
---|
3500 | if (s) q = -q; |
---|
3501 | if (e) { |
---|
3502 | unsigned int t; |
---|
3503 | hi = ~hi; |
---|
3504 | lo = -(int)lo; |
---|
3505 | t = (lo == 0); |
---|
3506 | hi += t; |
---|
3507 | s = s ^ 0x80000000; |
---|
3508 | } |
---|
3509 | *quadrant = q; |
---|
3510 | |
---|
3511 | e = 0; |
---|
3512 | while ((int)hi > 0) { |
---|
3513 | hi = (hi << 1) | (lo >> 31); |
---|
3514 | lo = (lo << 1); |
---|
3515 | e--; |
---|
3516 | } |
---|
3517 | lo = hi * 0xc90fdaa2; |
---|
3518 | hi = __umulhi(hi, 0xc90fdaa2); |
---|
3519 | if ((int)hi > 0) { |
---|
3520 | hi = (hi << 1) | (lo >> 31); |
---|
3521 | lo = (lo << 1); |
---|
3522 | e--; |
---|
3523 | } |
---|
3524 | hi = hi + (lo > 0); |
---|
3525 | ia = s | (((e + 126) << 23) + (hi >> 8) + ((hi << 24) >= 0x80000000)); |
---|
3526 | return __int_as_float(ia); |
---|
3527 | } |
---|
3528 | q = __float2int_rn(a * 0.636619772f); |
---|
3529 | j = (float)q; |
---|
3530 | a = a - j * 1.5703125000000000e+000f; |
---|
3531 | a = a - j * 4.8351287841796875e-004f; |
---|
3532 | a = a - j * 3.1385570764541626e-007f; |
---|
3533 | a = a - j * 6.0771005065061922e-011f; |
---|
3534 | *quadrant = q; |
---|
3535 | return a; |
---|
3536 | } |
---|
3537 | # 1405 "/usr/local/cuda/bin/../include/math_functions.h" 3 |
---|
/* Core of the exponential family: returns e^a * 2^scale.
   The integer part n = trunc(a * log2(e)) is split off, n * ln(2) is
   subtracted from a in two steps (a high and a low constant), and the
   remainder is converted back to a base-2 exponent. The result is
   exp2(remainder) * exp2(n + scale). */
extern __attribute__((weak)) float __internal_expf_kernel(float a, float scale);
float __internal_expf_kernel(float a, float scale)
{
  float whole, rest;

  whole = __cuda_truncf(a * 1.442695041f);
  rest = a - whole * 0.6931457519f;
  rest = rest - whole * 1.4286067653e-6f;
  rest = rest * 1.442695041f;
  rest = __cuda_exp2f(rest) * __cuda_exp2f(whole + scale);
  return rest;
}
---|
3549 | |
---|
/* e^a via __internal_expf_kernel(a, 0), with the result forced to 0 for
   a < -105 and to +infinity (pattern 0x7f800000) for a > 105. */
extern __attribute__((weak)) float __internal_accurate_expf(float a);
float __internal_accurate_expf(float a)
{
  float result = __internal_expf_kernel(a, 0.0f);

  if (a < -105.0f) {
    result = 0.0f;
  }
  if (a > 105.0f) {
    result = __int_as_float(0x7f800000);
  }
  return result;
}
---|
3558 | |
---|
/* 10^a. The integer part n = trunc(a * log2(10)) is split off, n * log10(2)
   is subtracted from a in two steps (a high and a low constant), and the
   remainder is converted to a base-2 exponent; the result is
   exp2(remainder) * exp2(n). It is forced to 0 for a < -46 and to
   +infinity (pattern 0x7f800000) for a > 46. */
extern __attribute__((weak)) float __internal_accurate_exp10f(float a);
float __internal_accurate_exp10f(float a)
{
  float whole, rest;

  whole = __cuda_truncf(a * 3.321928094f);
  rest = a - whole * 3.0102920532226563e-001f;
  rest = rest - whole * 7.9034171557301747e-007f;
  rest = rest * 3.321928094f;
  rest = __cuda_exp2f(rest) * __cuda_exp2f(whole);
  if (a < -46.0f) {
    rest = 0.0f;
  }
  if (a > 46.0f) {
    rest = __int_as_float(0x7f800000);
  }
  return rest;
}
---|
3571 | |
---|
/* Piecewise approximation selected by the size of a (the name suggests
   log-gamma for positive arguments):
     infinite a   : returned unchanged
     a >= 7.8     : series in 1/a combined with (a - 0.5) * log(a) - a,
                    with the log term carried as two halves
     3 <= a < 7.8 : rational function of (a - 3), plus (a - 3)
     1.5 <= a < 3 : polynomial in (a - 2)
     0.7 <= a<1.5 : polynomial in (1 - a)
     otherwise    : minus the log of a polynomial in a (also taken for NaN)
   Every Horner step has the form acc = acc * x + c, so subtracted
   coefficients are stored negated in the tables. */
extern __attribute__((weak)) float __internal_lgammaf_pos(float a);
float __internal_lgammaf_pos(float a)
{
  static const float mid_coeffs[] = {
    - 2.208948403848352E-004f,
    + 5.413142447864599E-004f,
    - 1.204516976842832E-003f,
    + 2.884251838546602E-003f,
    - 7.382757963931180E-003f,
    + 2.058131963026755E-002f,
    - 6.735248600734503E-002f,
    + 3.224670187176319E-001f,
    + 4.227843368636472E-001f,
  };
  static const float near_one_coeffs[] = {
    1.037396712740616E-001f,
    1.228036339653591E-001f,
    1.275242157462838E-001f,
    1.432166835245778E-001f,
    1.693435824224152E-001f,
    2.074079329483975E-001f,
    2.705875136435339E-001f,
    4.006854436743395E-001f,
    8.224669796332661E-001f,
    5.772156651487230E-001f,
  };
  static const float small_coeffs[] = {
    - 5.471285428060787E-003f,
    - 4.462712795343244E-002f,
    + 1.673177015593242E-001f,
    - 4.213597883575600E-002f,
    - 6.558672843439567E-001f,
    + 5.772153712885004E-001f,
  };
  float series;
  float p, r;
  unsigned int k;

  if (__cuda___isinff(a)) {
    return a;
  }

  if (a >= 7.8f) {
    p = 1.0f / a;
    r = p * p;
    series = 0.77783067e-3f;
    series = series * r - 0.2777655457e-2f;
    series = series * r + 0.83333273853e-1f;
    series = series * p + 0.918938533204672f;
    p = 0.5f * __internal_accurate_logf(a);
    r = a - 0.5f;
    p = p * r;
    r = p - a;
    p = p + series;
    r = r + p;
    return r;
  }

  if (a >= 3.0f) {
    a = a - 3.0f;
    /* numerator */
    p = - 7.488903254816711E+002f;
    p = p * a - 1.234974215949363E+004f;
    p = p * a - 4.106137688064877E+004f;
    p = p * a - 4.831066242492429E+004f;
    p = p * a - 1.430333998207429E+005f;
    /* denominator (monic) */
    r = a - 2.592509840117874E+002f;
    r = r * a - 1.077717972228532E+004f;
    r = r * a - 9.268505031444956E+004f;
    r = r * a - 2.063535768623558E+005f;
    r = p / r;
    r = r + a;
    return r;
  }

  if (a >= 1.5f) {
    a = a - 2.0f;
    r = + 4.959849168282574E-005f;
    for (k = 0; k < sizeof(mid_coeffs) / sizeof(mid_coeffs[0]); k++) {
      r = r * a + mid_coeffs[k];
    }
    r = r * a;
    return r;
  }

  if (a >= 0.7f) {
    a = 1.0f - a;
    r = + 4.588266515364258E-002f;
    for (k = 0; k < sizeof(near_one_coeffs) / sizeof(near_one_coeffs[0]); k++) {
      r = r * a + near_one_coeffs[k];
    }
    r = r * a;
    return r;
  }

  r = + 3.587515669447039E-003f;
  for (k = 0; k < sizeof(small_coeffs) / sizeof(small_coeffs[0]); k++) {
    r = r * a + small_coeffs[k];
  }
  r = r * a;
  r = r * a + a;
  return -__internal_accurate_logf(r);
}
---|
3655 | |
---|
3656 | |
---|
/* Odd polynomial in x: x + x^3 * (c2 + c1*x^2 + c0*x^4), evaluated by
   Horner's rule. Called on the reduced argument by the sine and cosine
   routines. */
extern __attribute__((weak)) float __internal_sin_kernel(float x);
float __internal_sin_kernel(float x)
{
  float sq, acc;

  sq = x * x;
  acc = - 1.95152959e-4f;
  acc = acc * sq + 8.33216087e-3f;
  acc = acc * sq - 1.66666546e-1f;
  acc = acc * sq;
  acc = acc * x + x;

  return acc;
}
---|
3670 | |
---|
3671 | |
---|
/* Even polynomial in x: 1 - x^2/2 + ... up to x^8, evaluated by Horner's
   rule in x^2. Called on the reduced argument by the sine and cosine
   routines. */
extern __attribute__((weak)) float __internal_cos_kernel(float x);
float __internal_cos_kernel(float x)
{
  float sq, acc;

  sq = x * x;
  acc = 2.44331571e-5f;
  acc = acc * sq - 1.38873163e-3f;
  acc = acc * sq + 4.16666457e-2f;
  acc = acc * sq - 5.00000000e-1f;
  acc = acc * sq + 1.00000000e+0f;
  return acc;
}
---|
3684 | |
---|
/* Sine of a.
   Infinite inputs give the NaN pattern 0x7fffffff; zero is returned as-is,
   preserving its sign. Otherwise a is reduced by
   __internal_trig_reduction_kernel(); bit 0 of the quadrant selects the
   cosine or sine kernel and bit 1 negates the result. */
extern __attribute__((weak)) float __internal_accurate_sinf(float a);
float __internal_accurate_sinf(float a)
{
  float reduced;
  int quadrant;

  if (__cuda___isinff(a)) {
    return __int_as_float(0x7fffffff);
  }
  if (a == 0.0f) {
    return a;
  }
  reduced = __internal_trig_reduction_kernel(a, &quadrant);

  reduced = (quadrant & 1) ? __internal_cos_kernel(reduced)
                           : __internal_sin_kernel(reduced);
  if (quadrant & 2) {
    reduced = -reduced;
  }
  return reduced;
}
---|
3708 | |
---|
3709 | |
---|
3710 | |
---|
3711 | |
---|
3712 | |
---|
3713 | |
---|
3714 | |
---|
/* Round to an integral float; forwards to __cuda_nearbyintf(). */
extern __attribute__((weak)) float __cuda_rintf(float a);
float __cuda_rintf(float a)
{
  const float rounded = __cuda_nearbyintf(a);

  return rounded;
}
---|
3719 | |
---|
/* Sine; in this build it always takes the __internal_accurate_sinf()
   path. */
extern __attribute__((weak)) float __cuda_sinf(float a);
float __cuda_sinf(float a)
{
  const float result = __internal_accurate_sinf(a);

  return result;
}
---|
3728 | |
---|
/* Cosine of a.
   Infinite inputs give the NaN pattern 0x7fffffff. Otherwise a is reduced
   by __internal_trig_reduction_kernel() and the quadrant is advanced by
   one (cosine is sine shifted by a quarter turn); bit 0 of the quadrant
   then selects the cosine or sine kernel and bit 1 negates the result. */
extern __attribute__((weak)) float __cuda_cosf(float a);
float __cuda_cosf(float a)
{
  float reduced;
  int quadrant;

  if (__cuda___isinff(a)) {
    return __int_as_float(0x7fffffff);
  }
  reduced = __internal_trig_reduction_kernel(a, &quadrant);

  quadrant++;
  reduced = (quadrant & 1) ? __internal_cos_kernel(reduced)
                           : __internal_sin_kernel(reduced);
  if (quadrant & 2) {
    reduced = -reduced;
  }
  return reduced;
}
---|
3754 | |
---|
/* Tangent of a.
   Infinite inputs give the NaN pattern 0x7fffffff. Otherwise a is reduced
   by __internal_trig_reduction_kernel() and passed through
   __internal_tan_kernel(); for odd quadrants the result is replaced by its
   negative reciprocal. */
extern __attribute__((weak)) float __cuda_tanf(float a);
float __cuda_tanf(float a)
{
  float reduced;
  int quadrant;

  if (__cuda___isinff(a)) {
    return __int_as_float(0x7fffffff);
  }
  reduced = __internal_trig_reduction_kernel(a, &quadrant);

  reduced = __internal_tan_kernel(reduced);
  if (quadrant & 1) {
    reduced = -1.0f / reduced;
  }
  return reduced;
}
---|
3775 | |
---|
/* log2f() replacement: delegates to __internal_accurate_log2f(). */
extern __attribute__((weak)) float __cuda_log2f(float a);
float __cuda_log2f(float a)
{
  float result = __internal_accurate_log2f(a);
  return result;
}
---|
3784 | |
---|
/* expf() replacement: delegates to __internal_accurate_expf(). */
extern __attribute__((weak)) float __cuda_expf(float a);
float __cuda_expf(float a)
{
  float result = __internal_accurate_expf(a);
  return result;
}
---|
3793 | |
---|
/* exp10f() replacement: delegates to __internal_accurate_exp10f(). */
extern __attribute__((weak)) float __cuda_exp10f(float a);
float __cuda_exp10f(float a)
{
  float result = __internal_accurate_exp10f(a);
  return result;
}
---|
3802 | |
---|
/*
 * coshf() replacement.
 *
 * a: input value; only its magnitude is used.
 * Returns +infinity (bit pattern 0x7f800000) once |a| >= 90, otherwise
 * 2*e + 0.125/e where e = __internal_expf_kernel(|a|, -2.0f).
 * NOTE(review): the second kernel argument looks like a power-of-two scale
 * (making e = exp(|a|)/4) - confirm against the kernel's definition.
 */
extern __attribute__((weak)) float __cuda_coshf(float a);
float __cuda_coshf(float a)
{
  float mag = __cuda_fabsf(a);
  float e = __internal_expf_kernel(mag, -2.0f);
  float res = 2.0f * e + 0.125f / e;

  return (mag >= 90.0f) ? __int_as_float(0x7f800000) : res;
}
---|
3815 | |
---|
/*
 * sinhf() replacement.
 *
 * a: input value.
 * For |a| < 1 an odd polynomial in |a| is evaluated; otherwise the result is
 * built from __internal_expf_kernel(|a|, -2.0f) and replaced by +infinity
 * once |a| >= 90. The sign of the input is copied onto the result.
 */
extern __attribute__((weak)) float __cuda_sinhf(float a);
float __cuda_sinhf(float a)
{
  float mag = __cuda_fabsf(a);
  float res;

  if (mag < 1.0f) {
    float sq = mag * mag;

    /* Horner evaluation of the polynomial part, then res = |a| + |a|*p(|a|^2). */
    res = 2.816951222e-6f;
    res = res * sq + 1.983615978e-4f;
    res = res * sq + 8.333350058e-3f;
    res = res * sq + 1.666666650e-1f;
    res = res * sq;
    res = res * mag + mag;
  } else {
    res = __internal_expf_kernel(mag, -2.0f);
    res = 2.0f * res - 0.125f / res;
    if (mag >= 90.0f) {
      res = __int_as_float(0x7f800000);   /* +infinity */
    }
  }
  return __cuda_copysignf(res, a);
}
---|
3840 | |
---|
/*
 * tanhf() replacement.
 *
 * a: input value.
 * |a| < 0.55 uses an odd polynomial; 0.55 <= |a| < 88 uses
 * 1 - 2/(exp(2|a|) + 1); |a| >= 88 yields 1. An input that satisfies none of
 * the comparisons (NaN) keeps its magnitude. The sign of a is copied onto the
 * result.
 */
extern __attribute__((weak)) float __cuda_tanhf(float a);
float __cuda_tanhf(float a)
{
  float mag = __cuda_fabsf(a);
  float res = mag;

  if (mag < 0.55f) {
    float sq = mag * mag;
    float p = 1.643758066599993e-2f;

    p = p * sq - 5.267181327760551e-2f;
    p = p * sq + 1.332072505223051e-1f;
    p = p * sq - 3.333294663641083e-1f;
    p = p * sq;
    res = p * mag + mag;
  } else if (mag < 88.0f) {
    res = 1.0f - 2.0f / (__internal_expf_kernel(2.0f * mag, 0.0f) + 1.0f);
  } else if (mag >= 88.0f) {
    res = 1.0f;
  }
  return __cuda_copysignf(res, a);
}
---|
3865 | |
---|
/*
 * atan2f() replacement.
 *
 * a: ordinate (its sign is copied onto the result).
 * b: abscissa.
 * Both-zero and both-infinite inputs are answered from constants (0 or pi,
 * pi/4 or 3*pi/4, chosen by the sign bit of b). All other inputs divide the
 * smaller magnitude by the larger, run the arctangent kernel, and then fold
 * the result with pi/2 and pi.
 */
extern __attribute__((weak)) float __cuda_atan2f(float a, float b);
float __cuda_atan2f(float a, float b)
{
  float abs_b = __cuda_fabsf(b);
  float abs_a = __cuda_fabsf(a);
  float angle;

  if (abs_b == 0.0f && abs_a == 0.0f) {
    angle = __cuda___signbitf(b) ? 3.141592654f : 0.0f;
  } else if (__cuda___isinff(abs_b) && __cuda___isinff(abs_a)) {
    angle = __cuda___signbitf(b) ? 2.356194490f : 0.785398163f;
  } else {
    float num, den;

    /* Keep the quotient passed to the kernel at or below one. */
    if (abs_b < abs_a) {
      den = abs_a;
      num = abs_b;
    } else {
      den = abs_b;
      num = abs_a;
    }
    angle = __internal_accurate_fdividef(num, den);
    angle = __internal_atanf_kernel(angle);

    if (__cuda_fabsf(a) > __cuda_fabsf(b)) angle = 1.570796327f - angle;
    if (b < 0.0f) angle = 3.141592654f - angle;
  }
  return __cuda_copysignf(angle, a);
}
---|
3898 | |
---|
/*
 * atanf() replacement.
 *
 * a: input value.
 * For |a| > 1 the kernel is evaluated on 1/|a| and the result is subtracted
 * from pi/2; otherwise the kernel is evaluated on |a| directly. The sign of
 * a is copied onto the result.
 */
extern __attribute__((weak)) float __cuda_atanf(float a);
float __cuda_atanf(float a)
{
  float mag = __cuda_fabsf(a);
  int use_reciprocal = (mag > 1.0f);
  float r = use_reciprocal ? 1.0f / mag : mag;

  r = __internal_atanf_kernel(r);
  if (use_reciprocal) {
    r = 1.570796327f - r;
  }
  return __cuda_copysignf(r, a);
}
---|
3917 | |
---|
3918 | |
---|
/*
 * Rational approximation used by the arcsine/arccosine wrappers.
 *
 * a: argument.
 * Returns a + a * N(a^2) / D(a^2), where N is a quadratic without constant
 * term and D is a monic quadratic in a^2. The operation order is kept one
 * step per statement so rounding matches the reference evaluation.
 */
extern __attribute__((weak)) float __internal_asinf_kernel(float a);
float __internal_asinf_kernel(float a)
{
  float sq = a * a;
  float num;
  float den;

  /* Numerator: ((c1 * sq + c0) * sq) * a. */
  num = - 0.501162291f;
  num = num * sq + 0.915201485f;
  num = num * sq;
  num = num * a;

  /* Denominator: (sq + d1) * sq + d0, then inverted. */
  den = sq - 5.478654385f;
  den = den * sq + 5.491230488f;
  den = 1.0f / den;

  return num * den + a;
}
---|
3934 | |
---|
/*
 * asinf() replacement.
 *
 * a: input value.
 * For |a| <= 0.575 the kernel is evaluated on |a|. For larger magnitudes the
 * kernel is evaluated on sqrt((1 - |a|) / 2) and the result r is mapped to
 * pi/2 - 2*r. The sign of a is copied onto the result.
 */
extern __attribute__((weak)) float __cuda_asinf(float a);
float __cuda_asinf(float a)
{
  float mag = __cuda_fabsf(a);
  int near_one = (mag > 0.575f);
  float root;
  float r;
  float folded;

  root = 1.0f - mag;
  root = 0.5f * root;
  root = __cuda_sqrtf(root);

  r = __internal_asinf_kernel(near_one ? root : mag);
  folded = -2.0f * r + 1.570796327f;

  return __cuda_copysignf(near_one ? folded : r, a);
}
---|
3951 | |
---|
/*
 * acosf() replacement.
 *
 * a: input value.
 * For |a| > 0.575 the result for |a| is 2*kernel(sqrt((1 - |a|)/2));
 * otherwise it is pi/2 - kernel(|a|). When the sign bit of a is set the
 * result is reflected to pi - result.
 */
extern __attribute__((weak)) float __cuda_acosf(float a);
float __cuda_acosf(float a)
{
  float mag = __cuda_fabsf(a);
  float root;
  float r;

  root = 1.0f - mag;
  root = 0.5f * root;
  root = __cuda_sqrtf(root);

  if (mag > 0.575f) {
    r = 2.0f * __internal_asinf_kernel(root);
  } else {
    r = 1.570796327f - __internal_asinf_kernel(mag);
  }
  return __cuda___signbitf(a) ? 3.141592654f - r : r;
}
---|
3968 | |
---|
/* logf() replacement: delegates to __internal_accurate_logf(). */
extern __attribute__((weak)) float __cuda_logf(float a);
float __cuda_logf(float a)
{
  float result = __internal_accurate_logf(a);
  return result;
}
---|
3977 | |
---|
/*
 * log10f() replacement: natural logarithm scaled by 0.434294482f
 * (numerically log10(e)).
 */
extern __attribute__((weak)) float __cuda_log10f(float a);
float __cuda_log10f(float a)
{
  float ln_a = __internal_accurate_logf(a);
  return 0.434294482f * ln_a;
}
---|
3986 | |
---|
/*
 * log1pf() replacement.
 *
 * a: input value.
 * Inside [-0.394, 0.65] the result comes from __internal_atanhf_kernel(a, t)
 * with t = -a * (a / (a + 2)); outside that interval (and for NaN) it is
 * simply the accurate logarithm of 1 + a.
 */
extern __attribute__((weak)) float __cuda_log1pf(float a);
float __cuda_log1pf(float a)
{
  if (a >= -0.394f && a <= 0.65f) {
    float t = a + 2.0f;

    t = a / t;
    t = -a * t;
    return __internal_atanhf_kernel (a, t);
  }
  return __internal_accurate_logf (1.0f + a);
}
---|
4005 | |
---|
/*
 * acoshf() replacement.
 *
 * a: input value.
 * When |a - 1| exceeds 2^23 the result is ln(2) + ln(a); otherwise it is
 * log1p((a - 1) + sqrt((a + 1) * (a - 1))).
 */
extern __attribute__((weak)) float __cuda_acoshf(float a);
float __cuda_acoshf(float a)
{
  float minus_one = a - 1.0f;
  float plus_one;

  if (__cuda_fabsf(minus_one) > 8388608.0f) {
    return 0.693147181f + __internal_accurate_logf(a);
  }
  plus_one = a + 1.0f;
  return __cuda_log1pf(minus_one + __cuda_sqrtf(plus_one * minus_one));
}
---|
4020 | |
---|
/*
 * asinhf() replacement.
 *
 * a: input value.
 * Very large magnitudes (> 8.507059173e37) use ln(2) + __logf(|a|); all
 * others use log1p(|a| + |a| / (1/|a| + sqrt(1 + 1/|a|^2))). The sign of a
 * is copied onto the result.
 */
extern __attribute__((weak)) float __cuda_asinhf(float a);
float __cuda_asinhf(float a)
{
  float mag = __cuda_fabsf(a);
  float res;

  if (mag > 8.507059173e37f) {
    res = 0.693147181f + __logf(mag);
  } else {
    float inv = 1.0f / mag;

    res = mag + mag / (inv + __cuda_sqrtf(1.0f + inv * inv));
    res = __cuda_log1pf(res);
  }
  return __cuda_copysignf(res, a);
}
---|
4035 | |
---|
/*
 * atanhf() replacement: 0.5 * log1p(2|a| / (1 - |a|)) with the sign of a
 * copied onto the result.
 */
extern __attribute__((weak)) float __cuda_atanhf(float a);
float __cuda_atanhf(float a)
{
  float mag = __cuda_fabsf(a);
  float ratio = (2.0f * mag) / (1.0f - mag);
  float half_log = 0.5f * __cuda_log1pf(ratio);

  return __cuda_copysignf(half_log, a);
}
---|
4045 | |
---|
/*
 * expm1f() replacement.
 *
 * a: input value.
 * Splits a into n*ln(2) + r (n integral), evaluates a polynomial for
 * exp(r) - 1, and recombines with 2^n. Inputs with |a| < 0.41 skip the
 * split. Saturates to +infinity when the scale exponent exceeds 128 and to
 * -1 when it is below -25.
 */
extern __attribute__((weak)) float __cuda_expm1f(float a);
float __cuda_expm1f(float a)
{
  float n;         /* rounded multiple of ln(2) */
  float r;         /* reduced argument */
  float k;         /* exponent actually passed to exp2f */
  float p;         /* polynomial value / running result */
  float scale;     /* 2^k */
  float scale_m1;  /* 2^k - 1 */

  /* ln(2) is applied as two constants whose sum is ~0.69314718. */
  n = __cuda_rintf (a * 1.442695041f);
  r = a - n * 0.6931457519f;
  r = r - n * 1.4286067653e-6f;

  if (__cuda_fabsf(a) < 0.41f) {
    r = a;
    n = 0.0f;
  }

  /* n == 128 is evaluated with exponent 127 and doubled afterwards. */
  k = (n == 128.0f) ? n - 1.0f : n;

  p = 1.38795078474044430E-003f;
  p = p * r + 8.38241261853264930E-003f;
  p = p * r + 4.16678317762833940E-002f;
  p = p * r + 1.66663978874356580E-001f;
  p = p * r + 4.99999940395997040E-001f;
  p = p * r;
  p = p * r + r;
  if (a == 0.0f) p = a;   /* hand a zero input back unchanged */

  scale = __cuda_exp2f (k);
  scale_m1 = scale - 1.0f;
  if (scale_m1 != 0.0f) p = p * scale + scale_m1;
  if (n == 128.0f) p = p + p;

  if (k > 128.0f) p = __int_as_float(0x7f800000);
  if (k < -25.0f) p = -1.0f;
  return p;
}
---|
4080 | |
---|
/*
 * hypotf() replacement.
 *
 * a, b: the two legs; only their magnitudes are used.
 * Computes hi * sqrt(1 + (lo/hi)^2) with hi the larger magnitude. If hi is
 * zero the result is hi + lo, and if either magnitude is +infinity the result
 * is +infinity.
 */
extern __attribute__((weak)) float __cuda_hypotf(float a, float b);
float __cuda_hypotf(float a, float b)
{
  float hi, lo, res;

  a = __cuda_fabsf(a);
  b = __cuda_fabsf(b);

  if (a > b) {
    hi = a;
    lo = b;
  } else {
    hi = b;
    lo = a;
  }

  res = __internal_accurate_fdividef(lo, hi);
  res = 1.0f + res * res;
  res = hi * __cuda_sqrtf(res);

  /* Both inputs zero: replace the result of the 0/0 quotient. */
  if (hi == 0.0f) {
    res = hi + lo;
  }
  /* An infinite leg wins regardless of what the other operand is. */
  if ((hi == __int_as_float(0x7f800000)) || (lo == __int_as_float(0x7f800000))) {
    res = __int_as_float(0x7f800000);
  }
  return res;
}
---|
4106 | |
---|
/*
 * cbrtf() replacement.
 *
 * a: input value.
 * Zeros and infinities are returned unchanged. Otherwise a first estimate
 * 2^(log2|a| / 3) is refined by one correction step
 * t - (t - |a|/t^2)/3 and the sign of a is restored.
 */
extern __attribute__((weak)) float __cuda_cbrtf(float a);
float __cuda_cbrtf(float a)
{
  float mag;
  float root;

  if (a == 0.0f || __cuda___isinff(a)) return a;

  mag = __cuda_fabsf(a);
  root = __cuda_exp2f(0.333333333f * __log2f(mag));
  root = root - (root - (mag / (root * root))) * 0.333333333f;

  return __cuda___signbitf(a) ? -root : root;
}
---|
4121 | |
---|
/*
 * erff() replacement.
 *
 * a: input value.
 * |a| < 1: a times an even polynomial in a.
 * 1 <= |a| <= +infinity: 1 - exp(-a^2) * P(1 / (1 + 0.3275911*|a|)), clamped
 * to 1 once |a| >= 5.5, with the sign bit of a OR-ed into the result.
 * Inputs failing both comparisons (NaN) are returned unchanged.
 */
extern __attribute__((weak)) float __cuda_erff(float a);
float __cuda_erff(float a)
{
  float mag = __cuda_fabsf(a);
  float p;

  if (mag < 1.0f) {
    float sq = mag * mag;

    p = -5.58510127926029810E-004f;
    p = p * sq + 4.90688891415893070E-003f;
    p = p * sq - 2.67027980930150640E-002f;
    p = p * sq + 1.12799056505903940E-001f;
    p = p * sq - 3.76122956138427440E-001f;
    p = p * sq + 1.12837911712623450E+000f;
    return a * p;
  }

  if (mag <= __int_as_float(0x7f800000)) {
    float u;
    float e;

    u = 0.3275911f * mag + 1.0f;
    u = 1.0f / u;
    p = 1.061405429f;
    p = p * u - 1.453152027f;
    p = p * u + 1.421413741f;
    p = p * u - 0.284496736f;
    p = p * u + 0.254829592f;
    p = p * u;
    e = __internal_expf_kernel(-a * a, 0.0f);
    p = 1.0f - e * p;
    if (mag >= 5.5f) {
      p = 1.0f;
    }
    /* p is non-negative here; OR-ing in bit 31 of a gives it a's sign. */
    return __int_as_float (__float_as_int(p) | (__float_as_int(a) & 0x80000000));
  }

  return a;
}
---|
4157 | |
---|
/*
 * erfcf() replacement.
 *
 * a: input value.
 * a <= 0.55: 1 - erff(a).
 * a > 10:    0.
 * otherwise: (P(a) / Q(a)) * exp(-a^2), where exp(-a^2) is built from a
 * truncated copy h of a (low 12 mantissa bits cleared) as
 * exp(-h*h) * exp(-(a - h)*(a + h)).
 */
extern __attribute__((weak)) float __cuda_erfcf(float a);
float __cuda_erfcf(float a)
{
  float num;    /* P(a), later the running result */
  float den;    /* Q(a), later its refined reciprocal */
  float head;   /* a with the low 12 mantissa bits cleared */
  float tail;   /* a - head */
  float e;      /* exponential factor */
  float rcp;

  if (a <= 0.55f) return 1.0f - __cuda_erff(a);
  if (a > 10.0f) return 0.0f;

  num = + 4.014893410762552E-006f;
  num = num * a + 5.640401259462436E-001f;
  num = num * a + 2.626649872281140E+000f;
  num = num * a + 5.486372652389673E+000f;
  num = num * a + 5.250714831459401E+000f;
  den = a + 4.651376250488319E+000f;
  den = den * a + 1.026302828878470E+001f;
  den = den * a + 1.140762166021288E+001f;
  den = den * a + 5.251211619089947E+000f;

  /* Reciprocal of Q followed by one correction step: 2h - Q*h*h. */
  rcp = 1.0f / den;
  den = 2.0f * rcp - den * rcp * rcp;
  num = num * den;

  head = __int_as_float(__float_as_int(a) & 0xfffff000);
  tail = a - head;
  e = -head * head;
  e = __internal_expf_kernel(e, 0.0f);
  if (tail != 0.0f) {
    a = a + head;
    tail = tail * a;
    head = __internal_expf_kernel(-tail, 0.0f);
    e = e * head;
  }
  num = num * e;
  return num;
}
---|
4201 | |
---|
/*
 * lgammaf() replacement.
 *
 * a: input value.
 * Non-negative inputs return __internal_lgammaf_pos(|a|) directly. For
 * negative inputs: an integral |a| gives +infinity, |a| < 1e-19 gives
 * -ln|a|, and everything else uses
 *   1.144729886 - ln(|sin(pi*|a|)| * |a|) - lgamma_pos(|a|)
 * (1.144729886 is numerically ln(pi)).
 */
extern __attribute__((weak)) float __cuda_lgammaf(float a);
float __cuda_lgammaf(float a)
{
  float pos_val;
  float x;
  float arg;
  float trig;
  int halves;

  pos_val = __internal_lgammaf_pos(__cuda_fabsf(a));
  if (a >= 0.0f) return pos_val;

  x = __cuda_fabsf(a);
  if (x == __cuda_floorf(x)) return __int_as_float(0x7f800000);
  if (x < 1e-19f) return -__internal_accurate_logf(x);

  /* Reduce pi*x by the nearest multiple of pi/2 before using the kernels. */
  arg = __cuda_rintf (2.0f * x);
  halves = (int)arg;
  arg = x - 0.5f * arg;
  arg = arg * 3.141592654f;
  trig = (halves & 1) ? __internal_cos_kernel(arg) : __internal_sin_kernel(arg);
  trig = __cuda_fabsf(trig);

  return 1.144729886f - __internal_accurate_logf(trig * x) - pos_val;
}
---|
4226 | |
---|
/*
 * ldexpf() replacement: a * 2^b.
 *
 * a: value to scale; zeros and infinities are returned unchanged, as is any
 *    input when b == 0.
 * b: binary exponent. It is applied in one, two, or four exp2f() factors
 *    depending on |b| (< 126, < 252, otherwise).
 */
extern __attribute__((weak)) float __cuda_ldexpf(float a, int b);
float __cuda_ldexpf(float a, int b)
{
  float mag = __cuda_fabsf(a);
  int abs_b;

  if (mag == 0.0f || __cuda___isinff(mag) || b == 0) return a;

  abs_b = __cuda_abs(b);
  if (abs_b < 126) {
    return a * __cuda_exp2f((float)b);
  }
  if (abs_b < 252) {
    int half = b / 2;
    return a * __cuda_exp2f((float)half) * __cuda_exp2f((float)(b - half));
  }
  {
    int quarter = b / 4;
    float step = __cuda_exp2f((float)quarter);
    return a * step * step * step * __cuda_exp2f((float)(b - 3 * quarter));
  }
}
---|
4247 | |
---|
/* scalbnf() replacement: forwards both arguments to __cuda_ldexpf(). */
extern __attribute__((weak)) float __cuda_scalbnf(float a, int b);
float __cuda_scalbnf(float a, int b)
{
  float scaled = __cuda_ldexpf(a, b);
  return scaled;
}
---|
4253 | |
---|
/*
 * scalblnf() replacement: clamps the long exponent into the int range and
 * forwards to __cuda_scalbnf().
 */
extern __attribute__((weak)) float __cuda_scalblnf(float a, long int b);
float __cuda_scalblnf(float a, long int b)
{
  int clamped = (b > 2147483647L)        ? 2147483647
              : (b < (-2147483647 - 1))  ? (-2147483647 - 1)
              :                            (int)b;

  return __cuda_scalbnf(a, clamped);
}
---|
4266 | |
---|
/*
 * frexpf() replacement.
 *
 * a: value to decompose.
 * b: receives the binary exponent (0 for zero, infinity and NaN inputs).
 * Returns the mantissa with the exponent field forced to 0x7e (magnitude in
 * [0.5, 1)); zero, infinity and NaN are returned as a + a. Inputs below
 * 1.175494351e-38 are pre-scaled by 2^24 and the exponent is compensated.
 */
extern __attribute__((weak)) float __cuda_frexpf(float a, int *b);
float __cuda_frexpf(float a, int *b)
{
  float mag = __cuda_fabsf(a);
  unsigned int prescale = 0;
  unsigned int field;

  if (mag < 1.175494351e-38f) {
    a *= 16777216.0f;
    prescale = 24;
  }

  field = ((__float_as_int(a) >> 23) & 0xff);
  if ((mag == 0.0f) || (field == 0xff)) {
    *b = 0;
    return a + a;
  }

  /* Unsigned arithmetic may wrap; storing into the int yields the signed exponent. */
  field = field - prescale - 126;
  *b = field;
  return __int_as_float(((__float_as_int(a) & 0x807fffff) | 0x3f000000));
}
---|
4290 | |
---|
/*
 * modff() replacement.
 *
 * a: value to split.
 * b: receives the integral part (a itself for infinity and NaN).
 * Returns the fractional part carrying the sign of a; a signed zero for
 * infinite inputs; and a itself for inputs that are neither finite nor
 * infinite.
 */
extern __attribute__((weak)) float __cuda_modff(float a, float *b);
float __cuda_modff(float a, float *b)
{
  if (__cuda___finitef(a)) {
    float whole = __cuda_truncf(a);

    *b = whole;
    return __cuda_copysignf(a - whole, a);
  }
  if (__cuda___isinff(a)) {
    *b = a;
    return __cuda_copysignf(0.0f, a);
  }
  *b = a;
  return a;
}
---|
4308 | |
---|
/*
 * fmodf() replacement.
 *
 * a: dividend (its sign is applied to the result).
 * b: divisor (only the magnitude is used).
 * NaN operands return a + b; an infinite dividend or zero divisor returns the
 * 0x7fffffff NaN pattern; |a| < |b| returns a unchanged. Otherwise |b| is
 * scaled up by a power of two to just below |a| and subtracted while halving
 * back down to |b|.
 */
extern __attribute__((weak)) float __cuda_fmodf(float a, float b);
float __cuda_fmodf(float a, float b)
{
  float signed_a;
  float step;
  int exp_a, exp_b;

  if (__cuda___isnanf(a) || __cuda___isnanf(b)) return a + b;

  signed_a = a;
  a = __cuda_fabsf(a);
  b = __cuda_fabsf(b);

  if (__cuda___isinff(a) || b == 0.0f) return __int_as_float(0x7fffffff);
  if (!(a >= b)) return signed_a;

  /* Exponent from the bit field, or from __log2f below 1.175494351e-38. */
  exp_a = (a < 1.175494351e-38f) ?
      ((int)__log2f(a)) : (((__float_as_int(a) >> 23) & 0xff) - 127);
  exp_b = (b < 1.175494351e-38f) ?
      ((int)__log2f(b)) : (((__float_as_int(b) >> 23) & 0xff) - 127);

  step = __cuda_ldexpf(b, exp_a - exp_b);
  if (step <= 0.5f * a) {
    step *= 2.0f;
  }

  /* Shift-and-subtract: one conditional subtraction per power of two. */
  for (; step >= b; step *= 0.5f) {
    if (a >= step) {
      a -= step;
    }
  }
  return __cuda_copysignf(a, signed_a);
}
---|
4351 | |
---|
/*
 * remainderf() replacement.
 *
 * a: dividend.
 * b: divisor (only the magnitude is used).
 * NaN operands return a + b; an infinite dividend or zero divisor returns the
 * 0x7fffffff NaN pattern. Otherwise |a| is reduced below |b| by
 * shift-and-subtract, then one more |b| is subtracted when the remainder is
 * above |b|/2, or exactly |b|/2 with an odd quotient (round to nearest even).
 * The sign bit of the original a is XOR-ed onto the result.
 */
extern __attribute__((weak)) float __cuda_remainderf(float a, float b);
float __cuda_remainderf(float a, float b)
{
  float orig_a;
  float twoa;
  unsigned int quot0 = 0;   /* lowest quotient bit produced by the loop */

  if (__cuda___isnanf(a) || __cuda___isnanf(b)) {
    return a + b;
  }
  orig_a = a;
  a = __cuda_fabsf(a);
  b = __cuda_fabsf(b);
  if (__cuda___isinff(a) || (b == 0.0f)) {
    return __int_as_float(0x7fffffff);
  } else if (a >= b) {
    /* Exponent from the bit field, or from __log2f below 1.175494351e-38. */
    int expoa = (a < 1.175494351e-38f) ?
        ((int)__log2f(a)) : (((__float_as_int(a) >> 23) & 0xff) - 127);
    int expob = (b < 1.175494351e-38f) ?
        ((int)__log2f(b)) : (((__float_as_int(b) >> 23) & 0xff) - 127);
    int scale = expoa - expob;
    float scaled_b = __cuda_ldexpf(b, scale);
    if (scaled_b <= 0.5f * a) {
      scaled_b *= 2.0f;
    }
# 2255 "/usr/local/cuda/bin/../include/math_functions.h" 3
    while (scaled_b >= b) {
      quot0 = 0;
      if (a >= scaled_b) {
        /* The former in-loop store to twoa was never read; it is recomputed below. */
        a -= scaled_b;
        quot0 = 1;
      }
      scaled_b *= 0.5f;
    }
  }

  /* Round the quotient to nearest, ties to even. */
  twoa = 2.0f * a;
  if ((twoa > b) || ((twoa == b) && quot0)) {
    a -= b;
    a = __cuda_copysignf (a, -1.0f);
  }
# 2287 "/usr/local/cuda/bin/../include/math_functions.h" 3
  a = __int_as_float((__float_as_int(orig_a) & 0x80000000)^
                     __float_as_int(a));
  return a;
}
---|
4400 | |
---|
/*
 * remquof() replacement.
 *
 * a:   dividend.
 * b:   divisor (only the magnitude is used for the reduction).
 * quo: receives the low three bits of the rounded quotient magnitude,
 *      negated when a and b have different sign bits; 0 on the NaN and
 *      invalid-operand paths.
 * Returns the remainder as in __cuda_remainderf: NaN operands give a + b, an
 * infinite dividend or zero divisor gives the 0x7fffffff NaN pattern, and the
 * sign bit of the original a is XOR-ed onto the reduced value.
 */
extern __attribute__((weak)) float __cuda_remquof(float a, float b, int* quo);
float __cuda_remquof(float a, float b, int* quo)
{
  float orig_a;
  float twoa;
  unsigned int quot = 0;
  unsigned int sign;

  if (__cuda___isnanf(a) || __cuda___isnanf(b)) {
    *quo = quot;
    return a + b;
  }
  orig_a = a;

  /* All-ones when the operand signs differ, zero otherwise. */
  sign = 0 - (__cuda___signbitf(a) != __cuda___signbitf(b));
  a = __cuda_fabsf(a);
  b = __cuda_fabsf(b);
  if (__cuda___isinff(a) || (b == 0.0f)) {
    *quo = quot;
    return __int_as_float(0x7fffffff);
  } else if (a >= b) {
    /* Exponent from the bit field, or from __log2f below 1.175494351e-38. */
    int expoa = (a < 1.175494351e-38f) ?
        ((int)__log2f(a)) : (((__float_as_int(a) >> 23) & 0xff) - 127);
    int expob = (b < 1.175494351e-38f) ?
        ((int)__log2f(b)) : (((__float_as_int(b) >> 23) & 0xff) - 127);
    int scale = expoa - expob;
    float scaled_b = __cuda_ldexpf(b, scale);
    if (scaled_b <= 0.5f * a) {
      scaled_b *= 2.0f;
    }
# 2340 "/usr/local/cuda/bin/../include/math_functions.h" 3
    while (scaled_b >= b) {
      quot <<= 1;
      if (a >= scaled_b) {
        /* The former in-loop store to twoa was never read; it is recomputed below. */
        a -= scaled_b;
        quot += 1;
      }
      scaled_b *= 0.5f;
    }
  }

  /* Round the quotient to nearest, ties to even. */
  twoa = 2.0f * a;
  if ((twoa > b) || ((twoa == b) && (quot & 1))) {
    quot++;
    a -= b;
    a = __cuda_copysignf (a, -1.0f);
  }
# 2375 "/usr/local/cuda/bin/../include/math_functions.h" 3
  a = __int_as_float((__float_as_int(orig_a) & 0x80000000)^
                     __float_as_int(a));
  /* Keep the low 3 bits; the mask is built unsigned because shifting the
     negative int ~0 left is undefined behaviour in C. */
  quot = quot & (~((~0u)<<3));
  /* (x ^ m) - m negates x when m is all-ones and leaves it alone when m is 0. */
  quot = quot ^ sign;
  quot = quot - sign;
  *quo = quot;
  return a;
}
---|
4460 | |
---|
/*
 * Integer-only fused multiply-add: computes a * b + c on the raw float bit
 * patterns.
 *
 * a, b: factors.
 * c:    addend.
 * Returns the combined result as a float; special operands (NaN, infinity,
 * zero, and operands with a zero exponent field) are handled up front.
 *
 * Working variables:
 *   xx, yy, zz     bit patterns of a, b, c; later xx:yy is the running
 *                  significand (high:low word) and zz:ww the aligned addend.
 *   expo_x/y/z     exponent fields minus one; expo_y is later reused for the
 *                  sign of the result.
 *   temp           scratch; in the second half it collects shifted-out bits.
 */
extern __attribute__((weak)) float __cuda_fmaf(float a, float b, float c); float __cuda_fmaf(float a, float b, float c)
{
  unsigned int xx, yy, zz, ww;
  unsigned int temp, s, u;
  unsigned int expo_x, expo_y, expo_z;

  xx = __float_as_int(a);
  yy = __float_as_int(b);
  zz = __float_as_int(c);
# 2401 "/usr/local/cuda/bin/../include/math_functions.h" 3
  /* Exponent fields minus one: a field of 0 wraps to 0xffffffff and a field
     of 0xff becomes 0xfe, so one unsigned "<= 0xFD" test rejects both. */
  temp = 0xff;
  expo_x = temp & (xx >> 23);
  expo_x = expo_x - 1;
  expo_y = temp & (yy >> 23);
  expo_y = expo_y - 1;
  expo_z = temp & (zz >> 23);
  expo_z = expo_z - 1;

  if (!((expo_x <= 0xFD) &&
        (expo_y <= 0xFD) &&
        (expo_z <= 0xFD))) {

    /* (bits << 1) drops the sign; anything above 0xff000000 is a NaN.
       Any NaN operand yields the 0x7fffffff NaN pattern. */
    if ((yy << 1) > 0xff000000) {
      return __int_as_float(0x7fffffff);
    }
    if ((zz << 1) > 0xff000000) {
      return __int_as_float(0x7fffffff);
    }
    if ((xx << 1) > 0xff000000) {
      return __int_as_float(0x7fffffff);
    }
# 2436 "/usr/local/cuda/bin/../include/math_functions.h" 3
    /* zero * infinity -> NaN. */
    if ((((xx << 1) == 0) && ((yy << 1) == 0xff000000)) ||
        (((yy << 1) == 0) && ((xx << 1) == 0xff000000))) {
      return __int_as_float(0x7fffffff);
    }
    /* Infinite product plus infinite addend of the opposite sign -> NaN. */
    if ((zz << 1) == 0xff000000) {
      if (((yy << 1) == 0xff000000) || ((xx << 1) == 0xff000000)) {
        if ((int)(xx ^ yy ^ zz) < 0) {
          return __int_as_float(0x7fffffff);
        }
      }
    }

    /* Remaining infinities: an infinite factor returns infinity with the
       product's sign; otherwise an infinite addend is returned as is. */
    if ((xx << 1) == 0xff000000) {
      xx = xx ^ (yy & 0x80000000);
      return __int_as_float(xx);
    }
    if ((yy << 1) == 0xff000000) {
      yy = yy ^ (xx & 0x80000000);
      return __int_as_float(yy);
    }
    if ((zz << 1) == 0xff000000) {
      return __int_as_float(zz);
    }

    /* c == -0 and the product is a negative zero: result is -0. */
    if (zz == 0x80000000) {
      if (((xx << 1) == 0) || ((yy << 1) == 0)) {
        if ((int)(xx ^ yy) < 0) {
          return __int_as_float(zz);
        }
      }
    }

    /* Any other zero product plus zero addend: result is +0. */
    if (((zz << 1) == 0) &&
        (((xx << 1) == 0) || ((yy << 1) == 0))) {
      zz &= 0x7fffffff;
      return __int_as_float(zz);
    }

    /* Zero product with a non-zero addend: the addend is the result. */
    if (((xx << 1) == 0) || ((yy << 1) == 0)) {
      return __int_as_float(zz);
    }

    /* Operands with a zero exponent field (non-zero at this point):
       shift the significand up until its top bit is set, adjusting the
       exponent for each step, then move it back and re-attach the sign. */
    if (expo_x == (unsigned int)-1) {
      temp = xx & 0x80000000;
      xx = xx << 8;
      while (!(xx & 0x80000000)) {
        xx <<= 1;
        expo_x--;
      }
      expo_x++;
      xx = (xx >> 8) | temp;
    }

    if (expo_y == (unsigned int)-1) {
      temp = yy & 0x80000000;
      yy = yy << 8;
      while (!(yy & 0x80000000)) {
        yy <<= 1;
        expo_y--;
      }
      expo_y++;
      yy = (yy >> 8) | temp;
    }

    if ((expo_z == (unsigned int)-1) && ((zz << 1) != 0)) {
      temp = zz & 0x80000000;
      zz = zz << 8;
      while (!(zz & 0x80000000)) {
        zz <<= 1;
        expo_z--;
      }
      expo_z++;
      zz = (zz >> 8) | temp;
    }
  }

  /* Multiply: exponents add, signs XOR (kept in expo_y), and the two
     significands (with the implicit leading one forced on) are multiplied
     into the 64-bit pair xx:yy via __umulhi and the low 32-bit product. */
  expo_x = expo_x + expo_y;
  expo_y = xx ^ yy;
  xx = xx & 0x00ffffff;
  yy = yy << 8;
  xx = xx | 0x00800000;
  yy = yy | 0x80000000;

  s = __umulhi(xx, yy);
  yy = xx * yy;
  xx = s;
  expo_x = expo_x - 127 + 2;
  expo_y = expo_y & 0x80000000;

  /* Normalize so that bit 23 of xx is set. */
  if (xx < 0x00800000) {
    xx = (xx << 1) | (yy >> 31);
    yy = (yy << 1);
    expo_x--;
  }
  temp = 0;
  if ((zz << 1) != 0) {
    /* Non-zero addend: split off its sign (s) and significand (zz:ww). */
    s = zz & 0x80000000;
    zz &= 0x00ffffff;
    zz |= 0x00800000;
    ww = 0;

    /* Make xx:yy / expo_x / expo_y describe the operand with the larger
       exponent by swapping product and addend when necessary. */
    if ((int)expo_z > (int)expo_x) {
      temp = expo_z;
      expo_z = expo_x;
      expo_x = temp;
      temp = zz;
      zz = xx;
      xx = temp;
      temp = ww;
      ww = yy;
      yy = temp;
      temp = expo_y;
      expo_y = s;
      s = temp;
    }

    /* expo_z becomes the alignment shift; u has bit 31 set when the two
       operands differ in sign. */
    expo_z = expo_x - expo_z;
    u = expo_y ^ s;
    if (expo_z <= 49) {
      /* Shift zz:ww right by expo_z; bits that fall off are kept in temp,
         with anything below that reduced to a single sticky bit. */
      temp = 0;
      while (expo_z >= 32) {
        temp = ww | (temp != 0);
        ww = zz;
        zz = 0;
        expo_z -= 32;
      }
      if (expo_z) {
        temp = ((temp >> expo_z) | (ww << (32 - expo_z)) |
                ((temp << (32 - expo_z)) != 0));
        ww = (ww >> expo_z) | (zz << (32 - expo_z));
        zz = (zz >> expo_z);
      }
    } else {
      /* Smaller operand is entirely below the kept bits: sticky only. */
      temp = 1;
      ww = 0;
      zz = 0;
    }
    if ((int)u < 0) {
      /* Signs differ: three-word subtraction (xx:yy:0) - (zz:ww:temp),
         with s carrying the borrow between words. */
      temp = (unsigned)(-(int)temp);
      s = (temp != 0);
      u = yy - s;
      s = u > yy;
      yy = u - ww;
      s += yy > u;
      xx = (xx - zz) - s;
      if (!(xx | yy | temp)) {
        /* Exact cancellation: xx is 0 here, so this returns +0. */
        return __int_as_float(xx);
      }
      if ((int)xx < 0) {
        /* Difference went negative: negate the three-word value
           (complement and add one) and flip the result sign. */
        temp = ~temp;
        yy = ~yy;
        xx = ~xx;
        if (++temp == 0) {
          if (++yy == 0) {
            ++xx;
          }
        }
        expo_y ^= 0x80000000;
      }
      /* Renormalize after cancellation until bit 23 of xx is set. */
      while (!(xx & 0x00800000)) {
        xx = (xx << 1) | (yy >> 31);
        yy = (yy << 1);
        expo_x--;
      }
    } else {
      /* Signs agree: two-word addition with carry s. */
      yy = yy + ww;
      s = yy < ww;
      xx = xx + zz + s;
      if (xx & 0x01000000) {
        /* Carry out of bit 23: shift right one place, keeping the bit
           that falls off yy in temp. */
        temp = temp | (yy << 31);
        yy = (yy >> 1) | (xx << 31);
        xx = ((xx & 0x80000000) | (xx >> 1)) & ~0x40000000;
        expo_x++;
      }
    }
  }
  /* temp now holds the bits below the kept significand, with everything
     further down folded into its lowest bit. */
  temp = yy | (temp != 0);
  if (expo_x <= 0xFD) {
    /* Result exponent in range: attach the sign, round to nearest with ties
       to even (temp == 0x80000000 is the exact halfway case), then add the
       exponent. */
    xx |= expo_y;
    s = xx & 1;
    xx += (temp == 0x80000000) ? s : (temp >> 31);
    xx = xx + (expo_x << 23);
    return __int_as_float(xx);
  } else if ((int)expo_x >= 126) {
    /* Exponent too large: signed infinity. */
    xx = expo_y | 0x7f800000;
    return __int_as_float(xx);
  }

  /* Exponent too small: expo_x becomes the right-shift distance. */
  expo_x = (unsigned int)(-(int)expo_x);
  if (expo_x > 25) {
    /* Shifted out completely: signed zero. */
    return __int_as_float(expo_y);
  }
  /* Shift the significand down, keep the shifted-out bits (plus a sticky
     bit from yy) for rounding to nearest with ties to even. */
  yy = (xx << (32 - expo_x)) | ((yy) ? 1 : 0);
  xx = expo_y + (xx >> expo_x);
  xx = xx + ((yy==0x80000000) ? (xx & 1) : (yy >> 31));
  xx |= expo_y;

  return __int_as_float(xx);
}
---|
4722 | |
---|
/*
 * 32-entry lookup table. Entry i is numerically 2^(i/32) rounded to single
 * precision (entry 0 is 1, entry 16 is ~sqrt(2)).
 */
static float __cudart_A1[32] =
{
  /*  0.. 3 */ 1.0000000000e+000f, 1.0218971968e+000f, 1.0442737341e+000f, 1.0671404600e+000f,
  /*  4.. 7 */ 1.0905077457e+000f, 1.1143867970e+000f, 1.1387885809e+000f, 1.1637248993e+000f,
  /*  8..11 */ 1.1892070770e+000f, 1.2152473927e+000f, 1.2418577671e+000f, 1.2690509558e+000f,
  /* 12..15 */ 1.2968395948e+000f, 1.3252366781e+000f, 1.3542555571e+000f, 1.3839099407e+000f,
  /* 16..19 */ 1.4142135382e+000f, 1.4451807737e+000f, 1.4768261909e+000f, 1.5091644526e+000f,
  /* 20..23 */ 1.5422108173e+000f, 1.5759809017e+000f, 1.6104903221e+000f, 1.6457555294e+000f,
  /* 24..27 */ 1.6817928553e+000f, 1.7186193466e+000f, 1.7562521696e+000f, 1.7947090864e+000f,
  /* 28..31 */ 1.8340080976e+000f, 1.8741676807e+000f, 1.9152065516e+000f, 1.9571441412e+000f
};
---|
4758 | |
---|
/*
 * Companion table to __cudart_A1.  __internal_accurate_powf subtracts
 * __cudart_A2[i] right after subtracting __cudart_A1[i]; the tiny magnitudes
 * (around 1e-8) suggest these are the low-order residuals of the A1 entries
 * -- NOTE(review): confirm against the original header.
 */
static float __cudart_A2[32] =
{
    /*  0.. 3 */  0.0000000000e+000f, -4.8115598617e-008f,  4.8334701575e-008f, -5.9337519787e-008f,
    /*  4.. 7 */ -1.3077539940e-008f, -5.4355400181e-008f,  5.3862223126e-008f, -4.0514414934e-008f,
    /*  8..11 */  3.7976352729e-008f, -3.2673948880e-008f,  4.4968381019e-008f,  1.4193333175e-009f,
    /* 12..15 */ -4.0189995332e-008f, -3.4963733242e-008f, -1.0123349270e-008f, -5.8755773580e-008f,
    /* 16..19 */  2.4203234972e-008f,  3.3241999375e-008f, -4.5008988536e-008f, -2.4959373235e-008f,
    /* 20..23 */  8.0709048333e-009f, -5.6610254262e-008f,  9.8362171741e-009f, -5.1249720912e-008f,
    /* 24..27 */ -2.4755326677e-008f, -4.8496175964e-008f, -9.2357703707e-009f, -1.1415044909e-008f,
    /* 28..31 */ -1.1239277953e-008f, -4.6630056261e-008f,  9.8453281083e-009f, -1.7021804410e-008f
};
---|
4794 | |
---|
/*
 * Multiplier table used by __internal_accurate_powf after the A1/A2
 * subtraction.  Entry i is approximately 1 / __cudart_A1[i], i.e. the
 * values fall from 1.0 towards 0.5.
 */
static float __cudart_Ainv[32] =
{
    /*  0.. 3 */ 1.0000000000e+000f, 9.7857207060e-001f, 9.5760327578e-001f, 9.3708384037e-001f,
    /*  4.. 7 */ 9.1700404882e-001f, 8.9735454321e-001f, 8.7812608480e-001f, 8.5930967331e-001f,
    /*  8..11 */ 8.4089642763e-001f, 8.2287776470e-001f, 8.0524516106e-001f, 7.8799045086e-001f,
    /* 12..15 */ 7.7110540867e-001f, 7.5458222628e-001f, 7.3841309547e-001f, 7.2259038687e-001f,
    /* 16..19 */ 7.0710676908e-001f, 6.9195497036e-001f, 6.7712777853e-001f, 6.6261833906e-001f,
    /* 20..23 */ 6.4841979742e-001f, 6.3452547789e-001f, 6.2092888355e-001f, 6.0762369633e-001f,
    /* 24..27 */ 5.9460353851e-001f, 5.8186244965e-001f, 5.6939429045e-001f, 5.5719339848e-001f,
    /* 28..31 */ 5.4525387287e-001f, 5.3357023001e-001f, 5.2213686705e-001f, 5.1094859838e-001f
};
---|
4830 | |
---|
4831 | extern __attribute__((weak)) float __internal_accurate_powf(float a, float b); float __internal_accurate_powf(float a, float b) |
---|
4832 | { |
---|
4833 | int i; |
---|
4834 | float t; |
---|
4835 | int expo; |
---|
4836 | float log_hi, log_lo; |
---|
4837 | float b_hi, b_lo; |
---|
4838 | float prod_hi, prod_lo; |
---|
4839 | |
---|
4840 | if ((a > 0.707106781f) && (a < 1.414213562f)) { |
---|
4841 | float f, g, u, v, q; |
---|
4842 | |
---|
4843 | |
---|
4844 | |
---|
4845 | |
---|
4846 | |
---|
4847 | |
---|
4848 | f = a - 1.0f; |
---|
4849 | g = a + 1.0f; |
---|
4850 | g = 1.0f / g; |
---|
4851 | u = 2.0f * f * g; |
---|
4852 | v = u * u; |
---|
4853 | q = 1.49356810919559350E-001f/64.0f; |
---|
4854 | q = q * v + 1.99887797540072460E-001f/16.0f; |
---|
4855 | q = q * v + 3.33333880955515580E-001f/4.0f; |
---|
4856 | q = q * v; |
---|
4857 | q = q * u; |
---|
4858 | log_hi = __int_as_float(__float_as_int(u) & 0xfffff000); |
---|
4859 | v = __int_as_float(__float_as_int(f) & 0xfffff000); |
---|
4860 | u = 2.0f * (f - log_hi); |
---|
4861 | f = f - v; |
---|
4862 | u = u - log_hi * v; |
---|
4863 | u = u - log_hi * f; |
---|
4864 | u = g * u; |
---|
4865 | log_lo = q + u; |
---|
4866 | |
---|
4867 | |
---|
4868 | b_hi = __int_as_float(__float_as_int(b) & 0xfffff000); |
---|
4869 | b_lo = b - b_hi; |
---|
4870 | prod_lo = b_lo * log_lo; |
---|
4871 | prod_lo += b_lo * log_hi; |
---|
4872 | prod_lo += b_hi * log_lo; |
---|
4873 | prod_hi = b_hi * log_hi; |
---|
4874 | |
---|
4875 | |
---|
4876 | return __cuda_expf(prod_hi) * __cuda_expf(prod_lo); |
---|
4877 | } |
---|
4878 | |
---|
4879 | |
---|
4880 | if (a >= 1.175494351e-38f) { |
---|
4881 | i = __float_as_int(a); |
---|
4882 | expo = ((i >> 23) & 0xff) - 127; |
---|
4883 | } else { |
---|
4884 | a *= 16777216.0f; |
---|
4885 | i = __float_as_int(a); |
---|
4886 | expo = ((i >> 23) & 0xff) - 127 - 24; |
---|
4887 | } |
---|
4888 | i = (i & 0x007fffff) | (0x3f800000); |
---|
4889 | t = __int_as_float(i); |
---|
4890 | |
---|
4891 | i = 0; |
---|
4892 | if (t >= __cudart_A1[i+16]) i += 16; |
---|
4893 | if (t >= __cudart_A1[i+8]) i += 8; |
---|
4894 | if (t >= __cudart_A1[i+4]) i += 4; |
---|
4895 | if (t >= __cudart_A1[i+2]) i += 2; |
---|
4896 | if (t >= __cudart_A1[i+1]) i += 1; |
---|
4897 | |
---|
4898 | t = t - __cudart_A1[i]; |
---|
4899 | t = t - __cudart_A2[i]; |
---|
4900 | |
---|
4901 | t = t * __cudart_Ainv[i]; |
---|
4902 | |
---|
4903 | |
---|
4904 | log_hi = (float)expo + (float)i * 0.03125f; |
---|
4905 | |
---|
4906 | log_lo = - 3.42338934684934650E-001f; |
---|
4907 | log_lo = log_lo * t + 4.80524913518140690E-001f; |
---|
4908 | log_lo = log_lo * t - 7.21345070621603800E-001f; |
---|
4909 | log_lo = log_lo * t + 1.44269503837073180E+000f; |
---|
4910 | log_lo = log_lo * t; |
---|
4911 | |
---|
4912 | |
---|
4913 | b_hi = __int_as_float(__float_as_int(b) & 0xfffff000); |
---|
4914 | b_lo = b - b_hi; |
---|
4915 | prod_lo = b_lo * log_lo; |
---|
4916 | prod_lo = prod_lo + b_lo * log_hi; |
---|
4917 | prod_lo = prod_lo + b_hi * log_lo; |
---|
4918 | prod_hi = b_hi * log_hi; |
---|
4919 | |
---|
4920 | |
---|
4921 | if (prod_hi >= 256.0f) { |
---|
4922 | return __int_as_float(0x7f800000); |
---|
4923 | } |
---|
4924 | if (prod_hi <= -256.0f) { |
---|
4925 | return 0.0f; |
---|
4926 | } |
---|
4927 | |
---|
4928 | |
---|
4929 | b = __cuda_exp2f (0.5f * prod_hi); |
---|
4930 | t = __cuda_exp2f (prod_lo); |
---|
4931 | t = t * b; |
---|
4932 | t = t * b; |
---|
4933 | return t; |
---|
4934 | } |
---|
4935 | |
---|
/*
 * Raises a to the integer power b by binary exponentiation (square and
 * multiply).  A negative b yields the reciprocal of a^|b|; b == 0 yields 1.
 *
 * |b| is formed in unsigned arithmetic so that b == INT_MIN is well defined
 * (negating INT_MIN as a signed int overflows).
 */
extern __attribute__((weak)) float __cuda_powif(float a, int b);
float __cuda_powif(float a, int b)
{
    unsigned int e = (b < 0) ? (0u - (unsigned int)b) : (unsigned int)b;
    float r = 1.0f;

    while (1) {
        if ((e & 1) != 0) {
            r = r * a;
        }
        e = e >> 1;
        /* Exit before squaring again so no unused a*a is computed. */
        if (e == 0) {
            return b < 0 ? 1.0f/r : r;
        }
        a = a * a;
    }
}
---|
4952 | |
---|
/*
 * Raises a to the integer power b by binary exponentiation (square and
 * multiply).  A negative b yields the reciprocal of a^|b|; b == 0 yields 1.
 *
 * |b| is formed in unsigned arithmetic so that b == INT_MIN is well defined
 * (negating INT_MIN as a signed int overflows).
 */
extern __attribute__((weak)) double __cuda_powi(double a, int b);
double __cuda_powi(double a, int b)
{
    unsigned int e = (b < 0) ? (0u - (unsigned int)b) : (unsigned int)b;
    double r = 1.0;

    while (1) {
        if ((e & 1) != 0) {
            r = r * a;
        }
        e = e >> 1;
        /* Exit before squaring again so no unused a*a is computed. */
        if (e == 0) {
            return b < 0 ? 1.0/r : r;
        }
        a = a * a;
    }
}
---|
4969 | |
---|
/*
 * Single-precision pow(a, b).
 *
 * Special cases are resolved first, in this order: a == 1 or b == 0,
 * NaN operands, a == +infinity, infinite b, a == 0, a == -infinity, and a
 * negative base with a non-integer exponent (returns the 0x7fffffff NaN
 * pattern).  Everything else is computed on |a| by
 * __internal_accurate_powf, and the sign is flipped when a is negative and
 * b is an odd integer.
 */
extern __attribute__((weak)) float __cuda_powf(float a, float b);
float __cuda_powf(float a, float b)
{
    float pos_inf;
    float result;
    int odd_exponent;

    if (a == 1.0f || b == 0.0f) {
        return 1.0f;
    }
    if (__cuda___isnanf(a) || __cuda___isnanf(b)) {
        return a + b;
    }

    pos_inf = __int_as_float(0x7f800000);
    if (a == pos_inf) {
        if (__cuda___signbitf(b)) {
            return 0.0f;
        }
        return pos_inf;
    }

    if (__cuda___isinff(b)) {
        if (a == -1.0f) {
            return 1.0f;
        }
        if (__cuda_fabsf(a) > 1.0f) {
            result = pos_inf;
        } else {
            result = 0.0f;
        }
        if (b < 0.0f) {
            result = 1.0f / result;
        }
        return result;
    }

    /* b is an odd integer exactly when b - 2*floor(b/2) equals 1. */
    odd_exponent = (b - (2.0f * floorf(0.5f * b))) == 1.0f;

    if (a == 0.0f) {
        /* Only an odd exponent carries the sign of the zero through. */
        result = odd_exponent ? a : 0.0f;
        if (b < 0.0f) {
            result = 1.0f / result;
        }
        return result;
    }

    if (a == -pos_inf) {
        if (b < 0.0f) {
            result = -1.0f/a;
        } else {
            result = -a;
        }
        if (odd_exponent) {
            result = __int_as_float(__float_as_int(result) ^ 0x80000000);
        }
        return result;
    }

    if ((a < 0.0f) && (b != __cuda_truncf(b))) {
        return __int_as_float(0x7fffffff);
    }

    result = __cuda_fabsf(a);
    result = __internal_accurate_powf(result, b);
    if ((a < 0.0f) && odd_exponent) {
        result = __int_as_float(__float_as_int(result) ^ 0x80000000);
    }
    return result;
}
---|
5022 | |
---|
5023 | |
---|
/*
 * Evaluates a fixed degree-8 polynomial in a using Horner's scheme.
 * __cuda_tgammaf divides by (or inverts a product with) the result; the
 * low-order coefficients (1, 0.5772..., -0.6558...) suggest an
 * approximation of 1/Gamma(1 + a) -- NOTE(review): confirm.
 */
extern __attribute__((weak)) float __internal_tgammaf_kernel(float a);
float __internal_tgammaf_kernel(float a)
{
    /* Coefficients from the highest power of a down to the constant term. */
    static const float coeff[9] = {
        -1.05767296987211380E-003f,
         7.09279059435508670E-003f,
        -9.65347121958557050E-003f,
        -4.21736613253687960E-002f,
         1.66542401247154280E-001f,
        -4.20043267827838460E-002f,
        -6.55878234051332940E-001f,
         5.77215696929794240E-001f,
         1.00000000000000000E+000f
    };
    float acc = coeff[0];
    int k;

    for (k = 1; k < 9; k++) {
        acc = acc * a + coeff[k];
    }
    return acc;
}
---|
5038 | |
---|
5039 | |
---|
5040 | |
---|
5041 | |
---|
5042 | |
---|
/*
 * Single-precision gamma function.
 *
 * Non-negative arguments are clamped to 36, stepped down by 1 while
 * accumulating the product of the stepped values, and finished with
 * __internal_tgammaf_kernel.  Negative arguments are stepped up instead;
 * negative integers return the 0x7fffffff NaN pattern.  Beyond +/-34.03
 * part of the product is applied separately after the division, and
 * arguments below -42 with an even truncated value return -0.
 */
extern __attribute__((weak)) float __cuda_tgammaf(float a);
float __cuda_tgammaf(float a)
{
    float arg = a;
    float reduced;
    float scale;

    if (arg >= 0.0f) {
        if (arg > 36.0f) arg = 36.0f;
        scale = 1.0f;
        reduced = arg;
        if (arg > 34.03f) {
            /* One factor is held back here and multiplied in at the end. */
            reduced -= 1.0f;
        }
        while (reduced > 1.5f) {
            reduced = reduced - 1.0f;
            scale = scale * reduced;
        }
        if (arg >= 0.5f) {
            reduced = reduced - 1.0f;
        }
        reduced = __internal_tgammaf_kernel(reduced);
        if (arg < 0.5f) {
            reduced = reduced * arg;
        }
        scale = scale / reduced;
        if (arg > 34.03f) {
            reduced = arg - 1.0f;
            scale = scale * reduced;
        }
        return scale;
    }

    /* Negative (or NaN) argument from here on. */
    if (arg == __cuda_floorf(arg)) {
        arg = __int_as_float(0x7fffffff);

        return arg;

    }
    if (arg < -41.1f) arg = -41.1f;
    reduced = arg;
    if (arg < -34.03f) {
        /* Six factors are skipped here and divided out separately below. */
        reduced += 6.0f;
    }
    scale = reduced;
    while (reduced < -0.5f) {
        reduced = reduced + 1.0f;
        scale = scale * reduced;
    }
    reduced = __internal_tgammaf_kernel(reduced);
    scale = scale * reduced;
    scale = 1.0f / scale;
    if (arg < -34.03f) {
        reduced = arg;
        reduced *= (arg + 1.0f);
        reduced *= (arg + 2.0f);
        reduced *= (arg + 3.0f);
        reduced *= (arg + 4.0f);
        reduced *= (arg + 5.0f);
        reduced = 1.0f / reduced;
        scale = scale * reduced;
        /* Tested on the original, unclamped argument. */
        if ((a < -42.0f) && !(((int)a)&1)) {
            scale = __int_as_float(0x80000000);
        }
    }
    return scale;
}
---|
5107 | |
---|
/*
 * Rounds a to the nearest integer value, halfway cases away from zero.
 * Magnitudes above 8388608 (2^23) are returned unchanged; magnitudes below
 * 0.5 round to a zero carrying the sign of a.
 */
extern __attribute__((weak)) float __cuda_roundf(float a);
float __cuda_roundf(float a)
{
    const float magnitude = __cuda_fabsf(a);
    float rounded;

    if (magnitude > 8388608.0f) {
        return a;
    }
    rounded = __cuda_floorf(magnitude + 0.5f);
    if (magnitude < 0.5f) {
        /* magnitude + 0.5f can round up to 1.0f just below 0.5; force zero. */
        rounded = 0.0f;
    }
    return __cuda_copysignf(rounded, a);
}
---|
5119 | |
---|
/*
 * Converts a float to long long, rounding halfway cases away from zero,
 * working directly on the bit pattern of a.
 *
 * Bit patterns with an all-ones exponent and a non-zero mantissa return 0;
 * non-negative patterns at or above 0x5f000000 return the largest long
 * long; patterns at or above 0xdf000000 return the smallest.
 */
extern __attribute__((weak)) long long int __internal_llroundf_kernel(float a);
long long int __internal_llroundf_kernel(float a)
{
    const unsigned int bits = __float_as_int(a);
    unsigned long long int magnitude;
    unsigned long long int dropped = 0LL;   /* bits shifted out, left-aligned */
    int shift;

    if ((bits << 1) > 0xff000000) {
        return 0LL;
    }
    if ((int)bits >= 0x5f000000) {
        return 0x7fffffffffffffffLL;
    }
    if (bits >= 0xdf000000) {
        return 0x8000000000000000LL;
    }

    /* Place the mantissa, with its implicit leading 1, at bit 62. */
    shift = 189 - ((bits >> 23) & 0xff);
    magnitude = ((long long int)(((bits << 8) | 0x80000000) >> 1)) << 32;

    if (shift >= 64) {
        dropped = magnitude;
        magnitude = 0;
    } else if (shift) {
        dropped = magnitude << (64 - shift);
        magnitude = magnitude >> shift;
    }

    /* Round up when the discarded fraction is one half or more. */
    if (dropped >= 0x8000000000000000LL) {
        magnitude++;
    }
    if ((int)bits < 0) {
        magnitude = (unsigned long long int)(-(long long int)magnitude);
    }
    return (long long int)magnitude;
}
---|
5143 | |
---|
/* Rounds a to long long; simply forwards to __internal_llroundf_kernel. */
extern __attribute__((weak)) long long int __cuda_llroundf(float a);
long long int __cuda_llroundf(float a)
{
    const long long int rounded = __internal_llroundf_kernel(a);

    return rounded;
}
---|
5148 | |
---|
/* Rounds a to long by converting the result of __cuda_llroundf. */
extern __attribute__((weak)) long int __cuda_lroundf(float a);
long int __cuda_lroundf(float a)
{
    const long long int wide = __cuda_llroundf(a);

    return (long int)wide;
# 3101 "/usr/local/cuda/bin/../include/math_functions.h" 3
}
---|
5155 | |
---|
/*
 * Positive difference: returns a - b when a > b and 0 when a <= b.
 * When the comparison is false because of a NaN operand, a - b is returned.
 */
extern __attribute__((weak)) float __cuda_fdimf(float a, float b);
float __cuda_fdimf(float a, float b)
{
    const float diff = a - b;

    return (a <= b) ? 0.0f : diff;
}
---|
5165 | |
---|
/*
 * Returns the unbiased binary exponent of |a| as an int.
 *
 * Zero and bit patterns above 0x7f800000 (NaN) return the most negative
 * int, infinity returns the most positive int, and values at or below
 * 1.175494351e-38 are normalised by shifting to find their exponent.
 */
extern __attribute__((weak)) int __cuda_ilogbf(float a);
int __cuda_ilogbf(float a)
{
    const int int_max = ((int)((unsigned int)-1 >> 1));
    const int int_min = -int_max - 1;
    unsigned int bits;
    int expo;

    a = __cuda_fabsf(a);

    if (a <= 1.175494351e-38f) {
        if (a == 0.0f) {
            return int_min;
        }
        /* Shift left until the leading 1 reaches the top bit, counting down from -126. */
        bits = __float_as_int(a);
        bits = bits << 8;
        for (expo = -126; (int)bits >= 0; bits = bits + bits) {
            expo--;
        }
        return expo;
    }

    bits = __float_as_int(a);
    if (bits > 0x7f800000) {
        return int_min;
    }
    if (bits == 0x7f800000) {
        return int_max;
    }
    return ((int)((bits >> 23) & 0xff)) - 127;
}
---|
5196 | |
---|
/*
 * Returns the unbiased binary exponent of |a| as a float.
 *
 * NaN returns a + a, zero returns -infinity, infinity returns infinity,
 * and values at or below 1.175494351e-38 are normalised by shifting to
 * find their exponent.
 */
extern __attribute__((weak)) float __cuda_logbf(float a);
float __cuda_logbf(float a)
{
    unsigned int bits;
    int expo;

    if (__cuda___isnanf(a)) {
        return a + a;
    }

    a = __cuda_fabsf(a);

    if (a <= 1.175494351e-38f) {
        if (a == 0.0f) {
            return -__int_as_float(0x7f800000);
        }
        /* Shift left until the leading 1 reaches the top bit, counting down from -126. */
        bits = __float_as_int(a);
        bits = bits << 8;
        for (expo = -126; (int)bits >= 0; bits = bits + bits) {
            expo--;
        }
        return (float)expo;
    }

    bits = __float_as_int(a);
    if ((bits >= 0x7f800000)) {
        /* Infinity (NaN was handled above): a + a stays infinite. */
        return a + a;
    }
    expo = ((int)((bits >> 23) & 0xff)) - 127;
    return (float)expo;
}
---|
5231 | |
---|
/*
 * Computes sin(a) and cos(a) together, storing them through sptr and cptr.
 *
 * An infinite a stores the 0x7fffffff NaN pattern in both outputs; a zero
 * a stores a itself (keeping its sign) and 1.  Otherwise the argument is
 * reduced by __internal_trig_reduction_kernel, which also reports a
 * quadrant number used to pick and sign the two kernel results.
 */
extern __attribute__((weak)) void __cuda_sincosf(float a, float *sptr, float *cptr);
void __cuda_sincosf(float a, float *sptr, float *cptr)
{
    float reduced, cos_val, sin_val;
    float sin_out, cos_out;
    int quadrant;

    if (__cuda___isinff(a)) {
        *sptr = __int_as_float(0x7fffffff);
        *cptr = __int_as_float(0x7fffffff);
        return;
    }
    if (a == 0.0f) {
        *sptr = a;
        *cptr = 1.0f;
        return;
    }

    reduced = __internal_trig_reduction_kernel(a, &quadrant);
    cos_val = __internal_cos_kernel(reduced);
    sin_val = __internal_sin_kernel(reduced);

    /* Odd quadrants swap the roles of the two kernel results. */
    sin_out = (quadrant & 1) ? cos_val : sin_val;
    cos_out = (quadrant & 1) ? sin_val : cos_val;

    if (quadrant & 2) {
        sin_out = -sin_out;
    }
    /* The cosine sign test uses the quadrant advanced by one. */
    if ((quadrant + 1) & 2) {
        cos_out = -cos_out;
    }

    *sptr = sin_out;
    *cptr = cos_out;
}
---|
5270 | # 3226 "/usr/local/cuda/bin/../include/math_functions.h" 3 |
---|
5271 | extern __attribute__((weak)) double rsqrt(double a); double rsqrt(double a) |
---|
5272 | { |
---|
5273 | return 1.0 / sqrt(a); |
---|
5274 | } |
---|
5275 | |
---|
/* Single-precision reciprocal square root, evaluated in double by rsqrt and narrowed. */
extern __attribute__((weak)) float rsqrtf(float a);
float rsqrtf(float a)
{
    const double widened = (double)a;

    return (float)rsqrt(widened);
}
---|
5280 | # 4167 "/usr/local/cuda/bin/../include/math_functions.h" 3 |
---|
5281 | # 1 "/usr/local/cuda/bin/../include/math_functions_dbl_ptx1.h" 1 3 |
---|
5282 | # 45 "/usr/local/cuda/bin/../include/math_functions_dbl_ptx1.h" 3 |
---|
/* Double-precision fabs entry point; evaluated in single precision by __cuda_fabsf. */
extern __attribute__((weak)) double __cuda_fabs(double a);
double __cuda_fabs(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_fabsf(narrowed);
}
---|
5287 | |
---|
/* Double-precision fmax entry point; evaluated in single precision by __cuda_fmaxf. */
extern __attribute__((weak)) double __cuda_fmax(double a, double b);
double __cuda_fmax(double a, double b)
{
    const float lhs = (float)a;
    const float rhs = (float)b;

    return (double)__cuda_fmaxf(lhs, rhs);
}
---|
5292 | |
---|
/* Double-precision fmin entry point; evaluated in single precision by __cuda_fminf. */
extern __attribute__((weak)) double __cuda_fmin(double a, double b);
double __cuda_fmin(double a, double b)
{
    const float lhs = (float)a;
    const float rhs = (float)b;

    return (double)__cuda_fminf(lhs, rhs);
}
---|
5297 | |
---|
/* Double-argument finiteness test; the argument is narrowed to float and passed to __cuda___finitef. */
extern __attribute__((weak)) int __cuda___finite(double a);
int __cuda___finite(double a)
{
    const float narrowed = (float)a;

    return __cuda___finitef(narrowed);
}
---|
5302 | |
---|
/* Double-argument infinity test; the argument is narrowed to float and passed to __cuda___isinff. */
extern __attribute__((weak)) int __cuda___isinf(double a);
int __cuda___isinf(double a)
{
    const float narrowed = (float)a;

    return __cuda___isinff(narrowed);
}
---|
5307 | |
---|
/* Double-argument NaN test; the argument is narrowed to float and passed to __cuda___isnanf. */
extern __attribute__((weak)) int __cuda___isnan(double a);
int __cuda___isnan(double a)
{
    const float narrowed = (float)a;

    return __cuda___isnanf(narrowed);
}
---|
5312 | |
---|
/* Double-argument sign-bit test; the argument is narrowed to float and passed to __cuda___signbitf. */
extern __attribute__((weak)) int __cuda___signbit(double a);
int __cuda___signbit(double a)
{
    const float narrowed = (float)a;

    return __cuda___signbitf(narrowed);
}
---|
5317 | |
---|
/* Double-precision sqrt entry point; evaluated in single precision by __cuda_sqrtf. */
extern __attribute__((weak)) double __cuda_sqrt(double a);
double __cuda_sqrt(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_sqrtf(narrowed);
}
---|
5322 | |
---|
/* Double-precision rsqrt entry point; evaluated in single precision by __cuda_rsqrtf. */
extern __attribute__((weak)) double __cuda_rsqrt(double a);
double __cuda_rsqrt(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_rsqrtf(narrowed);
}
---|
5327 | |
---|
/* Double-precision ceil entry point; evaluated in single precision by __cuda_ceilf. */
extern __attribute__((weak)) double __cuda_ceil(double a);
double __cuda_ceil(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_ceilf(narrowed);
}
---|
5332 | |
---|
/* Double-precision trunc entry point; evaluated in single precision by __cuda_truncf. */
extern __attribute__((weak)) double __cuda_trunc(double a);
double __cuda_trunc(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_truncf(narrowed);
}
---|
5337 | |
---|
/* Double-precision floor entry point; evaluated in single precision by __cuda_floorf. */
extern __attribute__((weak)) double __cuda_floor(double a);
double __cuda_floor(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_floorf(narrowed);
}
---|
5342 | |
---|
/* Double-precision copysign entry point; evaluated in single precision by __cuda_copysignf. */
extern __attribute__((weak)) double __cuda_copysign(double a, double b);
double __cuda_copysign(double a, double b)
{
    const float magnitude = (float)a;
    const float sign_source = (float)b;

    return (double)__cuda_copysignf(magnitude, sign_source);
}
---|
5347 | |
---|
/* Double-precision sin entry point; evaluated in single precision by __cuda_sinf. */
extern __attribute__((weak)) double __cuda_sin(double a);
double __cuda_sin(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_sinf(narrowed);
}
---|
5352 | |
---|
/* Double-precision cos entry point; evaluated in single precision by __cuda_cosf. */
extern __attribute__((weak)) double __cuda_cos(double a);
double __cuda_cos(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_cosf(narrowed);
}
---|
5357 | |
---|
/*
 * Double-precision sincos entry point.  The work is done in single
 * precision by __cuda_sincosf; both results are widened and stored through
 * sptr (sine) and cptr (cosine).
 */
extern __attribute__((weak)) void __cuda_sincos(double a, double *sptr, double *cptr);
void __cuda_sincos(double a, double *sptr, double *cptr)
{
    float sin_part;
    float cos_part;

    __cuda_sincosf((float)a, &sin_part, &cos_part);

    *sptr = (double)sin_part;
    *cptr = (double)cos_part;
}
---|
5367 | |
---|
/* Double-precision tan entry point; evaluated in single precision by __cuda_tanf. */
extern __attribute__((weak)) double __cuda_tan(double a);
double __cuda_tan(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_tanf(narrowed);
}
---|
5372 | |
---|
/* Double-precision exp entry point; evaluated in single precision by __cuda_expf. */
extern __attribute__((weak)) double __cuda_exp(double a);
double __cuda_exp(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_expf(narrowed);
}
---|
5377 | |
---|
/* Double-precision exp2 entry point; evaluated in single precision by __cuda_exp2f. */
extern __attribute__((weak)) double __cuda_exp2(double a);
double __cuda_exp2(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_exp2f(narrowed);
}
---|
5382 | |
---|
/* Double-precision exp10 entry point; evaluated in single precision by __cuda_exp10f. */
extern __attribute__((weak)) double __cuda_exp10(double a);
double __cuda_exp10(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_exp10f(narrowed);
}
---|
5387 | |
---|
/* Double-precision expm1 entry point; evaluated in single precision by __cuda_expm1f. */
extern __attribute__((weak)) double __cuda_expm1(double a);
double __cuda_expm1(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_expm1f(narrowed);
}
---|
5392 | |
---|
/* Double-precision cosh entry point; evaluated in single precision by __cuda_coshf. */
extern __attribute__((weak)) double __cuda_cosh(double a);
double __cuda_cosh(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_coshf(narrowed);
}
---|
5397 | |
---|
/* Double-precision sinh entry point; evaluated in single precision by __cuda_sinhf. */
extern __attribute__((weak)) double __cuda_sinh(double a);
double __cuda_sinh(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_sinhf(narrowed);
}
---|
5402 | |
---|
/* Double-precision tanh entry point; evaluated in single precision by __cuda_tanhf. */
extern __attribute__((weak)) double __cuda_tanh(double a);
double __cuda_tanh(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_tanhf(narrowed);
}
---|
5407 | |
---|
/* Double-precision asin entry point; evaluated in single precision by __cuda_asinf. */
extern __attribute__((weak)) double __cuda_asin(double a);
double __cuda_asin(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_asinf(narrowed);
}
---|
5412 | |
---|
/* Double-precision acos entry point; evaluated in single precision by __cuda_acosf. */
extern __attribute__((weak)) double __cuda_acos(double a);
double __cuda_acos(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_acosf(narrowed);
}
---|
5417 | |
---|
/* Double-precision atan entry point; evaluated in single precision by __cuda_atanf. */
extern __attribute__((weak)) double __cuda_atan(double a);
double __cuda_atan(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_atanf(narrowed);
}
---|
5422 | |
---|
/* Double-precision atan2 entry point; evaluated in single precision by __cuda_atan2f. */
extern __attribute__((weak)) double __cuda_atan2(double a, double b);
double __cuda_atan2(double a, double b)
{
    const float first = (float)a;
    const float second = (float)b;

    return (double)__cuda_atan2f(first, second);
}
---|
5427 | |
---|
/* Double-precision log entry point; evaluated in single precision by __cuda_logf. */
extern __attribute__((weak)) double __cuda_log(double a);
double __cuda_log(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_logf(narrowed);
}
---|
5432 | |
---|
/* Double-precision log2 entry point; evaluated in single precision by __cuda_log2f. */
extern __attribute__((weak)) double __cuda_log2(double a);
double __cuda_log2(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_log2f(narrowed);
}
---|
5437 | |
---|
/* Double-precision log10 entry point; evaluated in single precision by __cuda_log10f. */
extern __attribute__((weak)) double __cuda_log10(double a);
double __cuda_log10(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_log10f(narrowed);
}
---|
5442 | |
---|
/* Double-precision log1p entry point; evaluated in single precision by __cuda_log1pf. */
extern __attribute__((weak)) double __cuda_log1p(double a);
double __cuda_log1p(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_log1pf(narrowed);
}
---|
5447 | |
---|
/* Double-precision acosh entry point; evaluated in single precision by __cuda_acoshf. */
extern __attribute__((weak)) double __cuda_acosh(double a);
double __cuda_acosh(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_acoshf(narrowed);
}
---|
5452 | |
---|
/* Double-precision asinh entry point; evaluated in single precision by __cuda_asinhf. */
extern __attribute__((weak)) double __cuda_asinh(double a);
double __cuda_asinh(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_asinhf(narrowed);
}
---|
5457 | |
---|
/* Double-precision atanh entry point; evaluated in single precision by __cuda_atanhf. */
extern __attribute__((weak)) double __cuda_atanh(double a);
double __cuda_atanh(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_atanhf(narrowed);
}
---|
5462 | |
---|
/* Double-precision hypot entry point; evaluated in single precision by __cuda_hypotf. */
extern __attribute__((weak)) double __cuda_hypot(double a, double b);
double __cuda_hypot(double a, double b)
{
    const float first = (float)a;
    const float second = (float)b;

    return (double)__cuda_hypotf(first, second);
}
---|
5467 | |
---|
/* Double-precision cbrt entry point; evaluated in single precision by __cuda_cbrtf. */
extern __attribute__((weak)) double __cuda_cbrt(double a);
double __cuda_cbrt(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_cbrtf(narrowed);
}
---|
5472 | |
---|
/* Double-precision erf entry point; evaluated in single precision by __cuda_erff. */
extern __attribute__((weak)) double __cuda_erf(double a);
double __cuda_erf(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_erff(narrowed);
}
---|
5477 | |
---|
/* Double-precision erfc entry point; evaluated in single precision by __cuda_erfcf. */
extern __attribute__((weak)) double __cuda_erfc(double a);
double __cuda_erfc(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_erfcf(narrowed);
}
---|
5482 | |
---|
/* Double-precision lgamma entry point; evaluated in single precision by __cuda_lgammaf. */
extern __attribute__((weak)) double __cuda_lgamma(double a);
double __cuda_lgamma(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_lgammaf(narrowed);
}
---|
5487 | |
---|
/* Double-precision tgamma entry point; evaluated in single precision by __cuda_tgammaf. */
extern __attribute__((weak)) double __cuda_tgamma(double a);
double __cuda_tgamma(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_tgammaf(narrowed);
}
---|
5492 | |
---|
/* Double-precision ldexp entry point; evaluated in single precision by __cuda_ldexpf with b passed through. */
extern __attribute__((weak)) double __cuda_ldexp(double a, int b);
double __cuda_ldexp(double a, int b)
{
    const float narrowed = (float)a;

    return (double)__cuda_ldexpf(narrowed, b);
}
---|
5497 | |
---|
/* Double-precision scalbn entry point; evaluated in single precision by __cuda_scalbnf with b passed through. */
extern __attribute__((weak)) double __cuda_scalbn(double a, int b);
double __cuda_scalbn(double a, int b)
{
    const float narrowed = (float)a;

    return (double)__cuda_scalbnf(narrowed, b);
}
---|
5502 | |
---|
/* Double-precision scalbln entry point; evaluated in single precision by __cuda_scalblnf with b passed through. */
extern __attribute__((weak)) double __cuda_scalbln(double a, long b);
double __cuda_scalbln(double a, long b)
{
    const float narrowed = (float)a;

    return (double)__cuda_scalblnf(narrowed, b);
}
---|
5507 | |
---|
/* Double-precision frexp entry point; evaluated in single precision by __cuda_frexpf, which receives b directly. */
extern __attribute__((weak)) double __cuda_frexp(double a, int *b);
double __cuda_frexp(double a, int *b)
{
    const float narrowed = (float)a;

    return (double)__cuda_frexpf(narrowed, b);
}
---|
5512 | |
---|
/*
 * Double-precision modf entry point.  __cuda_modff does the split in single
 * precision; the value it stores through its pointer argument is widened
 * into *b and its return value is widened and returned.
 */
extern __attribute__((weak)) double __cuda_modf(double a, double *b);
double __cuda_modf(double a, double *b)
{
    float stored_part;
    const float returned_part = __cuda_modff((float)a, &stored_part);

    *b = (double)stored_part;

    return (double)returned_part;
}
---|
5522 | |
---|
/* Double-precision fmod entry point; evaluated in single precision by __cuda_fmodf. */
extern __attribute__((weak)) double __cuda_fmod(double a, double b);
double __cuda_fmod(double a, double b)
{
    const float first = (float)a;
    const float second = (float)b;

    return (double)__cuda_fmodf(first, second);
}
---|
5527 | |
---|
/* Double-precision remainder entry point; evaluated in single precision by __cuda_remainderf. */
extern __attribute__((weak)) double __cuda_remainder(double a, double b);
double __cuda_remainder(double a, double b)
{
    const float first = (float)a;
    const float second = (float)b;

    return (double)__cuda_remainderf(first, second);
}
---|
5532 | |
---|
/* Double-precision remquo entry point; evaluated in single precision by __cuda_remquof, which receives c directly. */
extern __attribute__((weak)) double __cuda_remquo(double a, double b, int *c);
double __cuda_remquo(double a, double b, int *c)
{
    const float first = (float)a;
    const float second = (float)b;

    return (double)__cuda_remquof(first, second, c);
}
---|
5537 | |
---|
/*
 * Double-precision nextafter entry point; evaluated in single precision by
 * __cuda_nextafterf, so the step taken is one float step, not one double step.
 */
extern __attribute__((weak)) double __cuda_nextafter(double a, double b);
double __cuda_nextafter(double a, double b)
{
    const float start = (float)a;
    const float toward = (float)b;

    return (double)__cuda_nextafterf(start, toward);
}
---|
5542 | |
---|
/* Double-precision nan entry point; widens the float produced by __cuda_nanf(tagp). */
extern __attribute__((weak)) double __cuda_nan(const char *tagp);
double __cuda_nan(const char *tagp)
{
    const float single = __cuda_nanf(tagp);

    return (double)single;
}
---|
5547 | |
---|
/* Double-precision pow entry point; evaluated in single precision by __cuda_powf. */
extern __attribute__((weak)) double __cuda_pow(double a, double b);
double __cuda_pow(double a, double b)
{
    const float base = (float)a;
    const float exponent = (float)b;

    return (double)__cuda_powf(base, exponent);
}
---|
5552 | |
---|
/* Double-precision round entry point; evaluated in single precision by __cuda_roundf. */
extern __attribute__((weak)) double __cuda_round(double a);
double __cuda_round(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_roundf(narrowed);
}
---|
5557 | |
---|
/* Double-argument lround entry point; the argument is narrowed to float and rounded by __cuda_lroundf. */
extern __attribute__((weak)) long __cuda_lround(double a);
long __cuda_lround(double a)
{
    const float narrowed = (float)a;

    return __cuda_lroundf(narrowed);
}
---|
5562 | |
---|
/* Double-argument llround entry point; the argument is narrowed to float and rounded by __cuda_llroundf. */
extern __attribute__((weak)) long long __cuda_llround(double a);
long long __cuda_llround(double a)
{
    const float narrowed = (float)a;

    return __cuda_llroundf(narrowed);
}
---|
5567 | |
---|
/* Double-precision rint entry point; evaluated in single precision by __cuda_rintf. */
extern __attribute__((weak)) double __cuda_rint(double a);
double __cuda_rint(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_rintf(narrowed);
}
---|
5572 | |
---|
/* Double-argument lrint entry point; the argument is narrowed to float and rounded by __cuda_lrintf. */
extern __attribute__((weak)) long __cuda_lrint(double a);
long __cuda_lrint(double a)
{
    const float narrowed = (float)a;

    return __cuda_lrintf(narrowed);
}
---|
5577 | |
---|
/* Double-argument llrint entry point; the argument is narrowed to float and rounded by __cuda_llrintf. */
extern __attribute__((weak)) long long __cuda_llrint(double a);
long long __cuda_llrint(double a)
{
    const float narrowed = (float)a;

    return __cuda_llrintf(narrowed);
}
---|
5582 | |
---|
/* Double-precision nearbyint entry point; evaluated in single precision by __cuda_nearbyintf. */
extern __attribute__((weak)) double __cuda_nearbyint(double a);
double __cuda_nearbyint(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_nearbyintf(narrowed);
}
---|
5587 | |
---|
/* Double-precision fdim entry point; evaluated in single precision by __cuda_fdimf. */
extern __attribute__((weak)) double __cuda_fdim(double a, double b);
double __cuda_fdim(double a, double b)
{
    const float first = (float)a;
    const float second = (float)b;

    return (double)__cuda_fdimf(first, second);
}
---|
5592 | |
---|
/* Double-argument ilogb entry point; the argument is narrowed to float and passed to __cuda_ilogbf. */
extern __attribute__((weak)) int __cuda_ilogb(double a);
int __cuda_ilogb(double a)
{
    const float narrowed = (float)a;

    return __cuda_ilogbf(narrowed);
}
---|
5597 | |
---|
/* Double-precision logb entry point; evaluated in single precision by __cuda_logbf. */
extern __attribute__((weak)) double __cuda_logb(double a);
double __cuda_logb(double a)
{
    const float narrowed = (float)a;

    return (double)__cuda_logbf(narrowed);
}
---|
5602 | |
---|
/* Double-precision fma entry point; all three operands are narrowed to float and combined by __cuda_fmaf. */
extern __attribute__((weak)) double __cuda_fma(double a, double b, double c);
double __cuda_fma(double a, double b, double c)
{
    const float multiplicand = (float)a;
    const float multiplier = (float)b;
    const float addend = (float)c;

    return (double)__cuda_fmaf(multiplicand, multiplier, addend);
}
---|
5607 | # 4168 "/usr/local/cuda/bin/../include/math_functions.h" 2 3 |
---|
5608 | # 89 "/usr/local/cuda/bin/../include/common_functions.h" 2 |
---|
5609 | # 196 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 2 |
---|
5610 | # 6 "/tmp/tmpxft_00001ecc_00000000-0.stub.c" 2 |
---|
5611 | extern void __sti____cudaRegisterAll_29_tmpxft_00001ecc_00000000_2_ii_91788a12(void) __attribute__((__constructor__)); |
---|
5612 | void __sti____cudaRegisterAll_29_tmpxft_00001ecc_00000000_2_ii_91788a12(void){__cudaFatCubinHandle = __cudaRegisterFatBinary((void*)(&__fatDeviceText));} |
---|
5613 | # 475 "y.cu" 2 |
---|