ac/llvm: add better code for fsign
[mesa.git] / src / amd / llvm / ac_llvm_helper.cpp
1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25
26 #include <cstring>
27
28 #include <llvm-c/Core.h>
29 #include <llvm/Target/TargetMachine.h>
30 #include <llvm/IR/IRBuilder.h>
31 #include <llvm/Analysis/TargetLibraryInfo.h>
32 #include <llvm/Transforms/IPO.h>
33
34 #include <llvm/IR/LegacyPassManager.h>
35
36 /* DO NOT REORDER THE HEADERS
37 * The LLVM headers need to all be included before any Mesa header,
38 * as they use the `restrict` keyword in ways that are incompatible
39 * with our #define in include/c99_compat.h
40 */
41
42 #include "ac_binary.h"
43 #include "ac_llvm_util.h"
44 #include "ac_llvm_build.h"
45
46 #include "util/macros.h"
47
48 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
49 {
50 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
51 A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
52 }
53
54 void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
55 {
56 #if LLVM_VERSION_MAJOR >= 10
57 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
58 A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
59 #else
60 /* Avoid unused parameter warnings. */
61 (void)val;
62 (void)bytes;
63 #endif
64 }
65
66 bool ac_is_sgpr_param(LLVMValueRef arg)
67 {
68 llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
69 llvm::AttributeList AS = A->getParent()->getAttributes();
70 unsigned ArgNo = A->getArgNo();
71 return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
72 }
73
74 LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
75 {
76 return LLVMGetCalledValue(call);
77 }
78
79 bool ac_llvm_is_function(LLVMValueRef v)
80 {
81 return LLVMGetValueKind(v) == LLVMFunctionValueKind;
82 }
83
84 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
85 {
86 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
87 LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
88
89 llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
90 llvm::unwrap(module)->setDataLayout(TM->createDataLayout());
91 return module;
92 }
93
94 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
95 enum ac_float_mode float_mode)
96 {
97 LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
98
99 llvm::FastMathFlags flags;
100
101 switch (float_mode) {
102 case AC_FLOAT_MODE_DEFAULT:
103 case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
104 break;
105
106 case AC_FLOAT_MODE_DEFAULT_OPENGL:
107 /* Allow optimizations to treat the sign of a zero argument or
108 * result as insignificant.
109 */
110 flags.setNoSignedZeros(); /* nsz */
111
112 /* Allow optimizations to use the reciprocal of an argument
113 * rather than perform division.
114 */
115 flags.setAllowReciprocal(); /* arcp */
116
117 llvm::unwrap(builder)->setFastMathFlags(flags);
118 break;
119 }
120
121 return builder;
122 }
123
124 void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
125 {
126 if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
127 auto *b = llvm::unwrap(ctx->builder);
128 llvm::FastMathFlags flags = b->getFastMathFlags();
129
130 /* This disables the optimization of (x + 0), which is used
131 * to convert negative zero to positive zero.
132 */
133 flags.setNoSignedZeros(false);
134 b->setFastMathFlags(flags);
135 }
136 }
137
138 void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
139 {
140 if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
141 auto *b = llvm::unwrap(ctx->builder);
142 llvm::FastMathFlags flags = b->getFastMathFlags();
143
144 flags.setNoSignedZeros();
145 b->setFastMathFlags(flags);
146 }
147 }
148
149 LLVMTargetLibraryInfoRef
150 ac_create_target_library_info(const char *triple)
151 {
152 return reinterpret_cast<LLVMTargetLibraryInfoRef>(new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
153 }
154
155 void
156 ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
157 {
158 delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
159 }
160
161 /* Implementation of raw_pwrite_stream that works on malloc()ed memory for
162 * better compatibility with C code. */
163 struct raw_memory_ostream : public llvm::raw_pwrite_stream {
164 char *buffer;
165 size_t written;
166 size_t bufsize;
167
168 raw_memory_ostream()
169 {
170 buffer = NULL;
171 written = 0;
172 bufsize = 0;
173 SetUnbuffered();
174 }
175
176 ~raw_memory_ostream()
177 {
178 free(buffer);
179 }
180
181 void clear()
182 {
183 written = 0;
184 }
185
186 void take(char *&out_buffer, size_t &out_size)
187 {
188 out_buffer = buffer;
189 out_size = written;
190 buffer = NULL;
191 written = 0;
192 bufsize = 0;
193 }
194
195 void flush() = delete;
196
197 void write_impl(const char *ptr, size_t size) override
198 {
199 if (unlikely(written + size < written))
200 abort();
201 if (written + size > bufsize) {
202 bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
203 buffer = (char *)realloc(buffer, bufsize);
204 if (!buffer) {
205 fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
206 abort();
207 }
208 }
209 memcpy(buffer + written, ptr, size);
210 written += size;
211 }
212
213 void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
214 {
215 assert(offset == (size_t)offset &&
216 offset + size >= offset && offset + size <= written);
217 memcpy(buffer + offset, ptr, size);
218 }
219
220 uint64_t current_pos() const override
221 {
222 return written;
223 }
224 };
225
226 /* The LLVM compiler is represented as a pass manager containing passes for
227 * optimizations, instruction selection, and code generation.
228 */
229 struct ac_compiler_passes {
230 raw_memory_ostream ostream; /* ELF shader binary stream */
231 llvm::legacy::PassManager passmgr; /* list of passes */
232 };
233
234 struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
235 {
236 struct ac_compiler_passes *p = new ac_compiler_passes();
237 if (!p)
238 return NULL;
239
240 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
241
242 if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
243 nullptr,
244 #if LLVM_VERSION_MAJOR >= 10
245 llvm::CGFT_ObjectFile)) {
246 #else
247 llvm::TargetMachine::CGFT_ObjectFile)) {
248 #endif
249 fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
250 delete p;
251 return NULL;
252 }
253 return p;
254 }
255
/* Destroy compiler state created by ac_create_llvm_passes().
 * Passing NULL is harmless, since deleting a null pointer does nothing.
 */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
	struct ac_compiler_passes *passes = p;

	delete passes;
}
260
261 /* This returns false on failure. */
262 bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
263 char **pelf_buffer, size_t *pelf_size)
264 {
265 p->passmgr.run(*llvm::unwrap(module));
266 p->ostream.take(*pelf_buffer, *pelf_size);
267 return true;
268 }
269
270 void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
271 {
272 llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
273 }
274
275 void ac_enable_global_isel(LLVMTargetMachineRef tm)
276 {
277 reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
278 }
279
280 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
281 LLVMValueRef ptr, LLVMValueRef val,
282 const char *sync_scope) {
283 llvm::AtomicRMWInst::BinOp binop;
284 switch (op) {
285 case LLVMAtomicRMWBinOpXchg:
286 binop = llvm::AtomicRMWInst::Xchg;
287 break;
288 case LLVMAtomicRMWBinOpAdd:
289 binop = llvm::AtomicRMWInst::Add;
290 break;
291 case LLVMAtomicRMWBinOpSub:
292 binop = llvm::AtomicRMWInst::Sub;
293 break;
294 case LLVMAtomicRMWBinOpAnd:
295 binop = llvm::AtomicRMWInst::And;
296 break;
297 case LLVMAtomicRMWBinOpNand:
298 binop = llvm::AtomicRMWInst::Nand;
299 break;
300 case LLVMAtomicRMWBinOpOr:
301 binop = llvm::AtomicRMWInst::Or;
302 break;
303 case LLVMAtomicRMWBinOpXor:
304 binop = llvm::AtomicRMWInst::Xor;
305 break;
306 case LLVMAtomicRMWBinOpMax:
307 binop = llvm::AtomicRMWInst::Max;
308 break;
309 case LLVMAtomicRMWBinOpMin:
310 binop = llvm::AtomicRMWInst::Min;
311 break;
312 case LLVMAtomicRMWBinOpUMax:
313 binop = llvm::AtomicRMWInst::UMax;
314 break;
315 case LLVMAtomicRMWBinOpUMin:
316 binop = llvm::AtomicRMWInst::UMin;
317 break;
318 default:
319 unreachable(!"invalid LLVMAtomicRMWBinOp");
320 break;
321 }
322 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
323 return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
324 binop, llvm::unwrap(ptr), llvm::unwrap(val),
325 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
326 }
327
328 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
329 LLVMValueRef cmp, LLVMValueRef val,
330 const char *sync_scope) {
331 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
332 return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg(
333 llvm::unwrap(ptr), llvm::unwrap(cmp), llvm::unwrap(val),
334 llvm::AtomicOrdering::SequentiallyConsistent,
335 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
336 }