--- /dev/null
-noinst_PROGRAMS = glsl_compiler
+ #
+ # Copyright © 2012 Jon TURNEY
+ # Copyright (C) 2015 Intel Corporation
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a
+ # copy of this software and associated documentation files (the "Software"),
+ # to deal in the Software without restriction, including without limitation
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ # and/or sell copies of the Software, and to permit persons to whom the
+ # Software is furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice (including the next
+ # paragraph) shall be included in all copies or substantial portions of the
+ # Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ # IN THE SOFTWARE.
+
+ include Makefile.sources
+
+ AM_CPPFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/mapi \
+ -I$(top_srcdir)/src/mesa/ \
+ -I$(top_builddir)/src/compiler/glsl \
+ -I$(top_srcdir)/src/compiler/glsl \
+ -I$(top_srcdir)/src/compiler/glsl/glcpp \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gtest/include \
+ $(DEFINES)
+
+ AM_CFLAGS = \
+ $(VISIBILITY_CFLAGS) \
+ $(MSVC2013_COMPAT_CFLAGS)
+
+ AM_CXXFLAGS = \
+ $(VISIBILITY_CXXFLAGS) \
+ $(MSVC2013_COMPAT_CXXFLAGS)
+
+ noinst_LTLIBRARIES = libcompiler.la
+
+ libcompiler_la_SOURCES = $(LIBCOMPILER_FILES)
+
+ check_PROGRAMS =
+ TESTS =
+ BUILT_SOURCES =
+ CLEANFILES =
+ EXTRA_DIST = SConscript
+
+
+ EXTRA_DIST += glsl/tests glsl/glcpp/tests glsl/README \
+ glsl/TODO glsl/glcpp/README \
+ glsl/glsl_lexer.ll \
+ glsl/glsl_parser.yy \
+ glsl/glcpp/glcpp-lex.l \
+ glsl/glcpp/glcpp-parse.y \
+ glsl/Makefile.sources \
+ glsl/SConscript
+
+ TESTS += glsl/glcpp/tests/glcpp-test \
+ glsl/glcpp/tests/glcpp-test-cr-lf \
+ glsl/tests/blob-test \
+ glsl/tests/general-ir-test \
+ glsl/tests/optimization-test \
+ glsl/tests/sampler-types-test \
+ glsl/tests/uniform-initializer-test
+
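+ # Make the configured Python interpreter and its flags available to the
+ # test scripts run below.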
+ TESTS_ENVIRONMENT= \
+ export PYTHON2=$(PYTHON2); \
+ export PYTHON_FLAGS=$(PYTHON_FLAGS);
+
+ check_PROGRAMS += \
+ glsl/glcpp/glcpp \
+ glsl/glsl_test \
+ glsl/tests/blob-test \
+ glsl/tests/general-ir-test \
+ glsl/tests/sampler-types-test \
+ glsl/tests/uniform-initializer-test
+
++noinst_PROGRAMS = glsl_compiler spirv2nir
+
+ glsl_tests_blob_test_SOURCES = \
+ glsl/tests/blob_test.c
+ glsl_tests_blob_test_LDADD = \
+ glsl/libglsl.la
+
+ glsl_tests_general_ir_test_SOURCES = \
+ glsl/standalone_scaffolding.cpp \
+ glsl/tests/builtin_variable_test.cpp \
+ glsl/tests/invalidate_locations_test.cpp \
+ glsl/tests/general_ir_test.cpp \
+ glsl/tests/varyings_test.cpp
+ glsl_tests_general_ir_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+ glsl_tests_general_ir_test_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+ glsl_tests_uniform_initializer_test_SOURCES = \
+ glsl/tests/copy_constant_to_storage_tests.cpp \
+ glsl/tests/set_uniform_initializer_tests.cpp \
+ glsl/tests/uniform_initializer_utils.cpp \
+ glsl/tests/uniform_initializer_utils.h
+ glsl_tests_uniform_initializer_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+ glsl_tests_uniform_initializer_test_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+ glsl_tests_sampler_types_test_SOURCES = \
+ glsl/tests/sampler_types_test.cpp
+ glsl_tests_sampler_types_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+ glsl_tests_sampler_types_test_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+ noinst_LTLIBRARIES += glsl/libglsl.la glsl/libglcpp.la
+
+ glsl_libglcpp_la_LIBADD = \
+ $(top_builddir)/src/util/libmesautil.la
+ glsl_libglcpp_la_SOURCES = \
+ glsl/glcpp/glcpp-lex.c \
+ glsl/glcpp/glcpp-parse.c \
+ glsl/glcpp/glcpp-parse.h \
+ $(LIBGLCPP_FILES)
+
+ glsl_glcpp_glcpp_SOURCES = \
+ glsl/glcpp/glcpp.c
+ glsl_glcpp_glcpp_LDADD = \
+ glsl/libglcpp.la \
+ $(top_builddir)/src/libglsl_util.la \
+ -lm
+
+ glsl_libglsl_la_LIBADD = \
+ nir/libnir.la \
+ glsl/libglcpp.la
+
+ glsl_libglsl_la_SOURCES = \
+ glsl/glsl_lexer.cpp \
+ glsl/glsl_parser.cpp \
+ glsl/glsl_parser.h \
+ $(LIBGLSL_FILES)
+
+
+ glsl_compiler_SOURCES = \
+ $(GLSL_COMPILER_CXX_FILES)
+
+ glsl_compiler_LDADD = \
+ glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(PTHREAD_LIBS)
+
+ glsl_glsl_test_SOURCES = \
+ glsl/standalone_scaffolding.cpp \
+ glsl/test.cpp \
+ glsl/test_optpass.cpp \
+ glsl/test_optpass.h
+
+ glsl_glsl_test_LDADD = \
+ glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
++spirv2nir_SOURCES = \
++ nir/spirv2nir.c
++
++spirv2nir_LDADD = \
++ nir/libnir.la \
++ $(top_builddir)/src/util/libmesautil.la \
++ -lm -lstdc++ \
++ $(PTHREAD_LIBS)
++
+ # We write our own rules for yacc and lex below. We'd rather use automake,
+ # but automake makes it especially difficult for a number of reasons:
+ #
+ # * < automake-1.12 generates .h files from .yy and .ypp files, but
+ # >=automake-1.12 generates .hh and .hpp files respectively. There's no
+ # good way of making a project that uses C++ yacc files compatible with
+ # both versions of automake. Strong work automake developers.
+ #
+ # * Since we're generating code from .l/.y files in a subdirectory (glcpp/)
+ # we'd like the resulting generated code to also go in glcpp/ for purposes
+ # of distribution. Automake gives no way to do this.
+ #
+ # * Since we're building multiple yacc parsers into one library (and via one
+ # Makefile) we have to use per-target YFLAGS. Using per-target YFLAGS causes
+ # automake to name the resulting generated code as <library-name>_filename.c.
+ # Frankly, that's ugly and we don't want a libglcpp_glcpp_parser.h file.
+
+ # In order to make build output print "LEX" and "YACC", we reproduce the
+ # automake variables below.
+
+ AM_V_LEX = $(am__v_LEX_$(V))
+ am__v_LEX_ = $(am__v_LEX_$(AM_DEFAULT_VERBOSITY))
+ am__v_LEX_0 = @echo " LEX " $@;
+ am__v_LEX_1 =
+
+ AM_V_YACC = $(am__v_YACC_$(V))
+ am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY))
+ am__v_YACC_0 = @echo " YACC " $@;
+ am__v_YACC_1 =
+
+ MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+ YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
+ LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
+
+ glsl/glsl_parser.cpp glsl/glsl_parser.h: glsl/glsl_parser.yy
+ $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl/glsl_parser.h $(srcdir)/glsl/glsl_parser.yy
+
+ glsl/glsl_lexer.cpp: glsl/glsl_lexer.ll
+ $(LEX_GEN) -o $@ $(srcdir)/glsl/glsl_lexer.ll
+
+ glsl/glcpp/glcpp-parse.c glsl/glcpp/glcpp-parse.h: glsl/glcpp/glcpp-parse.y
+ $(MKDIR_GEN)
+ $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glsl/glcpp/glcpp-parse.h $(srcdir)/glsl/glcpp/glcpp-parse.y
+
+ glsl/glcpp/glcpp-lex.c: glsl/glcpp/glcpp-lex.l
+ $(MKDIR_GEN)
+ $(LEX_GEN) -o $@ $(srcdir)/glsl/glcpp/glcpp-lex.l
+
+ # Only the parsers (specifically the header files generated at the same time)
+ # need to be in BUILT_SOURCES. However, if we listed the parser headers, YACC
+ # would be invoked separately for the .c/.cpp file and for the .h file. By
+ # listing the .c/.cpp files instead, YACC is only executed once for each
+ # parser. The rest of the generated code
+ # will be created at the appropriate times according to standard automake
+ # dependency rules.
+ BUILT_SOURCES += \
+ glsl/glsl_parser.cpp \
+ glsl/glsl_lexer.cpp \
+ glsl/glcpp/glcpp-parse.c \
+ glsl/glcpp/glcpp-lex.c
+ CLEANFILES += \
+ glsl/glcpp/glcpp-parse.h \
+ glsl/glsl_parser.h \
+ glsl/glsl_parser.cpp \
+ glsl/glsl_lexer.cpp \
+ glsl/glcpp/glcpp-parse.c \
+ glsl/glcpp/glcpp-lex.c
+
+ clean-local:
+ $(RM) -r subtest-cr subtest-cr-lf subtest-lf subtest-lf-cr
+
+ dist-hook:
+ $(RM) glsl/glcpp/tests/*.out
+ $(RM) glsl/glcpp/tests/subtest*/*.out
+
+ noinst_LTLIBRARIES += nir/libnir.la
+
+ nir_libnir_la_CPPFLAGS = \
+ $(AM_CPPFLAGS) \
+ -I$(top_builddir)/src/compiler/nir \
+ -I$(top_srcdir)/src/compiler/nir
+
+ nir_libnir_la_LIBADD = \
+ libcompiler.la
+
+ nir_libnir_la_SOURCES = \
+ $(NIR_FILES) \
++ $(SPIRV_FILES) \
+ $(NIR_GENERATED_FILES)
+
+ PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
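+ # Each generator writes to stdout; if it fails, remove the partially written
+ # target and fail the rule so a truncated file is never treated as up to date.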
+ nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@ || ($(RM) $@; false)
+
+ nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@ || ($(RM) $@; false)
+
+ nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@ || ($(RM) $@; false)
+
+ nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@ || ($(RM) $@; false)
+
+ nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@ || ($(RM) $@; false)
+
+
+ check_PROGRAMS += nir/tests/control_flow_tests
+
+ nir_tests_control_flow_tests_CPPFLAGS = \
+ $(AM_CPPFLAGS) \
+ -I$(top_builddir)/src/compiler/nir \
+ -I$(top_srcdir)/src/compiler/nir
+
+ nir_tests_control_flow_tests_SOURCES = \
+ nir/tests/control_flow_tests.cpp
+ nir_tests_control_flow_tests_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+ nir_tests_control_flow_tests_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ nir/libnir.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(PTHREAD_LIBS)
+
+
+ TESTS += nir/tests/control_flow_tests
+
+
+ BUILT_SOURCES += $(NIR_GENERATED_FILES)
+ CLEANFILES += $(NIR_GENERATED_FILES)
+
+ EXTRA_DIST += \
+ nir/nir_algebraic.py \
+ nir/nir_builder_opcodes_h.py \
+ nir/nir_constant_expressions.py \
+ nir/nir_opcodes.py \
+ nir/nir_opcodes_c.py \
+ nir/nir_opcodes_h.py \
+ nir/nir_opt_algebraic.py \
+ nir/tests \
+ nir/Makefile.sources
--- /dev/null
+ LIBCOMPILER_FILES = \
+ builtin_type_macros.h \
+ glsl_types.cpp \
+ glsl_types.h \
+ nir_types.cpp \
+ nir_types.h \
+ shader_enums.c \
+ shader_enums.h
+
+ # libglsl
+
+ LIBGLSL_FILES = \
+ glsl/ast.h \
+ glsl/ast_array_index.cpp \
+ glsl/ast_expr.cpp \
+ glsl/ast_function.cpp \
+ glsl/ast_to_hir.cpp \
+ glsl/ast_type.cpp \
+ glsl/blob.c \
+ glsl/blob.h \
+ glsl/builtin_functions.cpp \
+ glsl/builtin_types.cpp \
+ glsl/builtin_variables.cpp \
+ glsl/glsl_parser_extras.cpp \
+ glsl/glsl_parser_extras.h \
+ glsl/glsl_symbol_table.cpp \
+ glsl/glsl_symbol_table.h \
+ glsl/hir_field_selection.cpp \
+ glsl/ir_basic_block.cpp \
+ glsl/ir_basic_block.h \
+ glsl/ir_builder.cpp \
+ glsl/ir_builder.h \
+ glsl/ir_clone.cpp \
+ glsl/ir_constant_expression.cpp \
+ glsl/ir.cpp \
+ glsl/ir.h \
+ glsl/ir_equals.cpp \
+ glsl/ir_expression_flattening.cpp \
+ glsl/ir_expression_flattening.h \
+ glsl/ir_function_can_inline.cpp \
+ glsl/ir_function_detect_recursion.cpp \
+ glsl/ir_function_inlining.h \
+ glsl/ir_function.cpp \
+ glsl/ir_hierarchical_visitor.cpp \
+ glsl/ir_hierarchical_visitor.h \
+ glsl/ir_hv_accept.cpp \
+ glsl/ir_import_prototypes.cpp \
+ glsl/ir_optimization.h \
+ glsl/ir_print_visitor.cpp \
+ glsl/ir_print_visitor.h \
+ glsl/ir_reader.cpp \
+ glsl/ir_reader.h \
+ glsl/ir_rvalue_visitor.cpp \
+ glsl/ir_rvalue_visitor.h \
+ glsl/ir_set_program_inouts.cpp \
+ glsl/ir_uniform.h \
+ glsl/ir_validate.cpp \
+ glsl/ir_variable_refcount.cpp \
+ glsl/ir_variable_refcount.h \
+ glsl/ir_visitor.h \
+ glsl/linker.cpp \
+ glsl/linker.h \
+ glsl/link_atomics.cpp \
+ glsl/link_functions.cpp \
+ glsl/link_interface_blocks.cpp \
+ glsl/link_uniforms.cpp \
+ glsl/link_uniform_initializers.cpp \
+ glsl/link_uniform_block_active_visitor.cpp \
+ glsl/link_uniform_block_active_visitor.h \
+ glsl/link_uniform_blocks.cpp \
+ glsl/link_varyings.cpp \
+ glsl/link_varyings.h \
+ glsl/list.h \
+ glsl/loop_analysis.cpp \
+ glsl/loop_analysis.h \
+ glsl/loop_controls.cpp \
+ glsl/loop_unroll.cpp \
+ glsl/lower_buffer_access.cpp \
+ glsl/lower_buffer_access.h \
+ glsl/lower_clip_distance.cpp \
+ glsl/lower_const_arrays_to_uniforms.cpp \
+ glsl/lower_discard.cpp \
+ glsl/lower_discard_flow.cpp \
+ glsl/lower_if_to_cond_assign.cpp \
+ glsl/lower_instructions.cpp \
+ glsl/lower_jumps.cpp \
+ glsl/lower_mat_op_to_vec.cpp \
+ glsl/lower_noise.cpp \
+ glsl/lower_offset_array.cpp \
+ glsl/lower_packed_varyings.cpp \
+ glsl/lower_named_interface_blocks.cpp \
+ glsl/lower_packing_builtins.cpp \
+ glsl/lower_subroutine.cpp \
+ glsl/lower_tess_level.cpp \
+ glsl/lower_texture_projection.cpp \
+ glsl/lower_variable_index_to_cond_assign.cpp \
+ glsl/lower_vec_index_to_cond_assign.cpp \
+ glsl/lower_vec_index_to_swizzle.cpp \
+ glsl/lower_vector.cpp \
+ glsl/lower_vector_derefs.cpp \
+ glsl/lower_vector_insert.cpp \
+ glsl/lower_vertex_id.cpp \
+ glsl/lower_output_reads.cpp \
+ glsl/lower_shared_reference.cpp \
+ glsl/lower_ubo_reference.cpp \
+ glsl/opt_algebraic.cpp \
+ glsl/opt_array_splitting.cpp \
+ glsl/opt_conditional_discard.cpp \
+ glsl/opt_constant_folding.cpp \
+ glsl/opt_constant_propagation.cpp \
+ glsl/opt_constant_variable.cpp \
+ glsl/opt_copy_propagation.cpp \
+ glsl/opt_copy_propagation_elements.cpp \
+ glsl/opt_dead_builtin_variables.cpp \
+ glsl/opt_dead_builtin_varyings.cpp \
+ glsl/opt_dead_code.cpp \
+ glsl/opt_dead_code_local.cpp \
+ glsl/opt_dead_functions.cpp \
+ glsl/opt_flatten_nested_if_blocks.cpp \
+ glsl/opt_flip_matrices.cpp \
+ glsl/opt_function_inlining.cpp \
+ glsl/opt_if_simplification.cpp \
+ glsl/opt_minmax.cpp \
+ glsl/opt_noop_swizzle.cpp \
+ glsl/opt_rebalance_tree.cpp \
+ glsl/opt_redundant_jumps.cpp \
+ glsl/opt_structure_splitting.cpp \
+ glsl/opt_swizzle_swizzle.cpp \
+ glsl/opt_tree_grafting.cpp \
+ glsl/opt_vectorize.cpp \
+ glsl/program.h \
+ glsl/s_expression.cpp \
+ glsl/s_expression.h
+
+ # glsl_compiler
+
+ GLSL_COMPILER_CXX_FILES = \
+ glsl/standalone_scaffolding.cpp \
+ glsl/standalone_scaffolding.h \
+ glsl/main.cpp
+
+ # libglsl generated sources
+ LIBGLSL_GENERATED_CXX_FILES = \
+ glsl/glsl_lexer.cpp \
+ glsl/glsl_parser.cpp
+
+ # libglcpp
+
+ LIBGLCPP_FILES = \
+ glsl/glcpp/glcpp.h \
+ glsl/glcpp/pp.c
+
+ LIBGLCPP_GENERATED_FILES = \
+ glsl/glcpp/glcpp-lex.c \
+ glsl/glcpp/glcpp-parse.c
+
+ NIR_GENERATED_FILES = \
+ nir/nir_builder_opcodes.h \
+ nir/nir_constant_expressions.c \
+ nir/nir_opcodes.c \
+ nir/nir_opcodes.h \
+ nir/nir_opt_algebraic.c
+
+ NIR_FILES = \
+ nir/glsl_to_nir.cpp \
+ nir/glsl_to_nir.h \
+ nir/nir.c \
+ nir/nir.h \
+ nir/nir_array.h \
+ nir/nir_builder.h \
+ nir/nir_clone.c \
+ nir/nir_constant_expressions.h \
+ nir/nir_control_flow.c \
+ nir/nir_control_flow.h \
+ nir/nir_control_flow_private.h \
+ nir/nir_dominance.c \
+ nir/nir_from_ssa.c \
++ nir/nir_gather_info.c \
+ nir/nir_gs_count_vertices.c \
++ nir/nir_inline_functions.c \
+ nir/nir_intrinsics.c \
+ nir/nir_intrinsics.h \
+ nir/nir_instr_set.c \
+ nir/nir_instr_set.h \
+ nir/nir_liveness.c \
+ nir/nir_lower_alu_to_scalar.c \
+ nir/nir_lower_atomics.c \
+ nir/nir_lower_clip.c \
+ nir/nir_lower_global_vars_to_local.c \
+ nir/nir_lower_gs_intrinsics.c \
++ nir/nir_lower_indirect_derefs.c \
+ nir/nir_lower_load_const_to_scalar.c \
+ nir/nir_lower_locals_to_regs.c \
+ nir/nir_lower_idiv.c \
+ nir/nir_lower_io.c \
+ nir/nir_lower_outputs_to_temporaries.c \
+ nir/nir_lower_phis_to_scalar.c \
++ nir/nir_lower_returns.c \
+ nir/nir_lower_samplers.c \
+ nir/nir_lower_system_values.c \
+ nir/nir_lower_tex.c \
+ nir/nir_lower_to_source_mods.c \
+ nir/nir_lower_two_sided_color.c \
+ nir/nir_lower_vars_to_ssa.c \
+ nir/nir_lower_var_copies.c \
+ nir/nir_lower_vec_to_movs.c \
+ nir/nir_metadata.c \
+ nir/nir_move_vec_src_uses_to_dest.c \
+ nir/nir_normalize_cubemap_coords.c \
+ nir/nir_opt_constant_folding.c \
+ nir/nir_opt_copy_propagate.c \
+ nir/nir_opt_cse.c \
+ nir/nir_opt_dce.c \
+ nir/nir_opt_dead_cf.c \
+ nir/nir_opt_gcm.c \
+ nir/nir_opt_global_to_local.c \
+ nir/nir_opt_peephole_select.c \
+ nir/nir_opt_remove_phis.c \
+ nir/nir_opt_undef.c \
++ nir/nir_phi_builder.c \
++ nir/nir_phi_builder.h \
+ nir/nir_print.c \
+ nir/nir_remove_dead_variables.c \
++ nir/nir_repair_ssa.c \
+ nir/nir_search.c \
+ nir/nir_search.h \
+ nir/nir_split_var_copies.c \
+ nir/nir_sweep.c \
+ nir/nir_to_ssa.c \
+ nir/nir_validate.c \
+ nir/nir_vla.h \
+ nir/nir_worklist.c \
+ nir/nir_worklist.h
++
++SPIRV_FILES = \
++ nir/spirv/nir_spirv.h \
++ nir/spirv/spirv_to_nir.c \
++ nir/spirv/vtn_alu.c \
++ nir/spirv/vtn_cfg.c \
++ nir/spirv/vtn_glsl450.c \
++ nir/spirv/vtn_private.h \
++ nir/spirv/vtn_variables.c
--- /dev/null
+ glsl_compiler
+ glsl_lexer.cpp
+ glsl_parser.cpp
+ glsl_parser.h
+ glsl_parser.output
+ glsl_test
++spirv2nir
+ subtest-cr/
+ subtest-lf/
+ subtest-cr-lf/
+ subtest-lf-cr/
--- /dev/null
-noinst_PROGRAMS = glsl_compiler
+ # Copyright © 2012 Jon TURNEY
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a
+ # copy of this software and associated documentation files (the "Software"),
+ # to deal in the Software without restriction, including without limitation
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ # and/or sell copies of the Software, and to permit persons to whom the
+ # Software is furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice (including the next
+ # paragraph) shall be included in all copies or substantial portions of the
+ # Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ # IN THE SOFTWARE.
+
+ AM_CPPFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/mapi \
+ -I$(top_srcdir)/src/mesa/ \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/glsl/glcpp \
+ -I$(top_srcdir)/src/gtest/include \
+ $(DEFINES)
+ AM_CFLAGS = \
+ $(VISIBILITY_CFLAGS) \
+ $(MSVC2013_COMPAT_CFLAGS)
+ AM_CXXFLAGS = \
+ $(VISIBILITY_CXXFLAGS) \
+ $(MSVC2013_COMPAT_CXXFLAGS)
+
+ EXTRA_DIST = tests glcpp/tests README TODO glcpp/README \
+ glsl_lexer.ll \
+ glsl_parser.yy \
+ glcpp/glcpp-lex.l \
+ glcpp/glcpp-parse.y \
+ SConscript
+
+ include Makefile.sources
+
+ TESTS = glcpp/tests/glcpp-test \
+ glcpp/tests/glcpp-test-cr-lf \
+ tests/blob-test \
+ tests/general-ir-test \
+ tests/optimization-test \
+ tests/sampler-types-test \
+ tests/uniform-initializer-test
+
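+ # Make the configured Python interpreter and its flags available to the
+ # test scripts run below.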
+ TESTS_ENVIRONMENT= \
+ export PYTHON2=$(PYTHON2); \
+ export PYTHON_FLAGS=$(PYTHON_FLAGS);
+
+ noinst_LTLIBRARIES = libglsl.la libglcpp.la
+ check_PROGRAMS = \
+ glcpp/glcpp \
+ glsl_test \
+ tests/blob-test \
+ tests/general-ir-test \
+ tests/sampler-types-test \
+ tests/uniform-initializer-test
+
-
++noinst_PROGRAMS = glsl_compiler spirv2nir
+
+ tests_blob_test_SOURCES = \
+ tests/blob_test.c
+ tests_blob_test_LDADD = \
+ $(top_builddir)/src/glsl/libglsl.la
+
+ tests_general_ir_test_SOURCES = \
+ standalone_scaffolding.cpp \
+ tests/builtin_variable_test.cpp \
+ tests/invalidate_locations_test.cpp \
+ tests/general_ir_test.cpp \
+ tests/varyings_test.cpp
+ tests_general_ir_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+ tests_general_ir_test_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ $(top_builddir)/src/glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+ tests_uniform_initializer_test_SOURCES = \
+ tests/copy_constant_to_storage_tests.cpp \
+ tests/set_uniform_initializer_tests.cpp \
+ tests/uniform_initializer_utils.cpp \
+ tests/uniform_initializer_utils.h
+ tests_uniform_initializer_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+ tests_uniform_initializer_test_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ $(top_builddir)/src/glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+ tests_sampler_types_test_SOURCES = \
+ tests/sampler_types_test.cpp
+ tests_sampler_types_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+ tests_sampler_types_test_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ $(top_builddir)/src/glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+ libglcpp_la_LIBADD = \
+ $(top_builddir)/src/util/libmesautil.la
+ libglcpp_la_SOURCES = \
+ glcpp/glcpp-lex.c \
+ glcpp/glcpp-parse.c \
+ glcpp/glcpp-parse.h \
+ $(LIBGLCPP_FILES)
+
+ glcpp_glcpp_SOURCES = \
+ glcpp/glcpp.c
+ glcpp_glcpp_LDADD = \
+ libglcpp.la \
+ $(top_builddir)/src/libglsl_util.la \
+ -lm
+
+ libglsl_la_LIBADD = \
+ $(top_builddir)/src/compiler/nir/libnir.la \
+ libglcpp.la
+
+ libglsl_la_SOURCES = \
+ glsl_lexer.cpp \
+ glsl_parser.cpp \
+ glsl_parser.h \
+ $(LIBGLSL_FILES)
+
+ glsl_compiler_SOURCES = \
+ $(GLSL_COMPILER_CXX_FILES)
+
+ glsl_compiler_LDADD = \
+ libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(PTHREAD_LIBS)
+
++spirv2nir_SOURCES = \
++ standalone_scaffolding.cpp \
++ standalone_scaffolding.h \
++ nir/spirv2nir.c
++
++spirv2nir_LDADD = \
++ libglsl.la \
++ $(top_builddir)/src/libglsl_util.la \
++ $(PTHREAD_LIBS)
++
+ glsl_test_SOURCES = \
+ standalone_scaffolding.cpp \
+ test.cpp \
+ test_optpass.cpp \
+ test_optpass.h
+
+ glsl_test_LDADD = \
+ libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+ # We write our own rules for yacc and lex below. We'd rather use automake,
+ # but automake makes it especially difficult for a number of reasons:
+ #
+ # * < automake-1.12 generates .h files from .yy and .ypp files, but
+ # >=automake-1.12 generates .hh and .hpp files respectively. There's no
+ # good way of making a project that uses C++ yacc files compatible with
+ # both versions of automake. Strong work automake developers.
+ #
+ # * Since we're generating code from .l/.y files in a subdirectory (glcpp/)
+ # we'd like the resulting generated code to also go in glcpp/ for purposes
+ # of distribution. Automake gives no way to do this.
+ #
+ # * Since we're building multiple yacc parsers into one library (and via one
+ # Makefile) we have to use per-target YFLAGS. Using per-target YFLAGS causes
+ # automake to name the resulting generated code as <library-name>_filename.c.
+ # Frankly, that's ugly and we don't want a libglcpp_glcpp_parser.h file.
+
+ # In order to make build output print "LEX" and "YACC", we reproduce the
+ # automake variables below.
+
+ AM_V_LEX = $(am__v_LEX_$(V))
+ am__v_LEX_ = $(am__v_LEX_$(AM_DEFAULT_VERBOSITY))
+ am__v_LEX_0 = @echo " LEX " $@;
+ am__v_LEX_1 =
+
+ AM_V_YACC = $(am__v_YACC_$(V))
+ am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY))
+ am__v_YACC_0 = @echo " YACC " $@;
+ am__v_YACC_1 =
+
+ MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+ YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
+ LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
+
+ glsl_parser.cpp glsl_parser.h: glsl_parser.yy
+ $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy
+
+ glsl_lexer.cpp: glsl_lexer.ll
+ $(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll
+
+ glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
+ $(MKDIR_GEN)
+ $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y
+
+ glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
+ $(MKDIR_GEN)
+ $(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l
+
+ # Only the parsers (specifically the header files generated at the same time)
+ # need to be in BUILT_SOURCES. However, if we listed the parser headers, YACC
+ # would be invoked separately for the .c/.cpp file and for the .h file. By
+ # listing the .c/.cpp files instead, YACC is only executed once for each
+ # parser. The rest of the generated code
+ # will be created at the appropriate times according to standard automake
+ # dependency rules.
+ BUILT_SOURCES = \
+ glsl_parser.cpp \
+ glsl_lexer.cpp \
+ glcpp/glcpp-parse.c \
+ glcpp/glcpp-lex.c
+ CLEANFILES = \
+ glcpp/glcpp-parse.h \
+ glsl_parser.h \
+ $(BUILT_SOURCES)
+
+ clean-local:
+ $(RM) -r subtest-cr subtest-cr-lf subtest-lf subtest-lf-cr
+
+ dist-hook:
+ $(RM) glcpp/tests/*.out
+ $(RM) glcpp/tests/subtest*/*.out
--- /dev/null
+ # shared source lists for Makefile, SConscript, and Android.mk
+
+ # libglcpp
+
+ LIBGLCPP_FILES = \
+ glcpp/glcpp.h \
+ glcpp/pp.c
+
+ LIBGLCPP_GENERATED_FILES = \
+ glcpp/glcpp-lex.c \
+ glcpp/glcpp-parse.c
+
+ NIR_GENERATED_FILES = \
+ nir/nir_builder_opcodes.h \
+ nir/nir_constant_expressions.c \
+ nir/nir_opcodes.c \
+ nir/nir_opcodes.h \
+ nir/nir_opt_algebraic.c
+
+ NIR_FILES = \
+ nir/nir.c \
+ nir/nir.h \
+ nir/nir_array.h \
+ nir/nir_builder.h \
+ nir/nir_clone.c \
+ nir/nir_constant_expressions.h \
+ nir/nir_control_flow.c \
+ nir/nir_control_flow.h \
+ nir/nir_control_flow_private.h \
+ nir/nir_dominance.c \
+ nir/nir_from_ssa.c \
++ nir/nir_gather_info.c \
+ nir/nir_gs_count_vertices.c \
++ nir/nir_inline_functions.c \
+ nir/nir_intrinsics.c \
+ nir/nir_intrinsics.h \
+ nir/nir_instr_set.c \
+ nir/nir_instr_set.h \
+ nir/nir_liveness.c \
+ nir/nir_lower_alu_to_scalar.c \
+ nir/nir_lower_atomics.c \
+ nir/nir_lower_clip.c \
++ nir/nir_lower_returns.c \
+ nir/nir_lower_global_vars_to_local.c \
+ nir/nir_lower_gs_intrinsics.c \
++ nir/nir_lower_indirect_derefs.c \
+ nir/nir_lower_load_const_to_scalar.c \
+ nir/nir_lower_locals_to_regs.c \
+ nir/nir_lower_idiv.c \
+ nir/nir_lower_io.c \
+ nir/nir_lower_outputs_to_temporaries.c \
+ nir/nir_lower_phis_to_scalar.c \
+ nir/nir_lower_samplers.c \
+ nir/nir_lower_system_values.c \
+ nir/nir_lower_tex.c \
+ nir/nir_lower_to_source_mods.c \
+ nir/nir_lower_two_sided_color.c \
+ nir/nir_lower_vars_to_ssa.c \
+ nir/nir_lower_var_copies.c \
+ nir/nir_lower_vec_to_movs.c \
+ nir/nir_metadata.c \
+ nir/nir_move_vec_src_uses_to_dest.c \
+ nir/nir_normalize_cubemap_coords.c \
+ nir/nir_opt_constant_folding.c \
+ nir/nir_opt_copy_propagate.c \
+ nir/nir_opt_cse.c \
+ nir/nir_opt_dce.c \
+ nir/nir_opt_dead_cf.c \
+ nir/nir_opt_gcm.c \
+ nir/nir_opt_global_to_local.c \
+ nir/nir_opt_peephole_select.c \
+ nir/nir_opt_remove_phis.c \
+ nir/nir_opt_undef.c \
++ nir/nir_phi_builder.c \
++ nir/nir_phi_builder.h \
+ nir/nir_print.c \
+ nir/nir_remove_dead_variables.c \
++ nir/nir_repair_ssa.c \
+ nir/nir_search.c \
+ nir/nir_search.h \
+ nir/nir_split_var_copies.c \
+ nir/nir_sweep.c \
+ nir/nir_to_ssa.c \
+ nir/nir_validate.c \
+ nir/nir_vla.h \
+ nir/nir_worklist.c \
+ nir/nir_worklist.h
+
++SPIRV_FILES = \
++ nir/spirv/nir_spirv.h \
++ nir/spirv/spirv_to_nir.c \
++ nir/spirv/vtn_alu.c \
++ nir/spirv/vtn_cfg.c \
++ nir/spirv/vtn_glsl450.c \
++ nir/spirv/vtn_private.h \
++ nir/spirv/vtn_variables.c
++
+ # libglsl
+
+ LIBGLSL_FILES = \
+ ast.h \
+ ast_array_index.cpp \
+ ast_expr.cpp \
+ ast_function.cpp \
+ ast_to_hir.cpp \
+ ast_type.cpp \
+ blob.c \
+ blob.h \
+ builtin_functions.cpp \
+ builtin_types.cpp \
+ builtin_variables.cpp \
+ glsl_parser_extras.cpp \
+ glsl_parser_extras.h \
+ glsl_symbol_table.cpp \
+ glsl_symbol_table.h \
+ hir_field_selection.cpp \
+ ir_basic_block.cpp \
+ ir_basic_block.h \
+ ir_builder.cpp \
+ ir_builder.h \
+ ir_clone.cpp \
+ ir_constant_expression.cpp \
+ ir.cpp \
+ ir.h \
+ ir_equals.cpp \
+ ir_expression_flattening.cpp \
+ ir_expression_flattening.h \
+ ir_function_can_inline.cpp \
+ ir_function_detect_recursion.cpp \
+ ir_function_inlining.h \
+ ir_function.cpp \
+ ir_hierarchical_visitor.cpp \
+ ir_hierarchical_visitor.h \
+ ir_hv_accept.cpp \
+ ir_import_prototypes.cpp \
+ ir_optimization.h \
+ ir_print_visitor.cpp \
+ ir_print_visitor.h \
+ ir_reader.cpp \
+ ir_reader.h \
+ ir_rvalue_visitor.cpp \
+ ir_rvalue_visitor.h \
+ ir_set_program_inouts.cpp \
+ ir_uniform.h \
+ ir_validate.cpp \
+ ir_variable_refcount.cpp \
+ ir_variable_refcount.h \
+ ir_visitor.h \
+ linker.cpp \
+ linker.h \
+ link_atomics.cpp \
+ link_functions.cpp \
+ link_interface_blocks.cpp \
+ link_uniforms.cpp \
+ link_uniform_initializers.cpp \
+ link_uniform_block_active_visitor.cpp \
+ link_uniform_block_active_visitor.h \
+ link_uniform_blocks.cpp \
+ link_varyings.cpp \
+ link_varyings.h \
+ list.h \
+ loop_analysis.cpp \
+ loop_analysis.h \
+ loop_controls.cpp \
+ loop_unroll.cpp \
+ lower_buffer_access.cpp \
+ lower_buffer_access.h \
+ lower_clip_distance.cpp \
+ lower_const_arrays_to_uniforms.cpp \
+ lower_discard.cpp \
+ lower_discard_flow.cpp \
+ lower_if_to_cond_assign.cpp \
+ lower_instructions.cpp \
+ lower_jumps.cpp \
+ lower_mat_op_to_vec.cpp \
+ lower_noise.cpp \
+ lower_offset_array.cpp \
+ lower_packed_varyings.cpp \
+ lower_named_interface_blocks.cpp \
+ lower_packing_builtins.cpp \
+ lower_subroutine.cpp \
+ lower_tess_level.cpp \
+ lower_texture_projection.cpp \
+ lower_variable_index_to_cond_assign.cpp \
+ lower_vec_index_to_cond_assign.cpp \
+ lower_vec_index_to_swizzle.cpp \
+ lower_vector.cpp \
+ lower_vector_derefs.cpp \
+ lower_vector_insert.cpp \
+ lower_vertex_id.cpp \
+ lower_output_reads.cpp \
+ lower_shared_reference.cpp \
+ lower_ubo_reference.cpp \
+ opt_algebraic.cpp \
+ opt_array_splitting.cpp \
+ opt_conditional_discard.cpp \
+ opt_constant_folding.cpp \
+ opt_constant_propagation.cpp \
+ opt_constant_variable.cpp \
+ opt_copy_propagation.cpp \
+ opt_copy_propagation_elements.cpp \
+ opt_dead_builtin_variables.cpp \
+ opt_dead_builtin_varyings.cpp \
+ opt_dead_code.cpp \
+ opt_dead_code_local.cpp \
+ opt_dead_functions.cpp \
+ opt_flatten_nested_if_blocks.cpp \
+ opt_flip_matrices.cpp \
+ opt_function_inlining.cpp \
+ opt_if_simplification.cpp \
+ opt_minmax.cpp \
+ opt_noop_swizzle.cpp \
+ opt_rebalance_tree.cpp \
+ opt_redundant_jumps.cpp \
+ opt_structure_splitting.cpp \
+ opt_swizzle_swizzle.cpp \
+ opt_tree_grafting.cpp \
+ opt_vectorize.cpp \
+ program.h \
+ s_expression.cpp \
+ s_expression.h
+
+ # glsl to nir pass
+ GLSL_TO_NIR_FILES = \
+ nir/glsl_to_nir.cpp \
+ nir/glsl_to_nir.h
+
+ # glsl_compiler
+
+ GLSL_COMPILER_CXX_FILES = \
+ standalone_scaffolding.cpp \
+ standalone_scaffolding.h \
+ main.cpp
+
+ # libglsl generated sources
+ LIBGLSL_GENERATED_CXX_FILES = \
+ glsl_lexer.cpp \
+ glsl_parser.cpp
--- /dev/null
+ /*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ /**
+ * \file ast_to_hir.c
+ * Convert abstract syntax to high-level intermediate representation (HIR).
+ *
+ * During the conversion to HIR, the majority of the semantic checking is
+ * performed on the program. This includes:
+ *
+ * * Symbol table management
+ * * Type checking
+ * * Function binding
+ *
+ * The majority of this work could be done during parsing, and the parser could
+ * probably generate HIR directly. However, this results in frequent changes
+ * to the parser code. Since we do not assume that every system this compiler
+ * is built on will have Flex and Bison installed, we have to store the code
+ * generated by these tools in our version control system. In other parts of
+ * the system we've seen problems where a parser was changed but the generated
+ * code was not committed, merge conflicts were created because two developers
+ * had slightly different versions of Bison installed, etc.
+ *
+ * I have also noticed that running Bison generated parsers in GDB is very
+ * irritating. When you get a segfault on '$$ = $1->foo', you can't very
+ * well 'print $1' in GDB.
+ *
+ * As a result, my preference is to put as little C code as possible in the
+ * parser (and lexer) sources.
+ */
+
+ #include "glsl_symbol_table.h"
+ #include "glsl_parser_extras.h"
+ #include "ast.h"
+ #include "compiler/glsl_types.h"
+ #include "program/hash_table.h"
+ #include "main/shaderobj.h"
+ #include "ir.h"
+ #include "ir_builder.h"
+
+ using namespace ir_builder;
+
+ static void
+ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
+ exec_list *instructions);
+ static void
+ remove_per_vertex_blocks(exec_list *instructions,
+ _mesa_glsl_parse_state *state, ir_variable_mode mode);
+
+ /**
+ * Visitor class that finds the first instance of any write-only variable that
+ * is ever read, if any
+ */
+ class read_from_write_only_variable_visitor : public ir_hierarchical_visitor
+ {
+ public:
+ read_from_write_only_variable_visitor() : found(NULL)
+ {
+ }
+
+ virtual ir_visitor_status visit(ir_dereference_variable *ir)
+ {
+ if (this->in_assignee)
+ return visit_continue;
+
+ ir_variable *var = ir->variable_referenced();
+ /* We can have image_write_only set on both images and buffer variables,
+ * but for images there is a distinction between reads of the variable
+ * itself (write_only) and reads of the memory it points to
+ * (image_write_only), while buffer variables make no such distinction.
+ * That is why this check is limited to
+ * buffer variables alone.
+ */
+ if (!var || var->data.mode != ir_var_shader_storage)
+ return visit_continue;
+
+ if (var->data.image_write_only) {
+ found = var;
+ return visit_stop;
+ }
+
+ return visit_continue;
+ }
+
+ ir_variable *get_variable() {
+ return found;
+ }
+
+ virtual ir_visitor_status visit_enter(ir_expression *ir)
+ {
+ /* .length() doesn't actually read anything */
+ if (ir->operation == ir_unop_ssbo_unsized_array_length)
+ return visit_continue_with_parent;
+
+ return visit_continue;
+ }
+
+ private:
+ ir_variable *found;
+ };
+
+ void
+ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
+ {
+ _mesa_glsl_initialize_variables(instructions, state);
+
+ state->symbols->separate_function_namespace = state->language_version == 110;
+
+ state->current_function = NULL;
+
+ state->toplevel_ir = instructions;
+
+ state->gs_input_prim_type_specified = false;
+ state->tcs_output_vertices_specified = false;
+ state->cs_input_local_size_specified = false;
+
+ /* Section 4.2 of the GLSL 1.20 specification states:
+ * "The built-in functions are scoped in a scope outside the global scope
+ * users declare global variables in. That is, a shader's global scope,
+ * available for user-defined functions and global variables, is nested
+ * inside the scope containing the built-in functions."
+ *
+ * Since built-in functions like ftransform() access built-in variables,
+ * it follows that those must be in the outer scope as well.
+ *
+ * We push scope here to create this nesting effect...but don't pop.
+ * This way, a shader's globals are still in the symbol table for use
+ * by the linker.
+ */
+ state->symbols->push_scope();
+
+ foreach_list_typed (ast_node, ast, link, & state->translation_unit)
+ ast->hir(instructions, state);
+
+ detect_recursion_unlinked(state, instructions);
+ detect_conflicting_assignments(state, instructions);
+
+ state->toplevel_ir = NULL;
+
+ /* Move all of the variable declarations to the front of the IR list, and
+ * reverse the order. This has the (intended!) side effect that vertex
+ * shader inputs and fragment shader outputs will appear in the IR in the
+ * same order that they appeared in the shader code. This results in the
+ * locations being assigned in the declared order. Many (arguably buggy)
+ * applications depend on this behavior, and it matches what nearly all
+ * other drivers do.
+ */
+ foreach_in_list_safe(ir_instruction, node, instructions) {
+ ir_variable *const var = node->as_variable();
+
+ if (var == NULL)
+ continue;
+
+ var->remove();
+ instructions->push_head(var);
+ }
+
+ /* Figure out if gl_FragCoord is actually used in fragment shader */
+ ir_variable *const var = state->symbols->get_variable("gl_FragCoord");
+ if (var != NULL)
+ state->fs_uses_gl_fragcoord = var->data.used;
+
+ /* From section 7.1 (Built-In Language Variables) of the GLSL 4.10 spec:
+ *
+ * If multiple shaders using members of a built-in block belonging to
+ * the same interface are linked together in the same program, they
+ * must all redeclare the built-in block in the same way, as described
+ * in section 4.3.7 "Interface Blocks" for interface block matching, or
+ * a link error will result.
+ *
+ * The phrase "using members of a built-in block" implies that if two
+ * shaders are linked together and one of them *does not use* any members
+ * of the built-in block, then that shader does not need to have a matching
+ * redeclaration of the built-in block.
+ *
+ * This appears to be a clarification to the behaviour established for
+ * gl_PerVertex by GLSL 1.50, therefore implement it regardless of GLSL
+ * version.
+ *
+ * The definition of "interface" in section 4.3.7 that applies here is as
+ * follows:
+ *
+ * The boundary between adjacent programmable pipeline stages: This
+ * spans all the outputs in all compilation units of the first stage
+ * and all the inputs in all compilation units of the second stage.
+ *
+ * Therefore this rule applies to both inter- and intra-stage linking.
+ *
+ * The easiest way to implement this is to check whether the shader uses
+ * gl_PerVertex right after ast-to-ir conversion, and if it doesn't, simply
+ * remove all the relevant variable declarations from the IR, so that the
+ * linker won't see them and complain about mismatches.
+ */
+ remove_per_vertex_blocks(instructions, state, ir_var_shader_in);
+ remove_per_vertex_blocks(instructions, state, ir_var_shader_out);
+
+ /* Check that we don't have reads from write-only variables */
+ read_from_write_only_variable_visitor v;
+ v.run(instructions);
+ ir_variable *error_var = v.get_variable();
+ if (error_var) {
+ /* It would be nice to have proper location information, but for that
+ * we would need to check this as we process each kind of AST node
+ */
+ YYLTYPE loc;
+ memset(&loc, 0, sizeof(loc));
+ _mesa_glsl_error(&loc, state, "Read from write-only variable `%s'",
+ error_var->name);
+ }
+ }
+
+
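+ /**
+  * Return the IR operation that implicitly converts a value of type \c from
+  * to type \c to, or (ir_expression_operation)0 when no such implicit
+  * conversion exists or the required GLSL version / extension is missing.
+  */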
+ static ir_expression_operation
+ get_conversion_operation(const glsl_type *to, const glsl_type *from,
+ struct _mesa_glsl_parse_state *state)
+ {
+ switch (to->base_type) {
+ case GLSL_TYPE_FLOAT:
+ switch (from->base_type) {
+ case GLSL_TYPE_INT: return ir_unop_i2f;
+ case GLSL_TYPE_UINT: return ir_unop_u2f;
+ case GLSL_TYPE_DOUBLE: return ir_unop_d2f;
+ default: return (ir_expression_operation)0;
+ }
+
+ case GLSL_TYPE_UINT:
+ if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable)
+ return (ir_expression_operation)0;
+ switch (from->base_type) {
+ case GLSL_TYPE_INT: return ir_unop_i2u;
+ default: return (ir_expression_operation)0;
+ }
+
+ case GLSL_TYPE_DOUBLE:
+ if (!state->has_double())
+ return (ir_expression_operation)0;
+ switch (from->base_type) {
+ case GLSL_TYPE_INT: return ir_unop_i2d;
+ case GLSL_TYPE_UINT: return ir_unop_u2d;
+ case GLSL_TYPE_FLOAT: return ir_unop_f2d;
+ default: return (ir_expression_operation)0;
+ }
+
+ default: return (ir_expression_operation)0;
+ }
+ }
+
+
+ /**
+ * If a conversion is available, convert one operand to a different type
+ *
+ * The \c from \c ir_rvalue is converted "in place".
+ *
+ * \param to Type that the operand is to be converted to
+ * \param from Operand that is being converted
+ * \param state GLSL compiler state
+ *
+ * \return
+ * If a conversion is possible (or unnecessary), \c true is returned.
+ * Otherwise \c false is returned.
+ */
+ bool
+ apply_implicit_conversion(const glsl_type *to, ir_rvalue * &from,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+ if (to->base_type == from->type->base_type)
+ return true;
+
+ /* Prior to GLSL 1.20, there are no implicit conversions */
+ if (!state->is_version(120, 0))
+ return false;
+
+ /* From page 27 (page 33 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "There are no implicit array or structure conversions. For
+ * example, an array of int cannot be implicitly converted to an
+ * array of float.
+ */
+ if (!to->is_numeric() || !from->type->is_numeric())
+ return false;
+
+ /* We don't actually want the specific type `to`, we want a type
+ * with the same base type as `to`, but the same vector width as
+ * `from`.
+ */
+ to = glsl_type::get_instance(to->base_type, from->type->vector_elements,
+ from->type->matrix_columns);
+
+ ir_expression_operation op = get_conversion_operation(to, from->type, state);
+ if (op) {
+ from = new(ctx) ir_expression(op, to, from, NULL);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+
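+ /**
+  * Determine the result type of a binary arithmetic operator (+, -, *, /),
+  * converting the operands in place when an implicit conversion is required.
+  * Reports an error and returns glsl_type::error_type when the operand
+  * types cannot be combined.
+  */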
+ static const struct glsl_type *
+ arithmetic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
+ bool multiply,
+ struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+ {
+ const glsl_type *type_a = value_a->type;
+ const glsl_type *type_b = value_b->type;
+
+ /* From GLSL 1.50 spec, page 56:
+ *
+ * "The arithmetic binary operators add (+), subtract (-),
+ * multiply (*), and divide (/) operate on integer and
+ * floating-point scalars, vectors, and matrices."
+ */
+ if (!type_a->is_numeric() || !type_b->is_numeric()) {
+ _mesa_glsl_error(loc, state,
+ "operands to arithmetic operators must be numeric");
+ return glsl_type::error_type;
+ }
+
+
+ /* "If one operand is floating-point based and the other is
+ * not, then the conversions from Section 4.1.10 "Implicit
+ * Conversions" are applied to the non-floating-point-based operand."
+ */
+ if (!apply_implicit_conversion(type_a, value_b, state)
+ && !apply_implicit_conversion(type_b, value_a, state)) {
+ _mesa_glsl_error(loc, state,
+ "could not implicitly convert operands to "
+ "arithmetic operator");
+ return glsl_type::error_type;
+ }
+ type_a = value_a->type;
+ type_b = value_b->type;
+
+ /* "If the operands are integer types, they must both be signed or
+ * both be unsigned."
+ *
+ * From this rule and the preceding conversion it can be inferred that
+ * both types must be GLSL_TYPE_FLOAT, or GLSL_TYPE_UINT, or GLSL_TYPE_INT.
+ * The is_numeric check above already filtered out the case where either
+ * type is not one of these, so now the base types need only be tested for
+ * equality.
+ */
+ if (type_a->base_type != type_b->base_type) {
+ _mesa_glsl_error(loc, state,
+ "base type mismatch for arithmetic operator");
+ return glsl_type::error_type;
+ }
+
+ /* "All arithmetic binary operators result in the same fundamental type
+ * (signed integer, unsigned integer, or floating-point) as the
+ * operands they operate on, after operand type conversion. After
+ * conversion, the following cases are valid
+ *
+ * * The two operands are scalars. In this case the operation is
+ * applied, resulting in a scalar."
+ */
+ if (type_a->is_scalar() && type_b->is_scalar())
+ return type_a;
+
+ /* "* One operand is a scalar, and the other is a vector or matrix.
+ * In this case, the scalar operation is applied independently to each
+ * component of the vector or matrix, resulting in the same size
+ * vector or matrix."
+ */
+ if (type_a->is_scalar()) {
+ if (!type_b->is_scalar())
+ return type_b;
+ } else if (type_b->is_scalar()) {
+ return type_a;
+ }
+
+ /* All of the combinations of <scalar, scalar>, <vector, scalar>,
+ * <scalar, vector>, <scalar, matrix>, and <matrix, scalar> have been
+ * handled.
+ */
+ assert(!type_a->is_scalar());
+ assert(!type_b->is_scalar());
+
+ /* "* The two operands are vectors of the same size. In this case, the
+ * operation is done component-wise resulting in the same size
+ * vector."
+ */
+ if (type_a->is_vector() && type_b->is_vector()) {
+ if (type_a == type_b) {
+ return type_a;
+ } else {
+ _mesa_glsl_error(loc, state,
+ "vector size mismatch for arithmetic operator");
+ return glsl_type::error_type;
+ }
+ }
+
+ /* All of the combinations of <scalar, scalar>, <vector, scalar>,
+ * <scalar, vector>, <scalar, matrix>, <matrix, scalar>, and
+ * <vector, vector> have been handled. At least one of the operands must
+ * be matrix. Further, since there are no integer matrix types, the base
+ * type of both operands must be float.
+ */
+ assert(type_a->is_matrix() || type_b->is_matrix());
+ assert(type_a->base_type == GLSL_TYPE_FLOAT ||
+ type_a->base_type == GLSL_TYPE_DOUBLE);
+ assert(type_b->base_type == GLSL_TYPE_FLOAT ||
+ type_b->base_type == GLSL_TYPE_DOUBLE);
+
+ /* "* The operator is add (+), subtract (-), or divide (/), and the
+ * operands are matrices with the same number of rows and the same
+ * number of columns. In this case, the operation is done component-
+ * wise resulting in the same size matrix."
+ * * The operator is multiply (*), where both operands are matrices or
+ * one operand is a vector and the other a matrix. A right vector
+ * operand is treated as a column vector and a left vector operand as a
+ * row vector. In all these cases, it is required that the number of
+ * columns of the left operand is equal to the number of rows of the
+ * right operand. Then, the multiply (*) operation does a linear
+ * algebraic multiply, yielding an object that has the same number of
+ * rows as the left operand and the same number of columns as the right
+ * operand. Section 5.10 "Vector and Matrix Operations" explains in
+ * more detail how vectors and matrices are operated on."
+ */
+ if (! multiply) {
+ if (type_a == type_b)
+ return type_a;
+ } else {
+ const glsl_type *type = glsl_type::get_mul_type(type_a, type_b);
+
+ if (type == glsl_type::error_type) {
+ _mesa_glsl_error(loc, state,
+ "size mismatch for matrix multiplication");
+ }
+
+ return type;
+ }
+
+
+ /* "All other cases are illegal."
+ */
+ _mesa_glsl_error(loc, state, "type mismatch");
+ return glsl_type::error_type;
+ }
+
+
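+ /**
+  * Determine the result type of a unary arithmetic operator, which is simply
+  * the (numeric) operand type; reports an error and returns
+  * glsl_type::error_type for non-numeric operands.
+  */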
+ static const struct glsl_type *
+ unary_arithmetic_result_type(const struct glsl_type *type,
+ struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+ {
+ /* From GLSL 1.50 spec, page 57:
+ *
+ * "The arithmetic unary operators negate (-), post- and pre-increment
+ * and decrement (-- and ++) operate on integer or floating-point
+ * values (including vectors and matrices). All unary operators work
+ * component-wise on their operands. These result with the same type
+ * they operated on."
+ */
+ if (!type->is_numeric()) {
+ _mesa_glsl_error(loc, state,
+ "operands to arithmetic operators must be numeric");
+ return glsl_type::error_type;
+ }
+
+ return type;
+ }
+
+ /**
+ * \brief Return the result type of a bit-logic operation.
+ *
+ * If the given types to the bit-logic operator are invalid, return
+ * glsl_type::error_type.
+ *
+ * \param value_a LHS of bit-logic op
+ * \param value_b RHS of bit-logic op
+ */
+ static const struct glsl_type *
+ bit_logic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
+ ast_operators op,
+ struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+ {
+ const glsl_type *type_a = value_a->type;
+ const glsl_type *type_b = value_b->type;
+
+ if (!state->check_bitwise_operations_allowed(loc)) {
+ return glsl_type::error_type;
+ }
+
+ /* From page 50 (page 56 of PDF) of GLSL 1.30 spec:
+ *
+ * "The bitwise operators and (&), exclusive-or (^), and inclusive-or
+ * (|). The operands must be of type signed or unsigned integers or
+ * integer vectors."
+ */
+ if (!type_a->is_integer()) {
+ _mesa_glsl_error(loc, state, "LHS of `%s' must be an integer",
+ ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
+ if (!type_b->is_integer()) {
+ _mesa_glsl_error(loc, state, "RHS of `%s' must be an integer",
+ ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
+
+ /* Prior to GLSL 4.0 / GL_ARB_gpu_shader5, implicit conversions didn't
+ * make sense for bitwise operations, as they don't operate on floats.
+ *
+ * GLSL 4.0 added implicit int -> uint conversions, which are relevant
+ * here. It wasn't clear whether or not we should apply them to bitwise
+ * operations. However, Khronos has decided that they should in future
+ * language revisions. Applications also rely on this behavior. We opt
+ * to apply them in general, but issue a portability warning.
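+ * For example, with GLSL 4.00, "u & i" where "u" is uint and "i" is int
+ * converts the int operand to uint and proceeds, with a portability warning.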
+ *
+ * See https://www.khronos.org/bugzilla/show_bug.cgi?id=1405
+ */
+ if (type_a->base_type != type_b->base_type) {
+ if (!apply_implicit_conversion(type_a, value_b, state)
+ && !apply_implicit_conversion(type_b, value_a, state)) {
+ _mesa_glsl_error(loc, state,
+ "could not implicitly convert operands to "
+ "`%s` operator",
+ ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ } else {
+ _mesa_glsl_warning(loc, state,
+ "some implementations may not support implicit "
+ "int -> uint conversions for `%s' operators; "
+ "consider casting explicitly for portability",
+ ast_expression::operator_string(op));
+ }
+ type_a = value_a->type;
+ type_b = value_b->type;
+ }
+
+ /* "The fundamental types of the operands (signed or unsigned) must
+ * match,"
+ */
+ if (type_a->base_type != type_b->base_type) {
+ _mesa_glsl_error(loc, state, "operands of `%s' must have the same "
+ "base type", ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
+
+ /* "The operands cannot be vectors of differing size." */
+ if (type_a->is_vector() &&
+ type_b->is_vector() &&
+ type_a->vector_elements != type_b->vector_elements) {
+ _mesa_glsl_error(loc, state, "operands of `%s' cannot be vectors of "
+ "different sizes", ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
+
+ /* "If one operand is a scalar and the other a vector, the scalar is
+ * applied component-wise to the vector, resulting in the same type as
+ * the vector. The fundamental types of the operands [...] will be the
+ * resulting fundamental type."
+ */
+ if (type_a->is_scalar())
+ return type_b;
+ else
+ return type_a;
+ }
+
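+ /**
+  * Determine the result type of a modulus (%) operation, checking that both
+  * operands are integers and applying implicit conversions where the GLSL
+  * version permits them.  Returns glsl_type::error_type on mismatch.
+  */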
+ static const struct glsl_type *
+ modulus_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
+ struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+ {
+ const glsl_type *type_a = value_a->type;
+ const glsl_type *type_b = value_b->type;
+
+ if (!state->check_version(130, 300, loc, "operator '%%' is reserved")) {
+ return glsl_type::error_type;
+ }
+
+ /* Section 5.9 (Expressions) of the GLSL 4.00 specification says:
+ *
+ * "The operator modulus (%) operates on signed or unsigned integers or
+ * integer vectors."
+ */
+ if (!type_a->is_integer()) {
+ _mesa_glsl_error(loc, state, "LHS of operator %% must be an integer");
+ return glsl_type::error_type;
+ }
+ if (!type_b->is_integer()) {
+ _mesa_glsl_error(loc, state, "RHS of operator %% must be an integer");
+ return glsl_type::error_type;
+ }
+
+ /* "If the fundamental types in the operands do not match, then the
+ * conversions from section 4.1.10 "Implicit Conversions" are applied
+ * to create matching types."
+ *
+ * Note that GLSL 4.00 (and GL_ARB_gpu_shader5) introduced implicit
+ * int -> uint conversion rules. Prior to that, there were no implicit
+ * conversions. So it's harmless to apply them universally - no implicit
+ * conversions will exist. If the types don't match, we'll receive false,
+ * and raise an error, satisfying the GLSL 1.50 spec, page 56:
+ *
+ * "The operand types must both be signed or unsigned."
+ */
+ if (!apply_implicit_conversion(type_a, value_b, state) &&
+ !apply_implicit_conversion(type_b, value_a, state)) {
+ _mesa_glsl_error(loc, state,
+ "could not implicitly convert operands to "
+ "modulus (%%) operator");
+ return glsl_type::error_type;
+ }
+ type_a = value_a->type;
+ type_b = value_b->type;
+
+ /* "The operands cannot be vectors of differing size. If one operand is
+ * a scalar and the other vector, then the scalar is applied component-
+ * wise to the vector, resulting in the same type as the vector. If both
+ * are vectors of the same size, the result is computed component-wise."
+ */
+ if (type_a->is_vector()) {
+ if (!type_b->is_vector()
+ || (type_a->vector_elements == type_b->vector_elements))
+ return type_a;
+ } else
+ return type_b;
+
+ /* "The operator modulus (%) is not defined for any other data types
+ * (non-integer types)."
+ */
+ _mesa_glsl_error(loc, state, "type mismatch");
+ return glsl_type::error_type;
+ }
+
+
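+ /**
+  * Determine the result type of a relational operator (<, >, <=, >=), which
+  * is always scalar Boolean; reports an error and returns
+  * glsl_type::error_type unless both operands are numeric scalars of the
+  * same (possibly implicitly converted) base type.
+  */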
+ static const struct glsl_type *
+ relational_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
+ struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+ {
+ const glsl_type *type_a = value_a->type;
+ const glsl_type *type_b = value_b->type;
+
+ /* From GLSL 1.50 spec, page 56:
+ * "The relational operators greater than (>), less than (<), greater
+ * than or equal (>=), and less than or equal (<=) operate only on
+ * scalar integer and scalar floating-point expressions."
+ */
+ if (!type_a->is_numeric()
+ || !type_b->is_numeric()
+ || !type_a->is_scalar()
+ || !type_b->is_scalar()) {
+ _mesa_glsl_error(loc, state,
+ "operands to relational operators must be scalar and "
+ "numeric");
+ return glsl_type::error_type;
+ }
+
+ /* "Either the operands' types must match, or the conversions from
+ * Section 4.1.10 "Implicit Conversions" will be applied to the integer
+ * operand, after which the types must match."
+ */
+ if (!apply_implicit_conversion(type_a, value_b, state)
+ && !apply_implicit_conversion(type_b, value_a, state)) {
+ _mesa_glsl_error(loc, state,
+ "could not implicitly convert operands to "
+ "relational operator");
+ return glsl_type::error_type;
+ }
+ type_a = value_a->type;
+ type_b = value_b->type;
+
+ if (type_a->base_type != type_b->base_type) {
+ _mesa_glsl_error(loc, state, "base type mismatch");
+ return glsl_type::error_type;
+ }
+
+ /* "The result is scalar Boolean."
+ */
+ return glsl_type::bool_type;
+ }
+
+ /**
+ * \brief Return the result type of a bit-shift operation.
+ *
+ * If the given types to the bit-shift operator are invalid, return
+ * glsl_type::error_type.
+ *
+ * \param type_a Type of LHS of bit-shift op
+ * \param type_b Type of RHS of bit-shift op
+ */
+ static const struct glsl_type *
+ shift_result_type(const struct glsl_type *type_a,
+ const struct glsl_type *type_b,
+ ast_operators op,
+ struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+ {
+ if (!state->check_bitwise_operations_allowed(loc)) {
+ return glsl_type::error_type;
+ }
+
+ /* From page 50 (page 56 of the PDF) of the GLSL 1.30 spec:
+ *
+ * "The shift operators (<<) and (>>). For both operators, the operands
+ * must be signed or unsigned integers or integer vectors. One operand
+ * can be signed while the other is unsigned."
+ */
+ if (!type_a->is_integer()) {
+ _mesa_glsl_error(loc, state, "LHS of operator %s must be an integer or "
+ "integer vector", ast_expression::operator_string(op));
+ return glsl_type::error_type;
+
+ }
+ if (!type_b->is_integer()) {
+ _mesa_glsl_error(loc, state, "RHS of operator %s must be an integer or "
+ "integer vector", ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
+
+ /* "If the first operand is a scalar, the second operand has to be
+ * a scalar as well."
+ */
+ if (type_a->is_scalar() && !type_b->is_scalar()) {
+ _mesa_glsl_error(loc, state, "if the first operand of %s is scalar, the "
+ "second must be scalar as well",
+ ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
+
+ /* If both operands are vectors, check that they have the same number of
+ * elements.
+ */
+ if (type_a->is_vector() &&
+ type_b->is_vector() &&
+ type_a->vector_elements != type_b->vector_elements) {
+ _mesa_glsl_error(loc, state, "vector operands to operator %s must "
+ "have same number of elements",
+ ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
+
+ /* "In all cases, the resulting type will be the same type as the left
+ * operand."
+ */
+ return type_a;
+ }
+
+ /**
+ * Returns the innermost array index expression in an rvalue tree.
+ * This is the largest indexing level -- if an array of blocks, then
+ * it is the block index rather than an indexing expression for an
+ * array-typed member of an array of blocks.
+ */
+ static ir_rvalue *
+ find_innermost_array_index(ir_rvalue *rv)
+ {
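+ /* For example (illustrative), given an l-value written as
+ * blocks[i].member[j], this returns the block index expression `i',
+ * not the member index `j'.
+ */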
+ ir_dereference_array *last = NULL;
+ while (rv) {
+ if (rv->as_dereference_array()) {
+ last = rv->as_dereference_array();
+ rv = last->array;
+ } else if (rv->as_dereference_record())
+ rv = rv->as_dereference_record()->record;
+ else if (rv->as_swizzle())
+ rv = rv->as_swizzle()->val;
+ else
+ rv = NULL;
+ }
+
+ if (last)
+ return last->array_index;
+
+ return NULL;
+ }
+
+ /**
+ * Validates that a value can be assigned to a location with a specified type
+ *
+ * Validates that \c rhs can be assigned to some location. If the types are
+ * not an exact match but an automatic conversion is possible, \c rhs will be
+ * converted.
+ *
+ * \return
+ * \c NULL if \c rhs cannot be assigned to a location with type \c lhs_type.
+ * Otherwise the actual RHS to be assigned will be returned. This may be
+ * \c rhs, or it may be \c rhs after some type conversion.
+ *
+ * \note
+ * In addition to being used for assignments, this function is used to
+ * type-check return values.
+ */
+ static ir_rvalue *
+ validate_assignment(struct _mesa_glsl_parse_state *state,
+ YYLTYPE loc, ir_rvalue *lhs,
+ ir_rvalue *rhs, bool is_initializer)
+ {
+ /* If there is already some error in the RHS, just return it. Anything
+ * else will lead to an avalanche of error messages back to the user.
+ */
+ if (rhs->type->is_error())
+ return rhs;
+
+ /* In the Tessellation Control Shader:
+ * If a per-vertex output variable is used as an l-value, it is an error
+ * if the expression indicating the vertex number is not the identifier
+ * `gl_InvocationID`.
+ */
+ if (state->stage == MESA_SHADER_TESS_CTRL) {
+ ir_variable *var = lhs->variable_referenced();
+ if (var->data.mode == ir_var_shader_out && !var->data.patch) {
+ ir_rvalue *index = find_innermost_array_index(lhs);
+ ir_variable *index_var = index ? index->variable_referenced() : NULL;
+ if (!index_var || strcmp(index_var->name, "gl_InvocationID") != 0) {
+ _mesa_glsl_error(&loc, state,
+ "Tessellation control shader outputs can only "
+ "be indexed by gl_InvocationID");
+ return NULL;
+ }
+ }
+ }
+
+ /* If the types are identical, the assignment can trivially proceed.
+ */
+ if (rhs->type == lhs->type)
+ return rhs;
+
+ /* If the array element types are the same and the LHS is unsized,
+ * the assignment is okay for initializers embedded in variable
+ * declarations.
+ *
+ * Note: Whole-array assignments are not permitted in GLSL 1.10, but this
+ * is handled by ir_dereference::is_lvalue.
+ */
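+ /* For example (illustrative, GLSL 1.20+ array constructors):
+ * "float a[] = float[](1.0, 2.0);" is accepted here because the unsized
+ * LHS takes its size from the initializer, while the same assignment
+ * outside a declaration hits the error below.
+ */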
+ const glsl_type *lhs_t = lhs->type;
+ const glsl_type *rhs_t = rhs->type;
+ bool unsized_array = false;
+ while(lhs_t->is_array()) {
+ if (rhs_t == lhs_t)
+ break; /* the rest of the inner arrays match so break out early */
+ if (!rhs_t->is_array()) {
+ unsized_array = false;
+ break; /* number of dimensions mismatch */
+ }
+ if (lhs_t->length == rhs_t->length) {
+ lhs_t = lhs_t->fields.array;
+ rhs_t = rhs_t->fields.array;
+ continue;
+ } else if (lhs_t->is_unsized_array()) {
+ unsized_array = true;
+ } else {
+ unsized_array = false;
+ break; /* sized array mismatch */
+ }
+ lhs_t = lhs_t->fields.array;
+ rhs_t = rhs_t->fields.array;
+ }
+ if (unsized_array) {
+ if (is_initializer) {
+ return rhs;
+ } else {
+ _mesa_glsl_error(&loc, state,
+ "implicitly sized arrays cannot be assigned");
+ return NULL;
+ }
+ }
+
+ /* Check for implicit conversion in GLSL 1.20 */
+ if (apply_implicit_conversion(lhs->type, rhs, state)) {
+ if (rhs->type == lhs->type)
+ return rhs;
+ }
+
+ _mesa_glsl_error(&loc, state,
+ "%s of type %s cannot be assigned to "
+ "variable of type %s",
+ is_initializer ? "initializer" : "value",
+ rhs->type->name, lhs->type->name);
+
+ return NULL;
+ }
+
+ static void
+ mark_whole_array_access(ir_rvalue *access)
+ {
+ ir_dereference_variable *deref = access->as_dereference_variable();
+
+ if (deref && deref->var) {
+ deref->var->data.max_array_access = deref->type->length - 1;
+ }
+ }
+
+ static bool
+ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
+ const char *non_lvalue_description,
+ ir_rvalue *lhs, ir_rvalue *rhs,
+ ir_rvalue **out_rvalue, bool needs_rvalue,
+ bool is_initializer,
+ YYLTYPE lhs_loc)
+ {
+ void *ctx = state;
+ bool error_emitted = (lhs->type->is_error() || rhs->type->is_error());
+
+ ir_variable *lhs_var = lhs->variable_referenced();
+ if (lhs_var)
+ lhs_var->data.assigned = true;
+
+ if (!error_emitted) {
+ if (non_lvalue_description != NULL) {
+ _mesa_glsl_error(&lhs_loc, state,
+ "assignment to %s",
+ non_lvalue_description);
+ error_emitted = true;
+ } else if (lhs_var != NULL && (lhs_var->data.read_only ||
+ (lhs_var->data.mode == ir_var_shader_storage &&
+ lhs_var->data.image_read_only))) {
+ /* We can have image_read_only set on both images and buffer variables,
+ * but in the former there is a distinction between assignments to
+ * the variable itself (read_only) and to the memory it points to
+ * (image_read_only), while for buffer variables there is no such
+ * distinction. That is why this check is limited to buffer variables
+ * alone.
+ */
+ _mesa_glsl_error(&lhs_loc, state,
+ "assignment to read-only variable '%s'",
+ lhs_var->name);
+ error_emitted = true;
+ } else if (lhs->type->is_array() &&
+ !state->check_version(120, 300, &lhs_loc,
+ "whole array assignment forbidden")) {
+ /* From page 32 (page 38 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "Other binary or unary expressions, non-dereferenced
+ * arrays, function names, swizzles with repeated fields,
+ * and constants cannot be l-values."
+ *
+ * The restriction on arrays is lifted in GLSL 1.20 and GLSL ES 3.00.
+ */
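+ /* e.g. (illustrative) "float a[2], b[2]; a = b;" is rejected in
+ * GLSL 1.10 but accepted from GLSL 1.20 / GLSL ES 3.00 on.
+ */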
+ error_emitted = true;
+ } else if (!lhs->is_lvalue()) {
+ _mesa_glsl_error(& lhs_loc, state, "non-lvalue in assignment");
+ error_emitted = true;
+ }
+ }
+
+ ir_rvalue *new_rhs =
+ validate_assignment(state, lhs_loc, lhs, rhs, is_initializer);
+ if (new_rhs != NULL) {
+ rhs = new_rhs;
+
+ /* If the LHS array was not declared with a size, it takes its size from
+ * the RHS. If the LHS is an l-value and a whole array, it must be a
+ * dereference of a variable. Any other case would require that the LHS
+ * is either not an l-value or not a whole array.
+ */
+ if (lhs->type->is_unsized_array()) {
+ ir_dereference *const d = lhs->as_dereference();
+
+ assert(d != NULL);
+
+ ir_variable *const var = d->variable_referenced();
+
+ assert(var != NULL);
+
+ if (var->data.max_array_access >= unsigned(rhs->type->array_size())) {
+ /* FINISHME: This should actually log the location of the RHS. */
+ _mesa_glsl_error(& lhs_loc, state, "array size must be > %u due to "
+ "previous access",
+ var->data.max_array_access);
+ }
+
+ var->type = glsl_type::get_array_instance(lhs->type->fields.array,
+ rhs->type->array_size());
+ d->type = var->type;
+ }
+ if (lhs->type->is_array()) {
+ mark_whole_array_access(rhs);
+ mark_whole_array_access(lhs);
+ }
+ }
+
+ /* Most callers of do_assignment (assign, add_assign, pre_inc/dec,
+ * but not post_inc) need the converted assigned value as an rvalue
+ * to handle things like:
+ *
+ * i = j += 1;
+ */
+ if (needs_rvalue) {
+ ir_variable *var = new(ctx) ir_variable(rhs->type, "assignment_tmp",
+ ir_var_temporary);
+ instructions->push_tail(var);
+ instructions->push_tail(assign(var, rhs));
+
+ if (!error_emitted) {
+ ir_dereference_variable *deref_var = new(ctx) ir_dereference_variable(var);
+ instructions->push_tail(new(ctx) ir_assignment(lhs, deref_var));
+ }
+ ir_rvalue *rvalue = new(ctx) ir_dereference_variable(var);
+
+ *out_rvalue = rvalue;
+ } else {
+ if (!error_emitted)
+ instructions->push_tail(new(ctx) ir_assignment(lhs, rhs));
+ *out_rvalue = NULL;
+ }
+
+ return error_emitted;
+ }
+
+ static ir_rvalue *
+ get_lvalue_copy(exec_list *instructions, ir_rvalue *lvalue)
+ {
+ void *ctx = ralloc_parent(lvalue);
+ ir_variable *var;
+
+ var = new(ctx) ir_variable(lvalue->type, "_post_incdec_tmp",
+ ir_var_temporary);
+ instructions->push_tail(var);
+
+ instructions->push_tail(new(ctx) ir_assignment(new(ctx) ir_dereference_variable(var),
+ lvalue));
+
+ return new(ctx) ir_dereference_variable(var);
+ }
+
+
+ ir_rvalue *
+ ast_node::hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
+ {
+ (void) instructions;
+ (void) state;
+
+ return NULL;
+ }
+
+ bool
+ ast_node::has_sequence_subexpression() const
+ {
+ return false;
+ }
+
+ void
+ ast_function_expression::hir_no_rvalue(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ (void)hir(instructions, state);
+ }
+
+ void
+ ast_aggregate_initializer::hir_no_rvalue(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ (void)hir(instructions, state);
+ }
+
+ static ir_rvalue *
+ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
+ {
+ int join_op;
+ ir_rvalue *cmp = NULL;
+
+ if (operation == ir_binop_all_equal)
+ join_op = ir_binop_logic_and;
+ else
+ join_op = ir_binop_logic_or;
+
+ switch (op0->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_DOUBLE:
+ return new(mem_ctx) ir_expression(operation, op0, op1);
+
+ case GLSL_TYPE_ARRAY: {
+ for (unsigned int i = 0; i < op0->type->length; i++) {
+ ir_rvalue *e0, *e1, *result;
+
+ e0 = new(mem_ctx) ir_dereference_array(op0->clone(mem_ctx, NULL),
+ new(mem_ctx) ir_constant(i));
+ e1 = new(mem_ctx) ir_dereference_array(op1->clone(mem_ctx, NULL),
+ new(mem_ctx) ir_constant(i));
+ result = do_comparison(mem_ctx, operation, e0, e1);
+
+ if (cmp) {
+ cmp = new(mem_ctx) ir_expression(join_op, cmp, result);
+ } else {
+ cmp = result;
+ }
+ }
+
+ mark_whole_array_access(op0);
+ mark_whole_array_access(op1);
+ break;
+ }
+
+ case GLSL_TYPE_STRUCT: {
+ for (unsigned int i = 0; i < op0->type->length; i++) {
+ ir_rvalue *e0, *e1, *result;
+ const char *field_name = op0->type->fields.structure[i].name;
+
+ e0 = new(mem_ctx) ir_dereference_record(op0->clone(mem_ctx, NULL),
+ field_name);
+ e1 = new(mem_ctx) ir_dereference_record(op1->clone(mem_ctx, NULL),
+ field_name);
+ result = do_comparison(mem_ctx, operation, e0, e1);
+
+ if (cmp) {
+ cmp = new(mem_ctx) ir_expression(join_op, cmp, result);
+ } else {
+ cmp = result;
+ }
+ }
+ break;
+ }
+
+ case GLSL_TYPE_ERROR:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_INTERFACE:
++ case GLSL_TYPE_FUNCTION:
+ case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_SUBROUTINE:
+ /* I assume a comparison of a struct containing a sampler just
+ * ignores the sampler present in the type.
+ */
+ break;
+ }
+
+ if (cmp == NULL)
+ cmp = new(mem_ctx) ir_constant(true);
+
+ return cmp;
+ }
+
+ /* For logical operations, we want to ensure that the operands are
+ * scalar booleans. If they aren't, emit an error and return a constant
+ * boolean to avoid triggering cascading error messages.
+ */
+ ir_rvalue *
+ get_scalar_boolean_operand(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state,
+ ast_expression *parent_expr,
+ int operand,
+ const char *operand_name,
+ bool *error_emitted)
+ {
+ ast_expression *expr = parent_expr->subexpressions[operand];
+ void *ctx = state;
+ ir_rvalue *val = expr->hir(instructions, state);
+
+ if (val->type->is_boolean() && val->type->is_scalar())
+ return val;
+
+ if (!*error_emitted) {
+ YYLTYPE loc = expr->get_location();
+ _mesa_glsl_error(&loc, state, "%s of `%s' must be scalar boolean",
+ operand_name,
+ parent_expr->operator_string(parent_expr->oper));
+ *error_emitted = true;
+ }
+
+ return new(ctx) ir_constant(true);
+ }
+
+ /**
+ * If name refers to a builtin array whose maximum allowed size is less than
+ * size, report an error. Otherwise do nothing.
+ */
+ void
+ check_builtin_array_max_size(const char *name, unsigned size,
+ YYLTYPE loc, struct _mesa_glsl_parse_state *state)
+ {
+ if ((strcmp("gl_TexCoord", name) == 0)
+ && (size > state->Const.MaxTextureCoords)) {
+ /* From page 54 (page 60 of the PDF) of the GLSL 1.20 spec:
+ *
+ * "The size [of gl_TexCoord] can be at most
+ * gl_MaxTextureCoords."
+ */
+ _mesa_glsl_error(&loc, state, "`gl_TexCoord' array size cannot "
+ "be larger than gl_MaxTextureCoords (%u)",
+ state->Const.MaxTextureCoords);
+ } else if (strcmp("gl_ClipDistance", name) == 0
+ && size > state->Const.MaxClipPlanes) {
+ /* From section 7.1 (Vertex Shader Special Variables) of the
+ * GLSL 1.30 spec:
+ *
+ * "The gl_ClipDistance array is predeclared as unsized and
+ * must be sized by the shader either redeclaring it with a
+ * size or indexing it only with integral constant
+ * expressions. ... The size can be at most
+ * gl_MaxClipDistances."
+ */
+ _mesa_glsl_error(&loc, state, "`gl_ClipDistance' array size cannot "
+ "be larger than gl_MaxClipDistances (%u)",
+ state->Const.MaxClipPlanes);
+ }
+ }
+
+ /**
+ * Create the constant 1, of a type which is appropriate for incrementing and
+ * decrementing values of the given GLSL type. For example, if type is vec4,
+ * this creates a constant value of 1.0 having type float.
+ *
+ * If the given type is invalid for increment and decrement operators, return
+ * a floating point 1--the error will be detected later.
+ */
+ static ir_rvalue *
+ constant_one_for_inc_dec(void *ctx, const glsl_type *type)
+ {
+ switch (type->base_type) {
+ case GLSL_TYPE_UINT:
+ return new(ctx) ir_constant((unsigned) 1);
+ case GLSL_TYPE_INT:
+ return new(ctx) ir_constant(1);
+ default:
+ case GLSL_TYPE_FLOAT:
+ return new(ctx) ir_constant(1.0f);
+ }
+ }
+
+ ir_rvalue *
+ ast_expression::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ return do_hir(instructions, state, true);
+ }
+
+ void
+ ast_expression::hir_no_rvalue(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ do_hir(instructions, state, false);
+ }
+
+ ir_rvalue *
+ ast_expression::do_hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state,
+ bool needs_rvalue)
+ {
+ void *ctx = state;
+ static const int operations[AST_NUM_OPERATORS] = {
+ -1, /* ast_assign doesn't convert to ir_expression. */
+ -1, /* ast_plus doesn't convert to ir_expression. */
+ ir_unop_neg,
+ ir_binop_add,
+ ir_binop_sub,
+ ir_binop_mul,
+ ir_binop_div,
+ ir_binop_mod,
+ ir_binop_lshift,
+ ir_binop_rshift,
+ ir_binop_less,
+ ir_binop_greater,
+ ir_binop_lequal,
+ ir_binop_gequal,
+ ir_binop_all_equal,
+ ir_binop_any_nequal,
+ ir_binop_bit_and,
+ ir_binop_bit_xor,
+ ir_binop_bit_or,
+ ir_unop_bit_not,
+ ir_binop_logic_and,
+ ir_binop_logic_xor,
+ ir_binop_logic_or,
+ ir_unop_logic_not,
+
+ /* Note: The following block of expression types actually convert
+ * to multiple IR instructions.
+ */
+ ir_binop_mul, /* ast_mul_assign */
+ ir_binop_div, /* ast_div_assign */
+ ir_binop_mod, /* ast_mod_assign */
+ ir_binop_add, /* ast_add_assign */
+ ir_binop_sub, /* ast_sub_assign */
+ ir_binop_lshift, /* ast_ls_assign */
+ ir_binop_rshift, /* ast_rs_assign */
+ ir_binop_bit_and, /* ast_and_assign */
+ ir_binop_bit_xor, /* ast_xor_assign */
+ ir_binop_bit_or, /* ast_or_assign */
+
+ -1, /* ast_conditional doesn't convert to ir_expression. */
+ ir_binop_add, /* ast_pre_inc. */
+ ir_binop_sub, /* ast_pre_dec. */
+ ir_binop_add, /* ast_post_inc. */
+ ir_binop_sub, /* ast_post_dec. */
+ -1, /* ast_field_selection doesn't conv to ir_expression. */
+ -1, /* ast_array_index doesn't convert to ir_expression. */
+ -1, /* ast_function_call doesn't conv to ir_expression. */
+ -1, /* ast_identifier doesn't convert to ir_expression. */
+ -1, /* ast_int_constant doesn't convert to ir_expression. */
+ -1, /* ast_uint_constant doesn't conv to ir_expression. */
+ -1, /* ast_float_constant doesn't conv to ir_expression. */
+ -1, /* ast_bool_constant doesn't conv to ir_expression. */
+ -1, /* ast_sequence doesn't convert to ir_expression. */
+ };
+ ir_rvalue *result = NULL;
+ ir_rvalue *op[3];
+ const struct glsl_type *type; /* a temporary variable for switch cases */
+ bool error_emitted = false;
+ YYLTYPE loc;
+
+ loc = this->get_location();
+
+ switch (this->oper) {
+ case ast_aggregate:
+ assert(!"ast_aggregate: Should never get here.");
+ break;
+
+ case ast_assign: {
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+
+ error_emitted =
+ do_assignment(instructions, state,
+ this->subexpressions[0]->non_lvalue_description,
+ op[0], op[1], &result, needs_rvalue, false,
+ this->subexpressions[0]->get_location());
+ break;
+ }
+
+ case ast_plus:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+
+ type = unary_arithmetic_result_type(op[0]->type, state, & loc);
+
+ error_emitted = type->is_error();
+
+ result = op[0];
+ break;
+
+ case ast_neg:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+
+ type = unary_arithmetic_result_type(op[0]->type, state, & loc);
+
+ error_emitted = type->is_error();
+
+ result = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], NULL);
+ break;
+
+ case ast_add:
+ case ast_sub:
+ case ast_mul:
+ case ast_div:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+
+ type = arithmetic_result_type(op[0], op[1],
+ (this->oper == ast_mul),
+ state, & loc);
+ error_emitted = type->is_error();
+
+ result = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+ break;
+
+ case ast_mod:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+
+ type = modulus_result_type(op[0], op[1], state, &loc);
+
+ assert(operations[this->oper] == ir_binop_mod);
+
+ result = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+ error_emitted = type->is_error();
+ break;
+
+ case ast_lshift:
+ case ast_rshift:
+ if (!state->check_bitwise_operations_allowed(&loc)) {
+ error_emitted = true;
+ }
+
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+ type = shift_result_type(op[0]->type, op[1]->type, this->oper, state,
+ &loc);
+ result = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+ error_emitted = op[0]->type->is_error() || op[1]->type->is_error();
+ break;
+
+ case ast_less:
+ case ast_greater:
+ case ast_lequal:
+ case ast_gequal:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+
+ type = relational_result_type(op[0], op[1], state, & loc);
+
+ /* The relational operators must either generate an error or result
+ * in a scalar boolean. See page 57 of the GLSL 1.50 spec.
+ */
+ assert(type->is_error()
+ || ((type->base_type == GLSL_TYPE_BOOL)
+ && type->is_scalar()));
+
+ result = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+ error_emitted = type->is_error();
+ break;
+
+ case ast_nequal:
+ case ast_equal:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+
+ /* From page 58 (page 64 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "The equality operators equal (==), and not equal (!=)
+ * operate on all types. They result in a scalar Boolean. If
+ * the operand types do not match, then there must be a
+ * conversion from Section 4.1.10 "Implicit Conversions"
+ * applied to one operand that can make them match, in which
+ * case this conversion is done."
+ */
+
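+ /* Illustrative examples: "vec3(1.0) == vec3(1.0)" yields a single bool
+ * (component-wise comparison is equal()/notEqual() instead), and in
+ * "1 == 2.0" the int operand is implicitly converted to float first.
+ */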
+ if (op[0]->type == glsl_type::void_type || op[1]->type == glsl_type::void_type) {
+ _mesa_glsl_error(& loc, state, "`%1$s': wrong operand types: "
+ "no operation `%1$s' exists that takes a left-hand "
+ "operand of type 'void' or a right operand of type "
+ "'void'", (this->oper == ast_equal) ? "==" : "!=");
+ error_emitted = true;
+ } else if ((!apply_implicit_conversion(op[0]->type, op[1], state)
+ && !apply_implicit_conversion(op[1]->type, op[0], state))
+ || (op[0]->type != op[1]->type)) {
+ _mesa_glsl_error(& loc, state, "operands of `%s' must have the same "
+ "type", (this->oper == ast_equal) ? "==" : "!=");
+ error_emitted = true;
+ } else if ((op[0]->type->is_array() || op[1]->type->is_array()) &&
+ !state->check_version(120, 300, &loc,
+ "array comparisons forbidden")) {
+ error_emitted = true;
+ } else if ((op[0]->type->contains_opaque() ||
+ op[1]->type->contains_opaque())) {
+ _mesa_glsl_error(&loc, state, "opaque type comparisons forbidden");
+ error_emitted = true;
+ }
+
+ if (error_emitted) {
+ result = new(ctx) ir_constant(false);
+ } else {
+ result = do_comparison(ctx, operations[this->oper], op[0], op[1]);
+ assert(result->type == glsl_type::bool_type);
+ }
+ break;
+
+ case ast_bit_and:
+ case ast_bit_xor:
+ case ast_bit_or:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+ type = bit_logic_result_type(op[0], op[1], this->oper, state, &loc);
+ result = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+ error_emitted = op[0]->type->is_error() || op[1]->type->is_error();
+ break;
+
+ case ast_bit_not:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+
+ if (!state->check_bitwise_operations_allowed(&loc)) {
+ error_emitted = true;
+ }
+
+ if (!op[0]->type->is_integer()) {
+ _mesa_glsl_error(&loc, state, "operand of `~' must be an integer");
+ error_emitted = true;
+ }
+
+ type = error_emitted ? glsl_type::error_type : op[0]->type;
+ result = new(ctx) ir_expression(ir_unop_bit_not, type, op[0], NULL);
+ break;
+
+ case ast_logic_and: {
+ exec_list rhs_instructions;
+ op[0] = get_scalar_boolean_operand(instructions, state, this, 0,
+ "LHS", &error_emitted);
+ op[1] = get_scalar_boolean_operand(&rhs_instructions, state, this, 1,
+ "RHS", &error_emitted);
+
+ if (rhs_instructions.is_empty()) {
+ result = new(ctx) ir_expression(ir_binop_logic_and, op[0], op[1]);
+ type = result->type;
+ } else {
+ ir_variable *const tmp = new(ctx) ir_variable(glsl_type::bool_type,
+ "and_tmp",
+ ir_var_temporary);
+ instructions->push_tail(tmp);
+
+ ir_if *const stmt = new(ctx) ir_if(op[0]);
+ instructions->push_tail(stmt);
+
+ stmt->then_instructions.append_list(&rhs_instructions);
+ ir_dereference *const then_deref = new(ctx) ir_dereference_variable(tmp);
+ ir_assignment *const then_assign =
+ new(ctx) ir_assignment(then_deref, op[1]);
+ stmt->then_instructions.push_tail(then_assign);
+
+ ir_dereference *const else_deref = new(ctx) ir_dereference_variable(tmp);
+ ir_assignment *const else_assign =
+ new(ctx) ir_assignment(else_deref, new(ctx) ir_constant(false));
+ stmt->else_instructions.push_tail(else_assign);
+
+ result = new(ctx) ir_dereference_variable(tmp);
+ type = tmp->type;
+ }
+ break;
+ }
+
+ case ast_logic_or: {
+ exec_list rhs_instructions;
+ op[0] = get_scalar_boolean_operand(instructions, state, this, 0,
+ "LHS", &error_emitted);
+ op[1] = get_scalar_boolean_operand(&rhs_instructions, state, this, 1,
+ "RHS", &error_emitted);
+
+ if (rhs_instructions.is_empty()) {
+ result = new(ctx) ir_expression(ir_binop_logic_or, op[0], op[1]);
+ type = result->type;
+ } else {
+ ir_variable *const tmp = new(ctx) ir_variable(glsl_type::bool_type,
+ "or_tmp",
+ ir_var_temporary);
+ instructions->push_tail(tmp);
+
+ ir_if *const stmt = new(ctx) ir_if(op[0]);
+ instructions->push_tail(stmt);
+
+ ir_dereference *const then_deref = new(ctx) ir_dereference_variable(tmp);
+ ir_assignment *const then_assign =
+ new(ctx) ir_assignment(then_deref, new(ctx) ir_constant(true));
+ stmt->then_instructions.push_tail(then_assign);
+
+ stmt->else_instructions.append_list(&rhs_instructions);
+ ir_dereference *const else_deref = new(ctx) ir_dereference_variable(tmp);
+ ir_assignment *const else_assign =
+ new(ctx) ir_assignment(else_deref, op[1]);
+ stmt->else_instructions.push_tail(else_assign);
+
+ result = new(ctx) ir_dereference_variable(tmp);
+ type = tmp->type;
+ }
+ break;
+ }
+
+ case ast_logic_xor:
+ /* From page 33 (page 39 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "The logical binary operators and (&&), or ( | | ), and
+ * exclusive or (^^). They operate only on two Boolean
+ * expressions and result in a Boolean expression."
+ */
+ op[0] = get_scalar_boolean_operand(instructions, state, this, 0, "LHS",
+ &error_emitted);
+ op[1] = get_scalar_boolean_operand(instructions, state, this, 1, "RHS",
+ &error_emitted);
+
+ result = new(ctx) ir_expression(operations[this->oper], glsl_type::bool_type,
+ op[0], op[1]);
+ break;
+
+ case ast_logic_not:
+ op[0] = get_scalar_boolean_operand(instructions, state, this, 0,
+ "operand", &error_emitted);
+
+ result = new(ctx) ir_expression(operations[this->oper], glsl_type::bool_type,
+ op[0], NULL);
+ break;
+
+ case ast_mul_assign:
+ case ast_div_assign:
+ case ast_add_assign:
+ case ast_sub_assign: {
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+
+ type = arithmetic_result_type(op[0], op[1],
+ (this->oper == ast_mul_assign),
+ state, & loc);
+
+ ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+
+ error_emitted =
+ do_assignment(instructions, state,
+ this->subexpressions[0]->non_lvalue_description,
+ op[0]->clone(ctx, NULL), temp_rhs,
+ &result, needs_rvalue, false,
+ this->subexpressions[0]->get_location());
+
+ /* GLSL 1.10 does not allow array assignment. However, we don't have to
+ * explicitly test for this because none of the binary expression
+ * operators allow array operands either.
+ */
+
+ break;
+ }
+
+ case ast_mod_assign: {
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+
+ type = modulus_result_type(op[0], op[1], state, &loc);
+
+ assert(operations[this->oper] == ir_binop_mod);
+
+ ir_rvalue *temp_rhs;
+ temp_rhs = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+
+ error_emitted =
+ do_assignment(instructions, state,
+ this->subexpressions[0]->non_lvalue_description,
+ op[0]->clone(ctx, NULL), temp_rhs,
+ &result, needs_rvalue, false,
+ this->subexpressions[0]->get_location());
+ break;
+ }
+
+ case ast_ls_assign:
+ case ast_rs_assign: {
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+ type = shift_result_type(op[0]->type, op[1]->type, this->oper, state,
+ &loc);
+ ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper],
+ type, op[0], op[1]);
+ error_emitted =
+ do_assignment(instructions, state,
+ this->subexpressions[0]->non_lvalue_description,
+ op[0]->clone(ctx, NULL), temp_rhs,
+ &result, needs_rvalue, false,
+ this->subexpressions[0]->get_location());
+ break;
+ }
+
+ case ast_and_assign:
+ case ast_xor_assign:
+ case ast_or_assign: {
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+ type = bit_logic_result_type(op[0], op[1], this->oper, state, &loc);
+ ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper],
+ type, op[0], op[1]);
+ error_emitted =
+ do_assignment(instructions, state,
+ this->subexpressions[0]->non_lvalue_description,
+ op[0]->clone(ctx, NULL), temp_rhs,
+ &result, needs_rvalue, false,
+ this->subexpressions[0]->get_location());
+ break;
+ }
+
+ case ast_conditional: {
+ /* From page 59 (page 65 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "The ternary selection operator (?:). It operates on three
+ * expressions (exp1 ? exp2 : exp3). This operator evaluates the
+ * first expression, which must result in a scalar Boolean."
+ */
+ op[0] = get_scalar_boolean_operand(instructions, state, this, 0,
+ "condition", &error_emitted);
+
+ /* The ?: operator is implemented by generating an anonymous temporary
+ * followed by an if-statement. The last instruction in each branch of
+ * the if-statement assigns a value to the anonymous temporary. This
+ * temporary is the r-value of the expression.
+ */
+ exec_list then_instructions;
+ exec_list else_instructions;
+
+ op[1] = this->subexpressions[1]->hir(&then_instructions, state);
+ op[2] = this->subexpressions[2]->hir(&else_instructions, state);
+
+ /* From page 59 (page 65 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "The second and third expressions can be any type, as
+ * long their types match, or there is a conversion in
+ * Section 4.1.10 "Implicit Conversions" that can be applied
+ * to one of the expressions to make their types match. This
+ * resulting matching type is the type of the entire
+ * expression."
+ */
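+ /* Illustrative examples: in "b ? 1 : 2.5" the int operand is converted
+ * to float and the whole expression is float; "b ? 1 : vec2(0.0)" is an
+ * error because no implicit conversion can make the types match.
+ */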
+ if ((!apply_implicit_conversion(op[1]->type, op[2], state)
+ && !apply_implicit_conversion(op[2]->type, op[1], state))
+ || (op[1]->type != op[2]->type)) {
+ YYLTYPE loc = this->subexpressions[1]->get_location();
+
+ _mesa_glsl_error(& loc, state, "second and third operands of ?: "
+ "operator must have matching types");
+ error_emitted = true;
+ type = glsl_type::error_type;
+ } else {
+ type = op[1]->type;
+ }
+
+ /* From page 33 (page 39 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "The second and third expressions must be the same type, but can
+ * be of any type other than an array."
+ */
+ if (type->is_array() &&
+ !state->check_version(120, 300, &loc,
+ "second and third operands of ?: operator "
+ "cannot be arrays")) {
+ error_emitted = true;
+ }
+
+ /* From section 4.1.7 of the GLSL 4.50 spec (Opaque Types):
+ *
+ * "Except for array indexing, structure member selection, and
+ * parentheses, opaque variables are not allowed to be operands in
+ * expressions; such use results in a compile-time error."
+ */
+ if (type->contains_opaque()) {
+ _mesa_glsl_error(&loc, state, "opaque variables cannot be operands "
+ "of the ?: operator");
+ error_emitted = true;
+ }
+
+ ir_constant *cond_val = op[0]->constant_expression_value();
+
+ if (then_instructions.is_empty()
+ && else_instructions.is_empty()
+ && cond_val != NULL) {
+ result = cond_val->value.b[0] ? op[1] : op[2];
+ } else {
+ /* The copy to conditional_tmp reads the whole array. */
+ if (type->is_array()) {
+ mark_whole_array_access(op[1]);
+ mark_whole_array_access(op[2]);
+ }
+
+ ir_variable *const tmp =
+ new(ctx) ir_variable(type, "conditional_tmp", ir_var_temporary);
+ instructions->push_tail(tmp);
+
+ ir_if *const stmt = new(ctx) ir_if(op[0]);
+ instructions->push_tail(stmt);
+
+ then_instructions.move_nodes_to(& stmt->then_instructions);
+ ir_dereference *const then_deref =
+ new(ctx) ir_dereference_variable(tmp);
+ ir_assignment *const then_assign =
+ new(ctx) ir_assignment(then_deref, op[1]);
+ stmt->then_instructions.push_tail(then_assign);
+
+ else_instructions.move_nodes_to(& stmt->else_instructions);
+ ir_dereference *const else_deref =
+ new(ctx) ir_dereference_variable(tmp);
+ ir_assignment *const else_assign =
+ new(ctx) ir_assignment(else_deref, op[2]);
+ stmt->else_instructions.push_tail(else_assign);
+
+ result = new(ctx) ir_dereference_variable(tmp);
+ }
+ break;
+ }
+
+ case ast_pre_inc:
+ case ast_pre_dec: {
+ this->non_lvalue_description = (this->oper == ast_pre_inc)
+ ? "pre-increment operation" : "pre-decrement operation";
+
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = constant_one_for_inc_dec(ctx, op[0]->type);
+
+ type = arithmetic_result_type(op[0], op[1], false, state, & loc);
+
+ ir_rvalue *temp_rhs;
+ temp_rhs = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+
+ error_emitted =
+ do_assignment(instructions, state,
+ this->subexpressions[0]->non_lvalue_description,
+ op[0]->clone(ctx, NULL), temp_rhs,
+ &result, needs_rvalue, false,
+ this->subexpressions[0]->get_location());
+ break;
+ }
+
+ case ast_post_inc:
+ case ast_post_dec: {
+ this->non_lvalue_description = (this->oper == ast_post_inc)
+ ? "post-increment operation" : "post-decrement operation";
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = constant_one_for_inc_dec(ctx, op[0]->type);
+
+ error_emitted = op[0]->type->is_error() || op[1]->type->is_error();
+
+ type = arithmetic_result_type(op[0], op[1], false, state, & loc);
+
+ ir_rvalue *temp_rhs;
+ temp_rhs = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+
+ /* Get a temporary of a copy of the lvalue before it's modified.
+ * This may get thrown away later.
+ */
+ result = get_lvalue_copy(instructions, op[0]->clone(ctx, NULL));
+
+ ir_rvalue *junk_rvalue;
+ error_emitted =
+ do_assignment(instructions, state,
+ this->subexpressions[0]->non_lvalue_description,
+ op[0]->clone(ctx, NULL), temp_rhs,
+ &junk_rvalue, false, false,
+ this->subexpressions[0]->get_location());
+
+ break;
+ }
+
+ case ast_field_selection:
+ result = _mesa_ast_field_selection_to_hir(this, instructions, state);
+ break;
+
+ case ast_array_index: {
+ YYLTYPE index_loc = subexpressions[1]->get_location();
+
+ op[0] = subexpressions[0]->hir(instructions, state);
+ op[1] = subexpressions[1]->hir(instructions, state);
+
+ result = _mesa_ast_array_index_to_hir(ctx, state, op[0], op[1],
+ loc, index_loc);
+
+ if (result->type->is_error())
+ error_emitted = true;
+
+ break;
+ }
+
+ case ast_unsized_array_dim:
+ assert(!"ast_unsized_array_dim: Should never get here.");
+ break;
+
+ case ast_function_call:
+ /* Should *NEVER* get here. ast_function_call should always be handled
+ * by ast_function_expression::hir.
+ */
+ assert(0);
+ break;
+
+ case ast_identifier: {
+ /* ast_identifier can appear in several places in a full abstract syntax
+ * tree. This particular use must be at a location specified in the grammar
+ * as 'variable_identifier'.
+ */
+ ir_variable *var =
+ state->symbols->get_variable(this->primary_expression.identifier);
+
+ if (var != NULL) {
+ var->data.used = true;
+ result = new(ctx) ir_dereference_variable(var);
+ } else {
+ _mesa_glsl_error(& loc, state, "`%s' undeclared",
+ this->primary_expression.identifier);
+
+ result = ir_rvalue::error_value(ctx);
+ error_emitted = true;
+ }
+ break;
+ }
+
+ case ast_int_constant:
+ result = new(ctx) ir_constant(this->primary_expression.int_constant);
+ break;
+
+ case ast_uint_constant:
+ result = new(ctx) ir_constant(this->primary_expression.uint_constant);
+ break;
+
+ case ast_float_constant:
+ result = new(ctx) ir_constant(this->primary_expression.float_constant);
+ break;
+
+ case ast_bool_constant:
+ result = new(ctx) ir_constant(bool(this->primary_expression.bool_constant));
+ break;
+
+ case ast_double_constant:
+ result = new(ctx) ir_constant(this->primary_expression.double_constant);
+ break;
+
+ case ast_sequence: {
+ /* It should not be possible to generate a sequence in the AST without
+ * any expressions in it.
+ */
+ assert(!this->expressions.is_empty());
+
+ /* The r-value of a sequence is the last expression in the sequence. If
+ * the other expressions in the sequence do not have side-effects (and
+ * therefore add instructions to the instruction list), they get dropped
+ * on the floor.
+ */
+ exec_node *previous_tail_pred = NULL;
+ YYLTYPE previous_operand_loc = loc;
+
+ foreach_list_typed (ast_node, ast, link, &this->expressions) {
+ /* If one of the operands of comma operator does not generate any
+ * code, we want to emit a warning. At each pass through the loop
+ * previous_tail_pred will point to the last instruction in the
+ * stream *before* processing the previous operand. Naturally,
+ * instructions->tail_pred will point to the last instruction in the
+ * stream *after* processing the previous operand. If the two
+ * pointers match, then the previous operand had no effect.
+ *
+ * The warning behavior here differs slightly from GCC. GCC will
+ * only emit a warning if none of the left-hand operands have an
+ * effect. However, it will emit a warning for each. I believe that
+ * there are some cases in C (especially with GCC extensions) where
+ * it is useful to have an intermediate step in a sequence have no
+ * effect, but I don't think these cases exist in GLSL. Either way,
+ * it would be a giant hassle to replicate that behavior.
+ */
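+ /* For example (illustrative), in "(3, i = 1)" the constant 3 emits no
+ * instructions, so the warning below fires for it.
+ */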
+ if (previous_tail_pred == instructions->tail_pred) {
+ _mesa_glsl_warning(&previous_operand_loc, state,
+ "left-hand operand of comma expression has "
+ "no effect");
+ }
+
+ /* tail_pred is directly accessed instead of using the get_tail()
+ * method for performance reasons. get_tail() has extra code to
+ * return NULL when the list is empty. We don't care about that
+ * here, so using tail_pred directly is fine.
+ */
+ previous_tail_pred = instructions->tail_pred;
+ previous_operand_loc = ast->get_location();
+
+ result = ast->hir(instructions, state);
+ }
+
+ /* Any errors should have already been emitted in the loop above.
+ */
+ error_emitted = true;
+ break;
+ }
+ }
+ type = NULL; /* use result->type, not type. */
+ assert(result != NULL || !needs_rvalue);
+
+ if (result && result->type->is_error() && !error_emitted)
+ _mesa_glsl_error(& loc, state, "type mismatch");
+
+ return result;
+ }
+
+ bool
+ ast_expression::has_sequence_subexpression() const
+ {
+ switch (this->oper) {
+ case ast_plus:
+ case ast_neg:
+ case ast_bit_not:
+ case ast_logic_not:
+ case ast_pre_inc:
+ case ast_pre_dec:
+ case ast_post_inc:
+ case ast_post_dec:
+ return this->subexpressions[0]->has_sequence_subexpression();
+
+ case ast_assign:
+ case ast_add:
+ case ast_sub:
+ case ast_mul:
+ case ast_div:
+ case ast_mod:
+ case ast_lshift:
+ case ast_rshift:
+ case ast_less:
+ case ast_greater:
+ case ast_lequal:
+ case ast_gequal:
+ case ast_nequal:
+ case ast_equal:
+ case ast_bit_and:
+ case ast_bit_xor:
+ case ast_bit_or:
+ case ast_logic_and:
+ case ast_logic_or:
+ case ast_logic_xor:
+ case ast_array_index:
+ case ast_mul_assign:
+ case ast_div_assign:
+ case ast_add_assign:
+ case ast_sub_assign:
+ case ast_mod_assign:
+ case ast_ls_assign:
+ case ast_rs_assign:
+ case ast_and_assign:
+ case ast_xor_assign:
+ case ast_or_assign:
+ return this->subexpressions[0]->has_sequence_subexpression() ||
+ this->subexpressions[1]->has_sequence_subexpression();
+
+ case ast_conditional:
+ return this->subexpressions[0]->has_sequence_subexpression() ||
+ this->subexpressions[1]->has_sequence_subexpression() ||
+ this->subexpressions[2]->has_sequence_subexpression();
+
+ case ast_sequence:
+ return true;
+
+ case ast_field_selection:
+ case ast_identifier:
+ case ast_int_constant:
+ case ast_uint_constant:
+ case ast_float_constant:
+ case ast_bool_constant:
+ case ast_double_constant:
+ return false;
+
+ case ast_aggregate:
+ unreachable("ast_aggregate: Should never get here.");
+
+ case ast_function_call:
+ unreachable("should be handled by ast_function_expression::hir");
+
+ case ast_unsized_array_dim:
+ unreachable("ast_unsized_array_dim: Should never get here.");
+ }
+
+ return false;
+ }
+
+ ir_rvalue *
+ ast_expression_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ /* It is possible to have expression statements that don't have an
+ * expression. This is the solitary semicolon:
+ *
+ * for (i = 0; i < 5; i++)
+ * ;
+ *
+ * In this case the expression will be NULL. Test for NULL and don't do
+ * anything in that case.
+ */
+ if (expression != NULL)
+ expression->hir_no_rvalue(instructions, state);
+
+ /* Statements do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_compound_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ if (new_scope)
+ state->symbols->push_scope();
+
+ foreach_list_typed (ast_node, ast, link, &this->statements)
+ ast->hir(instructions, state);
+
+ if (new_scope)
+ state->symbols->pop_scope();
+
+ /* Compound statements do not have r-values.
+ */
+ return NULL;
+ }
+
+ /**
+ * Evaluate the given exec_node (which should be an ast_node representing
+ * a single array dimension) and return its integer value.
+ */
+ static unsigned
+ process_array_size(exec_node *node,
+ struct _mesa_glsl_parse_state *state)
+ {
+ exec_list dummy_instructions;
+
+ ast_node *array_size = exec_node_data(ast_node, node, link);
+
+ /**
+ * Dimensions other than the outermost dimension can be unsized if they
+ * are immediately sized by a constructor or initializer.
+ */
+ if (((ast_expression*)array_size)->oper == ast_unsized_array_dim)
+ return 0;
+
+ ir_rvalue *const ir = array_size->hir(& dummy_instructions, state);
+ YYLTYPE loc = array_size->get_location();
+
+ if (ir == NULL) {
+ _mesa_glsl_error(& loc, state,
+ "array size could not be resolved");
+ return 0;
+ }
+
+ if (!ir->type->is_integer()) {
+ _mesa_glsl_error(& loc, state,
+ "array size must be integer type");
+ return 0;
+ }
+
+ if (!ir->type->is_scalar()) {
+ _mesa_glsl_error(& loc, state,
+ "array size must be scalar type");
+ return 0;
+ }
+
+ ir_constant *const size = ir->constant_expression_value();
+ if (size == NULL || array_size->has_sequence_subexpression()) {
+ _mesa_glsl_error(& loc, state, "array size must be a "
+ "constant valued expression");
+ return 0;
+ }
+
+ if (size->value.i[0] <= 0) {
+ _mesa_glsl_error(& loc, state, "array size must be > 0");
+ return 0;
+ }
+
+ assert(size->type == ir->type);
+
+ /* If the array size is const (and we've verified that
+ * it is) then no instructions should have been emitted
+ * when we converted it to HIR. If they were emitted,
+ * then either the array size isn't const after all, or
+ * we are emitting unnecessary instructions.
+ */
+ assert(dummy_instructions.is_empty());
+
+ return size->value.u[0];
+ }
+
+ static const glsl_type *
+ process_array_type(YYLTYPE *loc, const glsl_type *base,
+ ast_array_specifier *array_specifier,
+ struct _mesa_glsl_parse_state *state)
+ {
+ const glsl_type *array_type = base;
+
+ if (array_specifier != NULL) {
+ if (base->is_array()) {
+
+ /* From page 19 (page 25) of the GLSL 1.20 spec:
+ *
+ * "Only one-dimensional arrays may be declared."
+ */
+ if (!state->check_arrays_of_arrays_allowed(loc)) {
+ return glsl_type::error_type;
+ }
+ }
+
+ for (exec_node *node = array_specifier->array_dimensions.tail_pred;
+ !node->is_head_sentinel(); node = node->prev) {
+ unsigned array_size = process_array_size(node, state);
+ array_type = glsl_type::get_array_instance(array_type, array_size);
+ }
+ }
+
+ return array_type;
+ }
+
+ static bool
+ precision_qualifier_allowed(const glsl_type *type)
+ {
+ /* Precision qualifiers apply to floating point, integer and opaque
+ * types.
+ *
+ * Section 4.5.2 (Precision Qualifiers) of the GLSL 1.30 spec says:
+ * "Any floating point or any integer declaration can have the type
+ * preceded by one of these precision qualifiers [...] Literal
+ * constants do not have precision qualifiers. Neither do Boolean
+ * variables."
+ *
+ * Section 4.5 (Precision and Precision Qualifiers) of the GLSL 1.30
+ * spec also says:
+ *
+ * "Precision qualifiers are added for code portability with OpenGL
+ * ES, not for functionality. They have the same syntax as in OpenGL
+ * ES."
+ *
+ * Section 8 (Built-In Functions) of the GLSL ES 1.00 spec says:
+ *
+ * "uniform lowp sampler2D sampler;
+ * highp vec2 coord;
+ * ...
+ * lowp vec4 col = texture2D (sampler, coord);
+ * // texture2D returns lowp"
+ *
+ * From this, we infer that GLSL 1.30 (and later) should allow precision
+ * qualifiers on sampler types just like float and integer types.
+ */
+ return (type->is_float()
+ || type->is_integer()
+ || type->contains_opaque())
+ && !type->without_array()->is_record();
+ }
+
+ const glsl_type *
+ ast_type_specifier::glsl_type(const char **name,
+ struct _mesa_glsl_parse_state *state) const
+ {
+ const struct glsl_type *type;
+
+ type = state->symbols->get_type(this->type_name);
+ *name = this->type_name;
+
+ YYLTYPE loc = this->get_location();
+ type = process_array_type(&loc, type, this->array_specifier, state);
+
+ return type;
+ }
+
+ /**
+ * From the OpenGL ES 3.0 spec, 4.5.4 Default Precision Qualifiers:
+ *
+ * "The precision statement
+ *
+ * precision precision-qualifier type;
+ *
+ * can be used to establish a default precision qualifier. The type field can
+ * be either int or float or any of the sampler types, (...) If type is float,
+ * the directive applies to non-precision-qualified floating point type
+ * (scalar, vector, and matrix) declarations. If type is int, the directive
+ * applies to all non-precision-qualified integer type (scalar, vector, signed,
+ * and unsigned) declarations."
+ *
+ * We use the symbol table to keep the values of the default precisions for
+ * each 'type' in each scope and we use the 'type' string from the precision
+ * statement as key in the symbol table. When we want to retrieve the default
+ * precision associated with a given glsl_type we need to know the type string
+ * associated with it. This is what this function returns.
+ */
+ static const char *
+ get_type_name_for_precision_qualifier(const glsl_type *type)
+ {
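+ /* For example (illustrative): a vec3 maps to the key "float", an ivec2
+ * to "int", and a sampler2D to "sampler2D", matching the keys used by
+ * default precision statements such as "precision mediump float;".
+ */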
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ return "float";
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ return "int";
+ case GLSL_TYPE_ATOMIC_UINT:
+ return "atomic_uint";
+ case GLSL_TYPE_IMAGE:
+ /* fallthrough */
+ case GLSL_TYPE_SAMPLER: {
+ const unsigned type_idx =
+ type->sampler_array + 2 * type->sampler_shadow;
+ const unsigned offset = type->base_type == GLSL_TYPE_SAMPLER ? 0 : 4;
+ assert(type_idx < 4);
+ switch (type->sampler_type) {
+ case GLSL_TYPE_FLOAT:
+ switch (type->sampler_dimensionality) {
+ case GLSL_SAMPLER_DIM_1D: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "sampler1D", "sampler1DArray",
+ "sampler1DShadow", "sampler1DArrayShadow"
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_2D: {
+ static const char *const names[8] = {
+ "sampler2D", "sampler2DArray",
+ "sampler2DShadow", "sampler2DArrayShadow",
+ "image2D", "image2DArray", NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_3D: {
+ static const char *const names[8] = {
+ "sampler3D", NULL, NULL, NULL,
+ "image3D", NULL, NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_CUBE: {
+ static const char *const names[8] = {
+ "samplerCube", "samplerCubeArray",
+ "samplerCubeShadow", "samplerCubeArrayShadow",
+ "imageCube", NULL, NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_MS: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "sampler2DMS", "sampler2DMSArray", NULL, NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_RECT: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "samplerRect", NULL, "samplerRectShadow", NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_BUF: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "samplerBuffer", NULL, NULL, NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_EXTERNAL: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "samplerExternalOES", NULL, NULL, NULL
+ };
+ return names[type_idx];
+ }
+ default:
+ unreachable("Unsupported sampler/image dimensionality");
+ } /* sampler/image float dimensionality */
+ break;
+ case GLSL_TYPE_INT:
+ switch (type->sampler_dimensionality) {
+ case GLSL_SAMPLER_DIM_1D: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "isampler1D", "isampler1DArray", NULL, NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_2D: {
+ static const char *const names[8] = {
+ "isampler2D", "isampler2DArray", NULL, NULL,
+ "iimage2D", "iimage2DArray", NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_3D: {
+ static const char *const names[8] = {
+ "isampler3D", NULL, NULL, NULL,
+ "iimage3D", NULL, NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_CUBE: {
+ static const char *const names[8] = {
+ "isamplerCube", "isamplerCubeArray", NULL, NULL,
+ "iimageCube", NULL, NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_MS: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "isampler2DMS", "isampler2DMSArray", NULL, NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_RECT: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "isamplerRect", NULL, "isamplerRectShadow", NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_BUF: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "isamplerBuffer", NULL, NULL, NULL
+ };
+ return names[type_idx];
+ }
+ default:
+ unreachable("Unsupported isampler/iimage dimensionality");
+ } /* sampler/image int dimensionality */
+ break;
+ case GLSL_TYPE_UINT:
+ switch (type->sampler_dimensionality) {
+ case GLSL_SAMPLER_DIM_1D: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "usampler1D", "usampler1DArray", NULL, NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_2D: {
+ static const char *const names[8] = {
+ "usampler2D", "usampler2DArray", NULL, NULL,
+ "uimage2D", "uimage2DArray", NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_3D: {
+ static const char *const names[8] = {
+ "usampler3D", NULL, NULL, NULL,
+ "uimage3D", NULL, NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_CUBE: {
+ static const char *const names[8] = {
+ "usamplerCube", "usamplerCubeArray", NULL, NULL,
+ "uimageCube", NULL, NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_MS: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "usampler2DMS", "usampler2DMSArray", NULL, NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_RECT: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "usamplerRect", NULL, "usamplerRectShadow", NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_BUF: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "usamplerBuffer", NULL, NULL, NULL
+ };
+ return names[type_idx];
+ }
+ default:
+ unreachable("Unsupported usampler/uimage dimensionality");
+ } /* sampler/image uint dimensionality */
+ break;
+ default:
+ unreachable("Unsupported sampler/image type");
+ } /* sampler/image type */
+ break;
+ } /* GLSL_TYPE_SAMPLER/GLSL_TYPE_IMAGE */
+ break;
+ default:
+ unreachable("Unsupported type");
+ } /* base type */
+ }
+
+ static unsigned
+ select_gles_precision(unsigned qual_precision,
+ const glsl_type *type,
+ struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+ {
+ /* Precision qualifiers do not have any meaning in Desktop GLSL.
+ * In GLES we take the precision from the type qualifier if present,
+ * otherwise, if the type of the variable allows precision qualifiers at
+ * all, we look for the default precision qualifier for that type in the
+ * current scope.
+ */
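+ /* For example (illustrative, GLSL ES): "uniform float f;" in a fragment
+ * shader with no "precision ... float;" statement in scope is an error,
+ * while "uniform mediump float f;" takes mediump from the qualifier.
+ */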
+ assert(state->es_shader);
+
+ unsigned precision = GLSL_PRECISION_NONE;
+ if (qual_precision) {
+ precision = qual_precision;
+ } else if (precision_qualifier_allowed(type)) {
+ const char *type_name =
+ get_type_name_for_precision_qualifier(type->without_array());
+ assert(type_name != NULL);
+
+ precision =
+ state->symbols->get_default_precision_qualifier(type_name);
+ if (precision == ast_precision_none) {
+ _mesa_glsl_error(loc, state,
+ "No precision specified in this scope for type `%s'",
+ type->name);
+ }
+ }
+ return precision;
+ }
+
+ const glsl_type *
+ ast_fully_specified_type::glsl_type(const char **name,
+ struct _mesa_glsl_parse_state *state) const
+ {
+ return this->specifier->glsl_type(name, state);
+ }
+
+ /**
+ * Determine whether a toplevel variable declaration declares a varying. This
+ * function operates by examining the variable's mode and the shader target,
+ * so it correctly identifies linkage variables regardless of whether they are
+ * declared using the deprecated "varying" syntax or the new "in/out" syntax.
+ *
+ * Passing a non-toplevel variable declaration (e.g. a function parameter) to
+ * this function will produce undefined results.
+ */
+ static bool
+ is_varying_var(ir_variable *var, gl_shader_stage target)
+ {
+ switch (target) {
+ case MESA_SHADER_VERTEX:
+ return var->data.mode == ir_var_shader_out;
+ case MESA_SHADER_FRAGMENT:
+ return var->data.mode == ir_var_shader_in;
+ default:
+ return var->data.mode == ir_var_shader_out || var->data.mode == ir_var_shader_in;
+ }
+ }
+
+
+ /**
+ * Matrix layout qualifiers are only allowed on certain types
+ */
+ static void
+ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc,
+ const glsl_type *type,
+ ir_variable *var)
+ {
+ if (var && !var->is_in_buffer_block()) {
+ /* Layout qualifiers may only apply to interface blocks and fields in
+ * them.
+ */
+ _mesa_glsl_error(loc, state,
+ "uniform block layout qualifiers row_major and "
+ "column_major may not be applied to variables "
+ "outside of uniform blocks");
+ } else if (!type->without_array()->is_matrix()) {
+ /* The OpenGL ES 3.0 conformance tests did not originally allow
+ * matrix layout qualifiers on non-matrices. However, the OpenGL
+ * 4.4 and OpenGL ES 3.0 (revision TBD) specifications were
+ * amended to specifically allow these layouts on all types. Emit
+ * a warning so that people know their code may not be portable.
+ */
+ _mesa_glsl_warning(loc, state,
+ "uniform block layout qualifiers row_major and "
+ "column_major applied to non-matrix types may "
+ "be rejected by older compilers");
+ }
+ }
+
+ static bool
+ process_qualifier_constant(struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc,
+ const char *qual_identifier,
+ ast_expression *const_expression,
+ unsigned *value)
+ {
+ exec_list dummy_instructions;
+
+ if (const_expression == NULL) {
+ *value = 0;
+ return true;
+ }
+
+ ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state);
+
+ ir_constant *const const_int = ir->constant_expression_value();
+ if (const_int == NULL || !const_int->type->is_integer()) {
+ _mesa_glsl_error(loc, state, "%s must be an integral constant "
+ "expression", qual_indentifier);
+ return false;
+ }
+
+ if (const_int->value.i[0] < 0) {
+ _mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)",
+ qual_identifier, const_int->value.i[0]);
+ return false;
+ }
+
+ /* If the location is const (and we've verified that
+ * it is) then no instructions should have been emitted
+ * when we converted it to HIR. If they were emitted,
+ * then either the location isn't const after all, or
+ * we are emitting unnecessary instructions.
+ */
+ assert(dummy_instructions.is_empty());
+
+ *value = const_int->value.u[0];
+ return true;
+ }
+
+ static bool
+ validate_stream_qualifier(YYLTYPE *loc, struct _mesa_glsl_parse_state *state,
+ unsigned stream)
+ {
+ if (stream >= state->ctx->Const.MaxVertexStreams) {
+ _mesa_glsl_error(loc, state,
+ "invalid stream specified %d is larger than "
+ "MAX_VERTEX_STREAMS - 1 (%d).",
+ stream, state->ctx->Const.MaxVertexStreams - 1);
+ return false;
+ }
+
+ return true;
+ }
+
+ static void
+ apply_explicit_binding(struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc,
+ ir_variable *var,
+ const glsl_type *type,
+ const ast_type_qualifier *qual)
+ {
+ if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
+ _mesa_glsl_error(loc, state,
+ "the \"binding\" qualifier only applies to uniforms and "
+ "shader storage buffer objects");
+ return;
+ }
+
+ unsigned qual_binding;
+ if (!process_qualifier_constant(state, loc, "binding", qual->binding,
+ &qual_binding)) {
+ return;
+ }
+
+ const struct gl_context *const ctx = state->ctx;
+ unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1;
+ unsigned max_index = qual_binding + elements - 1;
+ const glsl_type *base_type = type->without_array();
+
+ if (base_type->is_interface()) {
+ /* UBOs. From page 60 of the GLSL 4.20 specification:
+ * "If the binding point for any uniform block instance is less than zero,
+ * or greater than or equal to the implementation-dependent maximum
+ * number of uniform buffer bindings, a compilation error will occur.
+ * When the binding identifier is used with a uniform block instanced as
+ * an array of size N, all elements of the array from binding through
+ * binding + N – 1 must be within this range."
+ *
+ * The implementation-dependent maximum is GL_MAX_UNIFORM_BUFFER_BINDINGS.
+ */
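+ /* e.g. (illustrative) "layout(binding = 2) uniform Block { ... } b[4];"
+ * occupies bindings 2..5, so max_index below would be 5.
+ */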
+ if (qual->flags.q.uniform &&
+ max_index >= ctx->Const.MaxUniformBufferBindings) {
+ _mesa_glsl_error(loc, state, "layout(binding = %u) for %d UBOs exceeds "
+ "the maximum number of UBO binding points (%d)",
+ qual_binding, elements,
+ ctx->Const.MaxUniformBufferBindings);
+ return;
+ }
+
+ /* SSBOs. From page 67 of the GLSL 4.30 specification:
+ * "If the binding point for any uniform or shader storage block instance
+ * is less than zero, or greater than or equal to the
+ * implementation-dependent maximum number of uniform buffer bindings, a
+ * compile-time error will occur. When the binding identifier is used
+ * with a uniform or shader storage block instanced as an array of size
+ * N, all elements of the array from binding through binding + N – 1 must
+ * be within this range."
+ */
+ if (qual->flags.q.buffer &&
+ max_index >= ctx->Const.MaxShaderStorageBufferBindings) {
+ _mesa_glsl_error(loc, state, "layout(binding = %u) for %d SSBOs exceeds "
+ "the maximum number of SSBO binding points (%d)",
+ qual_binding, elements,
+ ctx->Const.MaxShaderStorageBufferBindings);
+ return;
+ }
+ } else if (base_type->is_sampler()) {
+ /* Samplers. From page 63 of the GLSL 4.20 specification:
+ * "If the binding is less than zero, or greater than or equal to the
+ * implementation-dependent maximum supported number of units, a
+ * compilation error will occur. When the binding identifier is used
+ * with an array of size N, all elements of the array from binding
+ * through binding + N - 1 must be within this range."
+ */
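+ /* Illustrative (non-normative) example:
+ *
+ *    layout(binding = 3) uniform sampler2D tex[4];
+ *
+ * uses texture image units 3 through 6, so unit 6 must be below the
+ * combined texture image unit limit.
+ */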
+ unsigned limit = ctx->Const.MaxCombinedTextureImageUnits;
+
+ if (max_index >= limit) {
+ _mesa_glsl_error(loc, state, "layout(binding = %d) for %d samplers "
+ "exceeds the maximum number of texture image units "
+ "(%u)", qual_binding, elements, limit);
+
+ return;
+ }
+ } else if (base_type->contains_atomic()) {
+ assert(ctx->Const.MaxAtomicBufferBindings <= MAX_COMBINED_ATOMIC_BUFFERS);
+ if (qual_binding >= ctx->Const.MaxAtomicBufferBindings) {
+ _mesa_glsl_error(loc, state, "layout(binding = %d) exceeds the "
+ " maximum number of atomic counter buffer bindings"
+ "(%u)", qual_binding,
+ ctx->Const.MaxAtomicBufferBindings);
+
+ return;
+ }
+ } else if ((state->is_version(420, 310) ||
+ state->ARB_shading_language_420pack_enable) &&
+ base_type->is_image()) {
+ assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS);
+ if (max_index >= ctx->Const.MaxImageUnits) {
+ _mesa_glsl_error(loc, state, "Image binding %d exceeds the "
+ " maximum number of image units (%d)", max_index,
+ ctx->Const.MaxImageUnits);
+ return;
+ }
+
+ } else {
+ _mesa_glsl_error(loc, state,
+ "the \"binding\" qualifier only applies to uniform "
+ "blocks, opaque variables, or arrays thereof");
+ return;
+ }
+
+ var->data.explicit_binding = true;
+ var->data.binding = qual_binding;
+
+ return;
+ }
+
+
+ static glsl_interp_qualifier
+ interpret_interpolation_qualifier(const struct ast_type_qualifier *qual,
+ ir_variable_mode mode,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc)
+ {
+ glsl_interp_qualifier interpolation;
+ if (qual->flags.q.flat)
+ interpolation = INTERP_QUALIFIER_FLAT;
+ else if (qual->flags.q.noperspective)
+ interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
+ else if (qual->flags.q.smooth)
+ interpolation = INTERP_QUALIFIER_SMOOTH;
+ else
+ interpolation = INTERP_QUALIFIER_NONE;
+
+ if (interpolation != INTERP_QUALIFIER_NONE) {
+ if (mode != ir_var_shader_in && mode != ir_var_shader_out) {
+ _mesa_glsl_error(loc, state,
+ "interpolation qualifier `%s' can only be applied to "
+ "shader inputs or outputs.",
+ interpolation_string(interpolation));
+
+ }
+
+ if ((state->stage == MESA_SHADER_VERTEX && mode == ir_var_shader_in) ||
+ (state->stage == MESA_SHADER_FRAGMENT && mode == ir_var_shader_out)) {
+ _mesa_glsl_error(loc, state,
+ "interpolation qualifier `%s' cannot be applied to "
+ "vertex shader inputs or fragment shader outputs",
+ interpolation_string(interpolation));
+ }
+ }
+
+ return interpolation;
+ }
+
+
+ static void
+ apply_explicit_location(const struct ast_type_qualifier *qual,
+ ir_variable *var,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc)
+ {
+ bool fail = false;
+
+ unsigned qual_location;
+ if (!process_qualifier_constant(state, loc, "location", qual->location,
+ &qual_location)) {
+ return;
+ }
+
+ /* Checks for GL_ARB_explicit_uniform_location. */
+ if (qual->flags.q.uniform) {
+ if (!state->check_explicit_uniform_location_allowed(loc, var))
+ return;
+
+ const struct gl_context *const ctx = state->ctx;
+ unsigned max_loc = qual_location + var->type->uniform_locations() - 1;
+
+ if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) {
+ _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s "
+ ">= MAX_UNIFORM_LOCATIONS (%u)", var->name,
+ ctx->Const.MaxUserAssignableUniformLocations);
+ return;
+ }
+
+ var->data.explicit_location = true;
+ var->data.location = qual_location;
+ return;
+ }
+
+ /* Between GL_ARB_explicit_attrib_location and
+ * GL_ARB_separate_shader_objects, the inputs and outputs of any shader
+ * stage can be assigned explicit locations. The checking here associates
+ * the correct extension with the correct stage's input / output:
+ *
+ * input output
+ * ----- ------
+ * vertex explicit_loc sso
+ * tess control sso sso
+ * tess eval sso sso
+ * geometry sso sso
+ * fragment sso explicit_loc
+ */
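+ /* Illustrative (non-normative) examples of the cases handled below:
+ *
+ *    layout(location = 1) in vec4 color;    // VS input: needs explicit_attrib_location
+ *    layout(location = 3) out vec4 result;  // FS output: needs explicit_attrib_location
+ *    layout(location = 2) out vec4 v;       // VS output: needs separate_shader_objects
+ */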
+ switch (state->stage) {
+ case MESA_SHADER_VERTEX:
+ if (var->data.mode == ir_var_shader_in) {
+ if (!state->check_explicit_attrib_location_allowed(loc, var))
+ return;
+
+ break;
+ }
+
+ if (var->data.mode == ir_var_shader_out) {
+ if (!state->check_separate_shader_objects_allowed(loc, var))
+ return;
+
+ break;
+ }
+
+ fail = true;
+ break;
+
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_GEOMETRY:
+ if (var->data.mode == ir_var_shader_in || var->data.mode == ir_var_shader_out) {
+ if (!state->check_separate_shader_objects_allowed(loc, var))
+ return;
+
+ break;
+ }
+
+ fail = true;
+ break;
+
+ case MESA_SHADER_FRAGMENT:
+ if (var->data.mode == ir_var_shader_in) {
+ if (!state->check_separate_shader_objects_allowed(loc, var))
+ return;
+
+ break;
+ }
+
+ if (var->data.mode == ir_var_shader_out) {
+ if (!state->check_explicit_attrib_location_allowed(loc, var))
+ return;
+
+ break;
+ }
+
+ fail = true;
+ break;
+
+ case MESA_SHADER_COMPUTE:
+ _mesa_glsl_error(loc, state,
+ "compute shader variables cannot be given "
+ "explicit locations");
+ return;
+ };
+
+ if (fail) {
+ _mesa_glsl_error(loc, state,
+ "%s cannot be given an explicit location in %s shader",
+ mode_string(var),
+ _mesa_shader_stage_to_string(state->stage));
+ } else {
+ var->data.explicit_location = true;
+
+ switch (state->stage) {
+ case MESA_SHADER_VERTEX:
+ var->data.location = (var->data.mode == ir_var_shader_in)
+ ? (qual_location + VERT_ATTRIB_GENERIC0)
+ : (qual_location + VARYING_SLOT_VAR0);
+ break;
+
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_GEOMETRY:
+ if (var->data.patch)
+ var->data.location = qual_location + VARYING_SLOT_PATCH0;
+ else
+ var->data.location = qual_location + VARYING_SLOT_VAR0;
+ break;
+
+ case MESA_SHADER_FRAGMENT:
+ var->data.location = (var->data.mode == ir_var_shader_out)
+ ? (qual_location + FRAG_RESULT_DATA0)
+ : (qual_location + VARYING_SLOT_VAR0);
+ break;
+ case MESA_SHADER_COMPUTE:
+ assert(!"Unexpected shader type");
+ break;
+ }
+
+ /* Check if index was set for the uniform instead of the function */
+ if (qual->flags.q.explicit_index && qual->flags.q.subroutine) {
+ _mesa_glsl_error(loc, state, "an index qualifier can only be "
+ "used with subroutine functions");
+ return;
+ }
+
+ unsigned qual_index;
+ if (qual->flags.q.explicit_index &&
+ process_qualifier_constant(state, loc, "index", qual->index,
+ &qual_index)) {
+ /* From the GLSL 4.30 specification, section 4.4.2 (Output
+ * Layout Qualifiers):
+ *
+ * "It is also a compile-time error if a fragment shader
+ * sets a layout index to less than 0 or greater than 1."
+ *
+ * Older specifications don't mandate a behavior; we take
+ * this as a clarification and always generate the error.
+ */
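+ /* Non-normative example, a dual-source blending output:
+ *
+ *    layout(location = 0, index = 1) out vec4 second_src;
+ *
+ * Only index 0 or 1 is allowed.
+ */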
+ if (qual_index > 1) {
+ _mesa_glsl_error(loc, state,
+ "explicit index may only be 0 or 1");
+ } else {
+ var->data.explicit_index = true;
+ var->data.index = qual_index;
+ }
+ }
+ }
+ }
+
+ static void
+ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual,
+ ir_variable *var,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc)
+ {
+ const glsl_type *base_type = var->type->without_array();
+
+ if (base_type->is_image()) {
+ if (var->data.mode != ir_var_uniform &&
+ var->data.mode != ir_var_function_in) {
+ _mesa_glsl_error(loc, state, "image variables may only be declared as "
+ "function parameters or uniform-qualified "
+ "global variables");
+ }
+
+ var->data.image_read_only |= qual->flags.q.read_only;
+ var->data.image_write_only |= qual->flags.q.write_only;
+ var->data.image_coherent |= qual->flags.q.coherent;
+ var->data.image_volatile |= qual->flags.q._volatile;
+ var->data.image_restrict |= qual->flags.q.restrict_flag;
+ var->data.read_only = true;
+
+ if (qual->flags.q.explicit_image_format) {
+ if (var->data.mode == ir_var_function_in) {
+ _mesa_glsl_error(loc, state, "format qualifiers cannot be "
+ "used on image function parameters");
+ }
+
+ if (qual->image_base_type != base_type->sampler_type) {
+ _mesa_glsl_error(loc, state, "format qualifier doesn't match the "
+ "base data type of the image");
+ }
+
+ var->data.image_format = qual->image_format;
+ } else {
+ if (var->data.mode == ir_var_uniform) {
+ if (state->es_shader) {
+ _mesa_glsl_error(loc, state, "all image uniforms "
+ "must have a format layout qualifier");
+
+ } else if (!qual->flags.q.write_only) {
+ _mesa_glsl_error(loc, state, "image uniforms not qualified with "
+ "`writeonly' must have a format layout "
+ "qualifier");
+ }
+ }
+
+ var->data.image_format = GL_NONE;
+ }
+
+ /* From page 70 of the GLSL ES 3.1 specification:
+ *
+ * "Except for image variables qualified with the format qualifiers
+ * r32f, r32i, and r32ui, image variables must specify either memory
+ * qualifier readonly or the memory qualifier writeonly."
+ */
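+ /* Illustrative (non-normative) GLSL ES 3.1 examples:
+ *
+ *    layout(r32f) uniform highp image2D a;            // ok without a memory qualifier
+ *    layout(rgba8) readonly uniform highp image2D b;  // ok, qualified readonly
+ *    layout(rgba8) uniform highp image2D c;           // error: needs readonly or writeonly
+ */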
+ if (state->es_shader &&
+ var->data.image_format != GL_R32F &&
+ var->data.image_format != GL_R32I &&
+ var->data.image_format != GL_R32UI &&
+ !var->data.image_read_only &&
+ !var->data.image_write_only) {
+ _mesa_glsl_error(loc, state, "image variables of format other than "
+ "r32f, r32i or r32ui must be qualified `readonly' or "
+ "`writeonly'");
+ }
+
+ } else if (qual->flags.q.read_only ||
+ qual->flags.q.write_only ||
+ qual->flags.q.coherent ||
+ qual->flags.q._volatile ||
+ qual->flags.q.restrict_flag ||
+ qual->flags.q.explicit_image_format) {
+ _mesa_glsl_error(loc, state, "memory qualifiers may only be applied to "
+ "images");
+ }
+ }
+
+ static inline const char*
+ get_layout_qualifier_string(bool origin_upper_left, bool pixel_center_integer)
+ {
+ if (origin_upper_left && pixel_center_integer)
+ return "origin_upper_left, pixel_center_integer";
+ else if (origin_upper_left)
+ return "origin_upper_left";
+ else if (pixel_center_integer)
+ return "pixel_center_integer";
+ else
+ return " ";
+ }
+
+ static inline bool
+ is_conflicting_fragcoord_redeclaration(struct _mesa_glsl_parse_state *state,
+ const struct ast_type_qualifier *qual)
+ {
+ /* If gl_FragCoord was previously declared, and the qualifiers were
+ * different in any way, return true.
+ */
+ if (state->fs_redeclares_gl_fragcoord) {
+ return (state->fs_pixel_center_integer != qual->flags.q.pixel_center_integer
+ || state->fs_origin_upper_left != qual->flags.q.origin_upper_left);
+ }
+
+ return false;
+ }
+
+ static inline void
+ validate_array_dimensions(const glsl_type *t,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc) {
+ if (t->is_array()) {
+ t = t->fields.array;
+ while (t->is_array()) {
+ if (t->is_unsized_array()) {
+ _mesa_glsl_error(loc, state,
+ "only the outermost array dimension can "
+ "be unsized");
+ break;
+ }
+ t = t->fields.array;
+ }
+ }
+ }
+
+ static void
+ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
+ ir_variable *var,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc)
+ {
+ if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) {
+
+ /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says:
+ *
+ * "Within any shader, the first redeclarations of gl_FragCoord
+ * must appear before any use of gl_FragCoord."
+ *
+ * Generate a compiler error if the above condition is not met by the
+ * fragment shader.
+ */
+ ir_variable *earlier = state->symbols->get_variable("gl_FragCoord");
+ if (earlier != NULL &&
+ earlier->data.used &&
+ !state->fs_redeclares_gl_fragcoord) {
+ _mesa_glsl_error(loc, state,
+ "gl_FragCoord used before its first redeclaration "
+ "in fragment shader");
+ }
+
+ /* Make sure all gl_FragCoord redeclarations specify the same layout
+ * qualifiers.
+ */
+ if (is_conflicting_fragcoord_redeclaration(state, qual)) {
+ const char *const qual_string =
+ get_layout_qualifier_string(qual->flags.q.origin_upper_left,
+ qual->flags.q.pixel_center_integer);
+
+ const char *const state_string =
+ get_layout_qualifier_string(state->fs_origin_upper_left,
+ state->fs_pixel_center_integer);
+
+ _mesa_glsl_error(loc, state,
+ "gl_FragCoord redeclared with different layout "
+ "qualifiers (%s) and (%s) ",
+ state_string,
+ qual_string);
+ }
+ state->fs_origin_upper_left = qual->flags.q.origin_upper_left;
+ state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer;
+ state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers =
+ !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer;
+ state->fs_redeclares_gl_fragcoord =
+ state->fs_origin_upper_left ||
+ state->fs_pixel_center_integer ||
+ state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers;
+ }
+
+ var->data.pixel_center_integer = qual->flags.q.pixel_center_integer;
+ var->data.origin_upper_left = qual->flags.q.origin_upper_left;
+ if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer)
+ && (strcmp(var->name, "gl_FragCoord") != 0)) {
+ const char *const qual_string = (qual->flags.q.origin_upper_left)
+ ? "origin_upper_left" : "pixel_center_integer";
+
+ _mesa_glsl_error(loc, state,
+ "layout qualifier `%s' can only be applied to "
+ "fragment shader input `gl_FragCoord'",
+ qual_string);
+ }
+
+ if (qual->flags.q.explicit_location) {
+ apply_explicit_location(qual, var, state, loc);
+ } else if (qual->flags.q.explicit_index) {
+ if (!qual->flags.q.subroutine_def)
+ _mesa_glsl_error(loc, state,
+ "explicit index requires explicit location");
+ }
+
+ if (qual->flags.q.explicit_binding) {
+ apply_explicit_binding(state, loc, var, var->type, qual);
+ }
+
+ if (state->stage == MESA_SHADER_GEOMETRY &&
+ qual->flags.q.out && qual->flags.q.stream) {
+ unsigned qual_stream;
+ if (process_qualifier_constant(state, loc, "stream", qual->stream,
+ &qual_stream) &&
+ validate_stream_qualifier(loc, state, qual_stream)) {
+ var->data.stream = qual_stream;
+ }
+ }
+
+ if (var->type->contains_atomic()) {
+ if (var->data.mode == ir_var_uniform) {
+ if (var->data.explicit_binding) {
+ unsigned *offset =
+ &state->atomic_counter_offsets[var->data.binding];
+
+ if (*offset % ATOMIC_COUNTER_SIZE)
+ _mesa_glsl_error(loc, state,
+ "misaligned atomic counter offset");
+
+ var->data.offset = *offset;
+ *offset += var->type->atomic_size();
+
+ } else {
+ _mesa_glsl_error(loc, state,
+ "atomic counters require explicit binding point");
+ }
+ } else if (var->data.mode != ir_var_function_in) {
+ _mesa_glsl_error(loc, state, "atomic counters may only be declared as "
+ "function parameters or uniform-qualified "
+ "global variables");
+ }
+ }
+
+ /* Is the 'layout' keyword used with parameters that allow relaxed checking?
+ * Many implementations of GL_ARB_fragment_coord_conventions_enable and some
+ * implementations (only Mesa?) of GL_ARB_explicit_attrib_location_enable
+ * allowed the layout qualifier to be used with 'varying' and 'attribute'.
+ * These extensions and all following extensions that add the 'layout'
+ * keyword have been modified to require the use of 'in' or 'out'.
+ *
+ * The following extensions do not allow the deprecated keywords:
+ *
+ * GL_AMD_conservative_depth
+ * GL_ARB_conservative_depth
+ * GL_ARB_gpu_shader5
+ * GL_ARB_separate_shader_objects
+ * GL_ARB_tessellation_shader
+ * GL_ARB_transform_feedback3
+ * GL_ARB_uniform_buffer_object
+ *
+ * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5
+ * allow layout with the deprecated keywords.
+ */
+ const bool relaxed_layout_qualifier_checking =
+ state->ARB_fragment_coord_conventions_enable;
+
+ const bool uses_deprecated_qualifier = qual->flags.q.attribute
+ || qual->flags.q.varying;
+ if (qual->has_layout() && uses_deprecated_qualifier) {
+ if (relaxed_layout_qualifier_checking) {
+ _mesa_glsl_warning(loc, state,
+ "`layout' qualifier may not be used with "
+ "`attribute' or `varying'");
+ } else {
+ _mesa_glsl_error(loc, state,
+ "`layout' qualifier may not be used with "
+ "`attribute' or `varying'");
+ }
+ }
+
+ /* Layout qualifiers for gl_FragDepth, which are enabled by extension
+ * AMD_conservative_depth.
+ */
+ int depth_layout_count = qual->flags.q.depth_any
+ + qual->flags.q.depth_greater
+ + qual->flags.q.depth_less
+ + qual->flags.q.depth_unchanged;
+ if (depth_layout_count > 0
+ && !state->AMD_conservative_depth_enable
+ && !state->ARB_conservative_depth_enable) {
+ _mesa_glsl_error(loc, state,
+ "extension GL_AMD_conservative_depth or "
+ "GL_ARB_conservative_depth must be enabled "
+ "to use depth layout qualifiers");
+ } else if (depth_layout_count > 0
+ && strcmp(var->name, "gl_FragDepth") != 0) {
+ _mesa_glsl_error(loc, state,
+ "depth layout qualifiers can be applied only to "
+ "gl_FragDepth");
+ } else if (depth_layout_count > 1
+ && strcmp(var->name, "gl_FragDepth") == 0) {
+ _mesa_glsl_error(loc, state,
+ "at most one depth layout qualifier can be applied to "
+ "gl_FragDepth");
+ }
+ if (qual->flags.q.depth_any)
+ var->data.depth_layout = ir_depth_layout_any;
+ else if (qual->flags.q.depth_greater)
+ var->data.depth_layout = ir_depth_layout_greater;
+ else if (qual->flags.q.depth_less)
+ var->data.depth_layout = ir_depth_layout_less;
+ else if (qual->flags.q.depth_unchanged)
+ var->data.depth_layout = ir_depth_layout_unchanged;
+ else
+ var->data.depth_layout = ir_depth_layout_none;
+
+ if (qual->flags.q.std140 ||
+ qual->flags.q.std430 ||
+ qual->flags.q.packed ||
+ qual->flags.q.shared) {
+ _mesa_glsl_error(loc, state,
+ "uniform and shader storage block layout qualifiers "
+ "std140, std430, packed, and shared can only be "
+ "applied to uniform or shader storage blocks, not "
+ "members");
+ }
+
+ if (qual->flags.q.row_major || qual->flags.q.column_major) {
+ validate_matrix_layout_for_type(state, loc, var->type, var);
+ }
+
+ /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader
+ * Inputs):
+ *
+ * "Fragment shaders also allow the following layout qualifier on in only
+ * (not with variable declarations)
+ * layout-qualifier-id
+ * early_fragment_tests
+ * [...]"
+ */
+ if (qual->flags.q.early_fragment_tests) {
+ _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only "
+ "valid in fragment shader input layout declaration.");
+ }
+ }
+
+ static void
+ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
+ ir_variable *var,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc,
+ bool is_parameter)
+ {
+ STATIC_ASSERT(sizeof(qual->flags.q) <= sizeof(qual->flags.i));
+
+ if (qual->flags.q.invariant) {
+ if (var->data.used) {
+ _mesa_glsl_error(loc, state,
+ "variable `%s' may not be redeclared "
+ "`invariant' after being used",
+ var->name);
+ } else {
+ var->data.invariant = 1;
+ }
+ }
+
+ if (qual->flags.q.precise) {
+ if (var->data.used) {
+ _mesa_glsl_error(loc, state,
+ "variable `%s' may not be redeclared "
+ "`precise' after being used",
+ var->name);
+ } else {
+ var->data.precise = 1;
+ }
+ }
+
+ if (qual->flags.q.subroutine && !qual->flags.q.uniform) {
+ _mesa_glsl_error(loc, state,
+ "`subroutine' may only be applied to uniforms, "
+ "subroutine type declarations, or function definitions");
+ }
+
+ if (qual->flags.q.constant || qual->flags.q.attribute
+ || qual->flags.q.uniform
+ || (qual->flags.q.varying && (state->stage == MESA_SHADER_FRAGMENT)))
+ var->data.read_only = 1;
+
+ if (qual->flags.q.centroid)
+ var->data.centroid = 1;
+
+ if (qual->flags.q.sample)
+ var->data.sample = 1;
+
+ /* Precision qualifiers do not hold any meaning in Desktop GLSL */
+ if (state->es_shader) {
+ var->data.precision =
+ select_gles_precision(qual->precision, var->type, state, loc);
+ }
+
+ if (qual->flags.q.patch)
+ var->data.patch = 1;
+
+ if (qual->flags.q.attribute && state->stage != MESA_SHADER_VERTEX) {
+ var->type = glsl_type::error_type;
+ _mesa_glsl_error(loc, state,
+ "`attribute' variables may not be declared in the "
+ "%s shader",
+ _mesa_shader_stage_to_string(state->stage));
+ }
+
+ /* Disallow layout qualifiers which may only appear on layout declarations. */
+ if (qual->flags.q.prim_type) {
+ _mesa_glsl_error(loc, state,
+ "Primitive type may only be specified on GS input or output "
+ "layout declaration, not on variables.");
+ }
+
+ /* Section 6.1.1 (Function Calling Conventions) of the GLSL 1.10 spec says:
+ *
+ * "However, the const qualifier cannot be used with out or inout."
+ *
+ * The same section of the GLSL 4.40 spec further clarifies this saying:
+ *
+ * "The const qualifier cannot be used with out or inout, or a
+ * compile-time error results."
+ */
+ if (is_parameter && qual->flags.q.constant && qual->flags.q.out) {
+ _mesa_glsl_error(loc, state,
+ "`const' may not be applied to `out' or `inout' "
+ "function parameters");
+ }
+
+ /* If there is no qualifier that changes the mode of the variable, leave
+ * the setting alone.
+ */
+ assert(var->data.mode != ir_var_temporary);
+ if (qual->flags.q.in && qual->flags.q.out)
+ var->data.mode = ir_var_function_inout;
+ else if (qual->flags.q.in)
+ var->data.mode = is_parameter ? ir_var_function_in : ir_var_shader_in;
+ else if (qual->flags.q.attribute
+ || (qual->flags.q.varying && (state->stage == MESA_SHADER_FRAGMENT)))
+ var->data.mode = ir_var_shader_in;
+ else if (qual->flags.q.out)
+ var->data.mode = is_parameter ? ir_var_function_out : ir_var_shader_out;
+ else if (qual->flags.q.varying && (state->stage == MESA_SHADER_VERTEX))
+ var->data.mode = ir_var_shader_out;
+ else if (qual->flags.q.uniform)
+ var->data.mode = ir_var_uniform;
+ else if (qual->flags.q.buffer)
+ var->data.mode = ir_var_shader_storage;
+ else if (qual->flags.q.shared_storage)
+ var->data.mode = ir_var_shader_shared;
+
+ if (!is_parameter && is_varying_var(var, state->stage)) {
+ /* User-defined ins/outs are not permitted in compute shaders. */
+ if (state->stage == MESA_SHADER_COMPUTE) {
+ _mesa_glsl_error(loc, state,
+ "user-defined input and output variables are not "
+ "permitted in compute shaders");
+ }
+
+ /* This variable is being used to link data between shader stages (in
+ * pre-glsl-1.30 parlance, it's a "varying"). Check that it has a type
+ * that is allowed for such purposes.
+ *
+ * From page 25 (page 31 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "The varying qualifier can be used only with the data types
+ * float, vec2, vec3, vec4, mat2, mat3, and mat4, or arrays of
+ * these."
+ *
+ * This was relaxed in GLSL version 1.30 and GLSL ES version 3.00. From
+ * page 31 (page 37 of the PDF) of the GLSL 1.30 spec:
+ *
+ * "Fragment inputs can only be signed and unsigned integers and
+ * integer vectors, float, floating-point vectors, matrices, or
+ * arrays of these. Structures cannot be input."
+ *
+ * Similar text exists in the section on vertex shader outputs.
+ *
+ * Similar text exists in the GLSL ES 3.00 spec, except that the GLSL ES
+ * 3.00 spec allows structs as well. Varying structs are also allowed
+ * in GLSL 1.50.
+ */
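+ /* Non-normative examples of the checks below:
+ *
+ *    varying vec3 normal;   // ok in GLSL 1.10
+ *    varying ivec2 coord;   // error before GLSL 1.30 / GLSL ES 3.00
+ *    out MyStruct data;     // struct varyings need GLSL 1.50 / GLSL ES 3.00
+ */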
+ switch (var->type->get_scalar_type()->base_type) {
+ case GLSL_TYPE_FLOAT:
+ /* Ok in all GLSL versions */
+ break;
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ if (state->is_version(130, 300))
+ break;
+ _mesa_glsl_error(loc, state,
+ "varying variables must be of base type float in %s",
+ state->get_version_string());
+ break;
+ case GLSL_TYPE_STRUCT:
+ if (state->is_version(150, 300))
+ break;
+ _mesa_glsl_error(loc, state,
+ "varying variables may not be of type struct");
+ break;
+ case GLSL_TYPE_DOUBLE:
+ break;
+ default:
+ _mesa_glsl_error(loc, state, "illegal type for a varying variable");
+ break;
+ }
+ }
+
+ if (state->all_invariant && (state->current_function == NULL)) {
+ switch (state->stage) {
+ case MESA_SHADER_VERTEX:
+ if (var->data.mode == ir_var_shader_out)
+ var->data.invariant = true;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_GEOMETRY:
+ if ((var->data.mode == ir_var_shader_in)
+ || (var->data.mode == ir_var_shader_out))
+ var->data.invariant = true;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ if (var->data.mode == ir_var_shader_in)
+ var->data.invariant = true;
+ break;
+ case MESA_SHADER_COMPUTE:
+ /* Invariance isn't meaningful in compute shaders. */
+ break;
+ }
+ }
+
+ var->data.interpolation =
+ interpret_interpolation_qualifier(qual, (ir_variable_mode) var->data.mode,
+ state, loc);
+
+ /* Does the declaration use the deprecated 'attribute' or 'varying'
+ * keywords?
+ */
+ const bool uses_deprecated_qualifier = qual->flags.q.attribute
+ || qual->flags.q.varying;
+
+
+ /* Validate auxiliary storage qualifiers */
+
+ /* From section 4.3.4 of the GLSL 1.30 spec:
+ * "It is an error to use centroid in in a vertex shader."
+ *
+ * From section 4.3.4 of the GLSL ES 3.00 spec:
+ * "It is an error to use centroid in or interpolation qualifiers in
+ * a vertex shader input."
+ */
+
+ /* Section 4.3.6 of the GLSL 1.30 specification states:
+ * "It is an error to use centroid out in a fragment shader."
+ *
+ * The GL_ARB_shading_language_420pack extension specification states:
+ * "It is an error to use auxiliary storage qualifiers or interpolation
+ * qualifiers on an output in a fragment shader."
+ */
+ if (qual->flags.q.sample && (!is_varying_var(var, state->stage) || uses_deprecated_qualifier)) {
+ _mesa_glsl_error(loc, state,
+ "sample qualifier may only be used on `in` or `out` "
+ "variables between shader stages");
+ }
+ if (qual->flags.q.centroid && !is_varying_var(var, state->stage)) {
+ _mesa_glsl_error(loc, state,
+ "centroid qualifier may only be used with `in', "
+ "`out' or `varying' variables between shader stages");
+ }
+
+ if (qual->flags.q.shared_storage && state->stage != MESA_SHADER_COMPUTE) {
+ _mesa_glsl_error(loc, state,
+ "the shared storage qualifiers can only be used with "
+ "compute shaders");
+ }
+
+ apply_image_qualifier_to_variable(qual, var, state, loc);
+ }
+
+ /**
+ * Get the variable that is being redeclared by this declaration
+ *
+ * Semantic checks to verify the validity of the redeclaration are also
+ * performed. If semantic checks fail, a compilation error will be emitted via
+ * \c _mesa_glsl_error, but a non-\c NULL pointer will still be returned.
+ *
+ * \returns
+ * A pointer to an existing variable in the current scope if the declaration
+ * is a redeclaration, \c NULL otherwise.
+ */
+ static ir_variable *
+ get_variable_being_redeclared(ir_variable *var, YYLTYPE loc,
+ struct _mesa_glsl_parse_state *state,
+ bool allow_all_redeclarations)
+ {
+ /* Check if this declaration is actually a re-declaration, either to
+ * resize an array or add qualifiers to an existing variable.
+ *
+ * This is allowed for variables in the current scope, or when at
+ * global scope (for built-ins in the implicit outer scope).
+ */
+ ir_variable *earlier = state->symbols->get_variable(var->name);
+ if (earlier == NULL ||
+ (state->current_function != NULL &&
+ !state->symbols->name_declared_this_scope(var->name))) {
+ return NULL;
+ }
+
+
+ /* From page 24 (page 30 of the PDF) of the GLSL 1.50 spec,
+ *
+ * "It is legal to declare an array without a size and then
+ * later re-declare the same name as an array of the same
+ * type and specify a size."
+ */
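+ /* Non-normative example:
+ *
+ *    float a[];    // unsized declaration
+ *    float a[4];   // legal redeclaration supplying the size
+ */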
+ if (earlier->type->is_unsized_array() && var->type->is_array()
+ && (var->type->fields.array == earlier->type->fields.array)) {
+ /* FINISHME: This doesn't match the qualifiers on the two
+ * FINISHME: declarations. It's not 100% clear whether this is
+ * FINISHME: required or not.
+ */
+
+ const unsigned size = unsigned(var->type->array_size());
+ check_builtin_array_max_size(var->name, size, loc, state);
+ if ((size > 0) && (size <= earlier->data.max_array_access)) {
+ _mesa_glsl_error(& loc, state, "array size must be > %u due to "
+ "previous access",
+ earlier->data.max_array_access);
+ }
+
+ earlier->type = var->type;
+ delete var;
+ var = NULL;
+ } else if ((state->ARB_fragment_coord_conventions_enable ||
+ state->is_version(150, 0))
+ && strcmp(var->name, "gl_FragCoord") == 0
+ && earlier->type == var->type
+ && var->data.mode == ir_var_shader_in) {
+ /* Allow redeclaration of gl_FragCoord for ARB_fcc layout
+ * qualifiers.
+ */
+ earlier->data.origin_upper_left = var->data.origin_upper_left;
+ earlier->data.pixel_center_integer = var->data.pixel_center_integer;
+
+ /* According to section 4.3.7 of the GLSL 1.30 spec,
+ * the following built-in variables can be redeclared with an
+ * interpolation qualifier:
+ * * gl_FrontColor
+ * * gl_BackColor
+ * * gl_FrontSecondaryColor
+ * * gl_BackSecondaryColor
+ * * gl_Color
+ * * gl_SecondaryColor
+ */
+ } else if (state->is_version(130, 0)
+ && (strcmp(var->name, "gl_FrontColor") == 0
+ || strcmp(var->name, "gl_BackColor") == 0
+ || strcmp(var->name, "gl_FrontSecondaryColor") == 0
+ || strcmp(var->name, "gl_BackSecondaryColor") == 0
+ || strcmp(var->name, "gl_Color") == 0
+ || strcmp(var->name, "gl_SecondaryColor") == 0)
+ && earlier->type == var->type
+ && earlier->data.mode == var->data.mode) {
+ earlier->data.interpolation = var->data.interpolation;
+
+ /* Layout qualifiers for gl_FragDepth. */
+ } else if ((state->AMD_conservative_depth_enable ||
+ state->ARB_conservative_depth_enable)
+ && strcmp(var->name, "gl_FragDepth") == 0
+ && earlier->type == var->type
+ && earlier->data.mode == var->data.mode) {
+
+ /* From the AMD_conservative_depth spec:
+ *
+ * "Within any shader, the first redeclarations of gl_FragDepth
+ * must appear before any use of gl_FragDepth."
+ */
+ if (earlier->data.used) {
+ _mesa_glsl_error(&loc, state,
+ "the first redeclaration of gl_FragDepth "
+ "must appear before any use of gl_FragDepth");
+ }
+
+ /* Prevent inconsistent redeclaration of depth layout qualifier. */
+ if (earlier->data.depth_layout != ir_depth_layout_none
+ && earlier->data.depth_layout != var->data.depth_layout) {
+ _mesa_glsl_error(&loc, state,
+ "gl_FragDepth: depth layout is declared here "
+ "as '%s, but it was previously declared as "
+ "'%s'",
+ depth_layout_string(var->data.depth_layout),
+ depth_layout_string(earlier->data.depth_layout));
+ }
+
+ earlier->data.depth_layout = var->data.depth_layout;
+
+ } else if (allow_all_redeclarations) {
+ if (earlier->data.mode != var->data.mode) {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of `%s' with incorrect qualifiers",
+ var->name);
+ } else if (earlier->type != var->type) {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of `%s' has incorrect type",
+ var->name);
+ }
+ } else {
+ _mesa_glsl_error(&loc, state, "`%s' redeclared", var->name);
+ }
+
+ return earlier;
+ }
+
+ /**
+ * Generate the IR for an initializer in a variable declaration
+ */
+ ir_rvalue *
+ process_initializer(ir_variable *var, ast_declaration *decl,
+ ast_fully_specified_type *type,
+ exec_list *initializer_instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ ir_rvalue *result = NULL;
+
+ YYLTYPE initializer_loc = decl->initializer->get_location();
+
+ /* From page 24 (page 30 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "All uniform variables are read-only and are initialized either
+ * directly by an application via API commands, or indirectly by
+ * OpenGL."
+ */
+ if (var->data.mode == ir_var_uniform) {
+ state->check_version(120, 0, &initializer_loc,
+ "cannot initialize uniform %s",
+ var->name);
+ }
+
+ /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
+ *
+ * "Buffer variables cannot have initializers."
+ */
+ if (var->data.mode == ir_var_shader_storage) {
+ _mesa_glsl_error(&initializer_loc, state,
+ "cannot initialize buffer variable %s",
+ var->name);
+ }
+
+ /* From section 4.1.7 of the GLSL 4.40 spec:
+ *
+ * "Opaque variables [...] are initialized only through the
+ * OpenGL API; they cannot be declared with an initializer in a
+ * shader."
+ */
+ if (var->type->contains_opaque()) {
+ _mesa_glsl_error(&initializer_loc, state,
+ "cannot initialize opaque variable %s",
+ var->name);
+ }
+
+ if ((var->data.mode == ir_var_shader_in) && (state->current_function == NULL)) {
+ _mesa_glsl_error(&initializer_loc, state,
+ "cannot initialize %s shader input / %s %s",
+ _mesa_shader_stage_to_string(state->stage),
+ (state->stage == MESA_SHADER_VERTEX)
+ ? "attribute" : "varying",
+ var->name);
+ }
+
+ if (var->data.mode == ir_var_shader_out && state->current_function == NULL) {
+ _mesa_glsl_error(&initializer_loc, state,
+ "cannot initialize %s shader output %s",
+ _mesa_shader_stage_to_string(state->stage),
+ var->name);
+ }
+
+ /* If the initializer is an ast_aggregate_initializer, recursively store
+ * type information from the LHS into it, so that its hir() function can do
+ * type checking.
+ */
+ if (decl->initializer->oper == ast_aggregate)
+ _mesa_ast_set_aggregate_type(var->type, decl->initializer);
+
+ ir_dereference *const lhs = new(state) ir_dereference_variable(var);
+ ir_rvalue *rhs = decl->initializer->hir(initializer_instructions, state);
+
+ /* Calculate the constant value if this is a const or uniform
+ * declaration.
+ *
+ * Section 4.3 (Storage Qualifiers) of the GLSL ES 1.00.17 spec says:
+ *
+ * "Declarations of globals without a storage qualifier, or with
+ * just the const qualifier, may include initializers, in which case
+ * they will be initialized before the first line of main() is
+ * executed. Such initializers must be a constant expression."
+ *
+ * The same section of the GLSL ES 3.00.4 spec has similar language.
+ */
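+ /* Non-normative examples:
+ *
+ *    const float x = 2.0 * 3.0;   // ok, constant expression
+ *    uniform float u;
+ *    const float y = u;           // error at global scope: not a constant expression
+ */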
+ if (type->qualifier.flags.q.constant
+ || type->qualifier.flags.q.uniform
+ || (state->es_shader && state->current_function == NULL)) {
+ ir_rvalue *new_rhs = validate_assignment(state, initializer_loc,
+ lhs, rhs, true);
+ if (new_rhs != NULL) {
+ rhs = new_rhs;
+
+ /* Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec
+ * says:
+ *
+ * "A constant expression is one of
+ *
+ * ...
+ *
+ * - an expression formed by an operator on operands that are
+ * all constant expressions, including getting an element of
+ * a constant array, or a field of a constant structure, or
+ * components of a constant vector. However, the sequence
+ * operator ( , ) and the assignment operators ( =, +=, ...)
+ * are not included in the operators that can create a
+ * constant expression."
+ *
+ * Section 12.43 (Sequence operator and constant expressions) says:
+ *
+ * "Should the following construct be allowed?
+ *
+ * float a[2,3];
+ *
+ * The expression within the brackets uses the sequence operator
+ * (',') and returns the integer 3 so the construct is declaring
+ * a single-dimensional array of size 3. In some languages, the
+ * construct declares a two-dimensional array. It would be
+ * preferable to make this construct illegal to avoid confusion.
+ *
+ * One possibility is to change the definition of the sequence
+ * operator so that it does not return a constant-expression and
+ * hence cannot be used to declare an array size.
+ *
+ * RESOLUTION: The result of a sequence operator is not a
+ * constant-expression."
+ *
+ * Section 4.3.3 (Constant Expressions) of the GLSL 4.30.9 spec
+ * contains language almost identical to the section 4.3.3 in the
+ * GLSL ES 3.00.4 spec. This is a new limitation for these GLSL
+ * versions.
+ */
+ ir_constant *constant_value = rhs->constant_expression_value();
+ if (!constant_value ||
+ (state->is_version(430, 300) &&
+ decl->initializer->has_sequence_subexpression())) {
+ const char *const variable_mode =
+ (type->qualifier.flags.q.constant)
+ ? "const"
+ : ((type->qualifier.flags.q.uniform) ? "uniform" : "global");
+
+ /* If ARB_shading_language_420pack is enabled, initializers of
+ * const-qualified local variables do not have to be constant
+ * expressions. Const-qualified global variables must still be
+ * initialized with constant expressions.
+ */
+ if (!state->has_420pack()
+ || state->current_function == NULL) {
+ _mesa_glsl_error(& initializer_loc, state,
+ "initializer of %s variable `%s' must be a "
+ "constant expression",
+ variable_mode,
+ decl->identifier);
+ if (var->type->is_numeric()) {
+ /* Reduce cascading errors. */
+ var->constant_value = type->qualifier.flags.q.constant
+ ? ir_constant::zero(state, var->type) : NULL;
+ }
+ }
+ } else {
+ rhs = constant_value;
+ var->constant_value = type->qualifier.flags.q.constant
+ ? constant_value : NULL;
+ }
+ } else {
+ if (var->type->is_numeric()) {
+ /* Reduce cascading errors. */
+ var->constant_value = type->qualifier.flags.q.constant
+ ? ir_constant::zero(state, var->type) : NULL;
+ }
+ }
+ }
+
+ if (rhs && !rhs->type->is_error()) {
+ bool temp = var->data.read_only;
+ if (type->qualifier.flags.q.constant)
+ var->data.read_only = false;
+
+ /* Never emit code to initialize a uniform.
+ */
+ const glsl_type *initializer_type;
+ if (!type->qualifier.flags.q.uniform) {
+ do_assignment(initializer_instructions, state,
+ NULL,
+ lhs, rhs,
+ &result, true,
+ true,
+ type->get_location());
+ initializer_type = result->type;
+ } else
+ initializer_type = rhs->type;
+
+ var->constant_initializer = rhs->constant_expression_value();
+ var->data.has_initializer = true;
+
+ /* If the declared variable is an unsized array, it must inherit
+ * its full type from the initializer. A declaration such as
+ *
+ * uniform float a[] = float[](1.0, 2.0, 3.0, 3.0);
+ *
+ * becomes
+ *
+ * uniform float a[4] = float[](1.0, 2.0, 3.0, 3.0);
+ *
+ * The assignment generated in the if-statement (below) will also
+ * automatically handle this case for non-uniforms.
+ *
+ * If the declared variable is not an array, the types must
+ * already match exactly. As a result, the type assignment
+ * here can be done unconditionally. For non-uniforms the call
+ * to do_assignment can change the type of the initializer (via
+ * the implicit conversion rules). For uniforms the initializer
+ * must be a constant expression, and the type of that expression
+ * was validated above.
+ */
+ var->type = initializer_type;
+
+ var->data.read_only = temp;
+ }
+
+ return result;
+ }
+
+ static void
+ validate_layout_qualifier_vertex_count(struct _mesa_glsl_parse_state *state,
+ YYLTYPE loc, ir_variable *var,
+ unsigned num_vertices,
+ unsigned *size,
+ const char *var_category)
+ {
+ if (var->type->is_unsized_array()) {
+ /* Section 4.3.8.1 (Input Layout Qualifiers) of the GLSL 1.50 spec says:
+ *
+ * All geometry shader input unsized array declarations will be
+ * sized by an earlier input layout qualifier, when present, as per
+ * the following table.
+ *
+ * Followed by a table mapping each allowed input layout qualifier to
+ * the corresponding input length.
+ *
+ * Similarly for tessellation control shader outputs.
+ */
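+ /* Non-normative example: with an earlier
+ *
+ *    layout(triangles) in;
+ *
+ * a declaration such as "in vec4 Color[];" is implicitly sized to 3.
+ */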
+ if (num_vertices != 0)
+ var->type = glsl_type::get_array_instance(var->type->fields.array,
+ num_vertices);
+ } else {
+ /* Section 4.3.8.1 (Input Layout Qualifiers) of the GLSL 1.50 spec
+ * includes the following examples of compile-time errors:
+ *
+ * // code sequence within one shader...
+ * in vec4 Color1[]; // size unknown
+ * ...Color1.length()...// illegal, length() unknown
+ * in vec4 Color2[2]; // size is 2
+ * ...Color1.length()...// illegal, Color1 still has no size
+ * in vec4 Color3[3]; // illegal, input sizes are inconsistent
+ * layout(lines) in; // legal, input size is 2, matching
+ * in vec4 Color4[3]; // illegal, contradicts layout
+ * ...
+ *
+ * To detect the case illustrated by Color3, we verify that the size of
+ * an explicitly-sized array matches the size of any previously declared
+ * explicitly-sized array. To detect the case illustrated by Color4, we
+ * verify that the size of an explicitly-sized array is consistent with
+ * any previously declared input layout.
+ */
+ if (num_vertices != 0 && var->type->length != num_vertices) {
+ _mesa_glsl_error(&loc, state,
+ "%s size contradicts previously declared layout "
+ "(size is %u, but layout requires a size of %u)",
+ var_category, var->type->length, num_vertices);
+ } else if (*size != 0 && var->type->length != *size) {
+ _mesa_glsl_error(&loc, state,
+ "%s sizes are inconsistent (size is %u, but a "
+ "previous declaration has size %u)",
+ var_category, var->type->length, *size);
+ } else {
+ *size = var->type->length;
+ }
+ }
+ }
+
+ static void
+ handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state,
+ YYLTYPE loc, ir_variable *var)
+ {
+ unsigned num_vertices = 0;
+
+ if (state->tcs_output_vertices_specified) {
+ if (!state->out_qualifier->vertices->
+ process_qualifier_constant(state, "vertices",
+ &num_vertices, false)) {
+ return;
+ }
+
+ if (num_vertices > state->Const.MaxPatchVertices) {
+ _mesa_glsl_error(&loc, state, "vertices (%d) exceeds "
+ "GL_MAX_PATCH_VERTICES", num_vertices);
+ return;
+ }
+ }
+
+ if (!var->type->is_array() && !var->data.patch) {
+ _mesa_glsl_error(&loc, state,
+ "tessellation control shader outputs must be arrays");
+
+ /* To avoid cascading failures, short circuit the checks below. */
+ return;
+ }
+
+ if (var->data.patch)
+ return;
+
+ validate_layout_qualifier_vertex_count(state, loc, var, num_vertices,
+ &state->tcs_output_size,
+ "tessellation control shader output");
+ }
+
+ /**
+ * Do additional processing necessary for tessellation control/evaluation shader
+ * input declarations. This covers both interface block arrays and bare input
+ * variables.
+ */
+ static void
+ handle_tess_shader_input_decl(struct _mesa_glsl_parse_state *state,
+ YYLTYPE loc, ir_variable *var)
+ {
+ if (!var->type->is_array() && !var->data.patch) {
+ _mesa_glsl_error(&loc, state,
+ "per-vertex tessellation shader inputs must be arrays");
+ /* Avoid cascading failures. */
+ return;
+ }
+
+ if (var->data.patch)
+ return;
+
+ /* Unsized arrays are implicitly sized to gl_MaxPatchVertices. */
+ if (var->type->is_unsized_array()) {
+ var->type = glsl_type::get_array_instance(var->type->fields.array,
+ state->Const.MaxPatchVertices);
+ }
+ }
+
+
+ /**
+ * Do additional processing necessary for geometry shader input declarations
+ * (this covers both interface blocks arrays and bare input variables).
+ */
+ static void
+ handle_geometry_shader_input_decl(struct _mesa_glsl_parse_state *state,
+ YYLTYPE loc, ir_variable *var)
+ {
+ unsigned num_vertices = 0;
+
+ if (state->gs_input_prim_type_specified) {
+ num_vertices = vertices_per_prim(state->in_qualifier->prim_type);
+ }
+
+ /* Geometry shader input variables must be arrays. Caller should have
+ * reported an error for this.
+ */
+ if (!var->type->is_array()) {
+ assert(state->error);
+
+ /* To avoid cascading failures, short circuit the checks below. */
+ return;
+ }
+
+ validate_layout_qualifier_vertex_count(state, loc, var, num_vertices,
+ &state->gs_input_size,
+ "geometry shader input");
+ }
+
+ void
+ validate_identifier(const char *identifier, YYLTYPE loc,
+ struct _mesa_glsl_parse_state *state)
+ {
+ /* From page 15 (page 21 of the PDF) of the GLSL 1.10 spec,
+ *
+ * "Identifiers starting with "gl_" are reserved for use by
+ * OpenGL, and may not be declared in a shader as either a
+ * variable or a function."
+ */
+ if (is_gl_identifier(identifier)) {
+ _mesa_glsl_error(&loc, state,
+ "identifier `%s' uses reserved `gl_' prefix",
+ identifier);
+ } else if (strstr(identifier, "__")) {
+ /* From page 14 (page 20 of the PDF) of the GLSL 1.10
+ * spec:
+ *
+ * "In addition, all identifiers containing two
+ * consecutive underscores (__) are reserved as
+ * possible future keywords."
+ *
+ * The intention is that names containing __ are reserved for internal
+ * use by the implementation, and names prefixed with GL_ are reserved
+ * for use by Khronos. Names simply containing __ are dangerous to use,
+ * but should be allowed.
+ *
+ * A future version of the GLSL specification will clarify this.
+ */
+ _mesa_glsl_warning(&loc, state,
+ "identifier `%s' uses reserved `__' string",
+ identifier);
+ }
+ }
+
+ ir_rvalue *
+ ast_declarator_list::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+ const struct glsl_type *decl_type;
+ const char *type_name = NULL;
+ ir_rvalue *result = NULL;
+ YYLTYPE loc = this->get_location();
+
+ /* From page 46 (page 52 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "To ensure that a particular output variable is invariant, it is
+ * necessary to use the invariant qualifier. It can either be used to
+ * qualify a previously declared variable as being invariant
+ *
+ * invariant gl_Position; // make existing gl_Position be invariant"
+ *
+ * In these cases the parser will set the 'invariant' flag in the declarator
+ * list, and the type will be NULL.
+ */
+ if (this->invariant) {
+ assert(this->type == NULL);
+
+ if (state->current_function != NULL) {
+ _mesa_glsl_error(& loc, state,
+ "all uses of `invariant' keyword must be at global "
+ "scope");
+ }
+
+ foreach_list_typed (ast_declaration, decl, link, &this->declarations) {
+ assert(decl->array_specifier == NULL);
+ assert(decl->initializer == NULL);
+
+ ir_variable *const earlier =
+ state->symbols->get_variable(decl->identifier);
+ if (earlier == NULL) {
+ _mesa_glsl_error(& loc, state,
+ "undeclared variable `%s' cannot be marked "
+ "invariant", decl->identifier);
+ } else if (!is_varying_var(earlier, state->stage)) {
+ _mesa_glsl_error(&loc, state,
+ "`%s' cannot be marked invariant; interfaces between "
+ "shader stages only.", decl->identifier);
+ } else if (earlier->data.used) {
+ _mesa_glsl_error(& loc, state,
+ "variable `%s' may not be redeclared "
+ "`invariant' after being used",
+ earlier->name);
+ } else {
+ earlier->data.invariant = true;
+ }
+ }
+
+ /* Invariant redeclarations do not have r-values.
+ */
+ return NULL;
+ }
+
+ if (this->precise) {
+ assert(this->type == NULL);
+
+ foreach_list_typed (ast_declaration, decl, link, &this->declarations) {
+ assert(decl->array_specifier == NULL);
+ assert(decl->initializer == NULL);
+
+ ir_variable *const earlier =
+ state->symbols->get_variable(decl->identifier);
+ if (earlier == NULL) {
+ _mesa_glsl_error(& loc, state,
+ "undeclared variable `%s' cannot be marked "
+ "precise", decl->identifier);
+ } else if (state->current_function != NULL &&
+ !state->symbols->name_declared_this_scope(decl->identifier)) {
+ /* Note: we have to check if we're in a function, since
+ * builtins are treated as having come from another scope.
+ */
+ _mesa_glsl_error(& loc, state,
+ "variable `%s' from an outer scope may not be "
+ "redeclared `precise' in this scope",
+ earlier->name);
+ } else if (earlier->data.used) {
+ _mesa_glsl_error(& loc, state,
+ "variable `%s' may not be redeclared "
+ "`precise' after being used",
+ earlier->name);
+ } else {
+ earlier->data.precise = true;
+ }
+ }
+
+ /* Precise redeclarations do not have r-values either. */
+ return NULL;
+ }
+
+ assert(this->type != NULL);
+ assert(!this->invariant);
+ assert(!this->precise);
+
+ /* The type specifier may contain a structure definition. Process that
+ * before any of the variable declarations.
+ */
+ (void) this->type->specifier->hir(instructions, state);
+
+ decl_type = this->type->glsl_type(& type_name, state);
+
+ /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
+ * "Buffer variables may only be declared inside interface blocks
+ * (section 4.3.9 “Interface Blocks”), which are then referred to as
+ * shader storage blocks. It is a compile-time error to declare buffer
+ * variables at global scope (outside a block)."
+ */
+ if (type->qualifier.flags.q.buffer && !decl_type->is_interface()) {
+ _mesa_glsl_error(&loc, state,
+ "buffer variables cannot be declared outside "
+ "interface blocks");
+ }
+
+ /* An offset-qualified atomic counter declaration sets the default
+ * offset for the next declaration within the same atomic counter
+ * buffer.
+ */
+ if (decl_type && decl_type->contains_atomic()) {
+ if (type->qualifier.flags.q.explicit_binding &&
+ type->qualifier.flags.q.explicit_offset) {
+ unsigned qual_binding;
+ unsigned qual_offset;
+ if (process_qualifier_constant(state, &loc, "binding",
+ type->qualifier.binding,
+ &qual_binding)
+ && process_qualifier_constant(state, &loc, "offset",
+ type->qualifier.offset,
+ &qual_offset)) {
+ state->atomic_counter_offsets[qual_binding] = qual_offset;
+ }
+ }
+ }
+
+ if (this->declarations.is_empty()) {
+ /* If there is no structure involved in the program text, there are three
+ * possible scenarios:
+ *
+ * - The program text contained something like 'vec4;'. This is an
+ * empty declaration. It is valid but weird. Emit a warning.
+ *
+ * - The program text contained something like 'S;' and 'S' is not the
+ * name of a known structure type. This is both invalid and weird.
+ * Emit an error.
+ *
+ * - The program text contained something like 'mediump float;'
+ * when the programmer probably meant 'precision mediump
+ * float;' Emit a warning with a description of what they
+ * probably meant to do.
+ *
+ * Note that if decl_type is NULL and there is a structure involved,
+ * there must have been some sort of error with the structure. In this
+ * case we assume that an error was already generated on this line of
+ * code for the structure. There is no need to generate an additional,
+ * confusing error.
+ */
+ assert(this->type->specifier->structure == NULL || decl_type != NULL
+ || state->error);
+
+ if (decl_type == NULL) {
+ _mesa_glsl_error(&loc, state,
+ "invalid type `%s' in empty declaration",
+ type_name);
+ } else if (decl_type->base_type == GLSL_TYPE_ATOMIC_UINT) {
+ /* Empty atomic counter declarations are allowed and useful
+ * to set the default offset qualifier.
+ */
+ return NULL;
+ } else if (this->type->qualifier.precision != ast_precision_none) {
+ if (this->type->specifier->structure != NULL) {
+ _mesa_glsl_error(&loc, state,
+ "precision qualifiers can't be applied "
+ "to structures");
+ } else {
+ static const char *const precision_names[] = {
+ "highp",
+ "highp",
+ "mediump",
+ "lowp"
+ };
+
+ _mesa_glsl_warning(&loc, state,
+ "empty declaration with precision qualifier, "
+ "to set the default precision, use "
+ "`precision %s %s;'",
+ precision_names[this->type->qualifier.precision],
+ type_name);
+ }
+ } else if (this->type->specifier->structure == NULL) {
+ _mesa_glsl_warning(&loc, state, "empty declaration");
+ }
+ }
+
+ foreach_list_typed (ast_declaration, decl, link, &this->declarations) {
+ const struct glsl_type *var_type;
+ ir_variable *var;
+ const char *identifier = decl->identifier;
+ /* FINISHME: Emit a warning if a variable declaration shadows a
+ * FINISHME: declaration at a higher scope.
+ */
+
+ if ((decl_type == NULL) || decl_type->is_void()) {
+ if (type_name != NULL) {
+ _mesa_glsl_error(& loc, state,
+ "invalid type `%s' in declaration of `%s'",
+ type_name, decl->identifier);
+ } else {
+ _mesa_glsl_error(& loc, state,
+ "invalid type in declaration of `%s'",
+ decl->identifier);
+ }
+ continue;
+ }
+
+ if (this->type->qualifier.flags.q.subroutine) {
+ const glsl_type *t;
+ const char *name;
+
+ t = state->symbols->get_type(this->type->specifier->type_name);
+ if (!t)
+ _mesa_glsl_error(& loc, state,
+ "invalid type in declaration of `%s'",
+ decl->identifier);
+ name = ralloc_asprintf(ctx, "%s_%s", _mesa_shader_stage_to_subroutine_prefix(state->stage), decl->identifier);
+
+ identifier = name;
+
+ }
+ var_type = process_array_type(&loc, decl_type, decl->array_specifier,
+ state);
+
+ var = new(ctx) ir_variable(var_type, identifier, ir_var_auto);
+
+ /* The 'varying in' and 'varying out' qualifiers can only be used with
+ * ARB_geometry_shader4 and EXT_geometry_shader4, which we don't support
+ * yet.
+ */
+ if (this->type->qualifier.flags.q.varying) {
+ if (this->type->qualifier.flags.q.in) {
+ _mesa_glsl_error(& loc, state,
+ "`varying in' qualifier in declaration of "
+ "`%s' only valid for geometry shaders using "
+ "ARB_geometry_shader4 or EXT_geometry_shader4",
+ decl->identifier);
+ } else if (this->type->qualifier.flags.q.out) {
+ _mesa_glsl_error(& loc, state,
+ "`varying out' qualifier in declaration of "
+ "`%s' only valid for geometry shaders using "
+ "ARB_geometry_shader4 or EXT_geometry_shader4",
+ decl->identifier);
+ }
+ }
+
+ /* From page 22 (page 28 of the PDF) of the GLSL 1.10 specification;
+ *
+ * "Global variables can only use the qualifiers const,
+ * attribute, uniform, or varying. Only one may be
+ * specified.
+ *
+ * Local variables can only use the qualifier const."
+ *
+ * This is relaxed in GLSL 1.30 and GLSL ES 3.00. It is also relaxed by
+ * any extension that adds the 'layout' keyword.
+ */
+ if (!state->is_version(130, 300)
+ && !state->has_explicit_attrib_location()
+ && !state->has_separate_shader_objects()
+ && !state->ARB_fragment_coord_conventions_enable) {
+ if (this->type->qualifier.flags.q.out) {
+ _mesa_glsl_error(& loc, state,
+ "`out' qualifier in declaration of `%s' "
+ "only valid for function parameters in %s",
+ decl->identifier, state->get_version_string());
+ }
+ if (this->type->qualifier.flags.q.in) {
+ _mesa_glsl_error(& loc, state,
+ "`in' qualifier in declaration of `%s' "
+ "only valid for function parameters in %s",
+ decl->identifier, state->get_version_string());
+ }
+ /* FINISHME: Test for other invalid qualifiers. */
+ }
+
+ apply_type_qualifier_to_variable(& this->type->qualifier, var, state,
+ & loc, false);
+ apply_layout_qualifier_to_variable(&this->type->qualifier, var, state,
+ &loc);
+
+ if (this->type->qualifier.flags.q.invariant) {
+ if (!is_varying_var(var, state->stage)) {
+ _mesa_glsl_error(&loc, state,
+ "`%s' cannot be marked invariant; interfaces between "
+ "shader stages only", var->name);
+ }
+ }
+
+ if (state->current_function != NULL) {
+ const char *mode = NULL;
+ const char *extra = "";
+
+ /* There is no need to check for 'inout' here because the parser will
+ * only allow that in function parameter lists.
+ */
+ if (this->type->qualifier.flags.q.attribute) {
+ mode = "attribute";
+ } else if (this->type->qualifier.flags.q.subroutine) {
+ mode = "subroutine uniform";
+ } else if (this->type->qualifier.flags.q.uniform) {
+ mode = "uniform";
+ } else if (this->type->qualifier.flags.q.varying) {
+ mode = "varying";
+ } else if (this->type->qualifier.flags.q.in) {
+ mode = "in";
+ extra = " or in function parameter list";
+ } else if (this->type->qualifier.flags.q.out) {
+ mode = "out";
+ extra = " or in function parameter list";
+ }
+
+ if (mode) {
+ _mesa_glsl_error(& loc, state,
+ "%s variable `%s' must be declared at "
+ "global scope%s",
+ mode, var->name, extra);
+ }
+ } else if (var->data.mode == ir_var_shader_in) {
+ var->data.read_only = true;
+
+ if (state->stage == MESA_SHADER_VERTEX) {
+ bool error_emitted = false;
+
+ /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "Vertex shader inputs can only be float, floating-point
+ * vectors, matrices, signed and unsigned integers and integer
+ * vectors. Vertex shader inputs can also form arrays of these
+ * types, but not structures."
+ *
+ * From page 31 (page 27 of the PDF) of the GLSL 1.30 spec:
+ *
+ * "Vertex shader inputs can only be float, floating-point
+ * vectors, matrices, signed and unsigned integers and integer
+ * vectors. They cannot be arrays or structures."
+ *
+ * From page 23 (page 29 of the PDF) of the GLSL 1.20 spec:
+ *
+ * "The attribute qualifier can be used only with float,
+ * floating-point vectors, and matrices. Attribute variables
+ * cannot be declared as arrays or structures."
+ *
+ * From page 33 (page 39 of the PDF) of the GLSL ES 3.00 spec:
+ *
+ * "Vertex shader inputs can only be float, floating-point
+ * vectors, matrices, signed and unsigned integers and integer
+ * vectors. Vertex shader inputs cannot be arrays or
+ * structures."
+ */
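+ /* Non-normative examples of the checks below:
+ *
+ *    in vec4 position;    // ok
+ *    in bool flag;        // error: boolean attribute
+ *    in vec4 rows[4];     // array input requires GLSL 1.50
+ */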
+ const glsl_type *check_type = var->type->without_array();
+
+ switch (check_type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ break;
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ if (state->is_version(120, 300))
+ break;
+ case GLSL_TYPE_DOUBLE:
+ if (check_type->base_type == GLSL_TYPE_DOUBLE && (state->is_version(410, 0) || state->ARB_vertex_attrib_64bit_enable))
+ break;
+ /* FALLTHROUGH */
+ default:
+ _mesa_glsl_error(& loc, state,
+ "vertex shader input / attribute cannot have "
+ "type %s`%s'",
+ var->type->is_array() ? "array of " : "",
+ check_type->name);
+ error_emitted = true;
+ }
+
+ if (!error_emitted && var->type->is_array() &&
+ !state->check_version(150, 0, &loc,
+ "vertex shader input / attribute "
+ "cannot have array type")) {
+ error_emitted = true;
+ }
+ } else if (state->stage == MESA_SHADER_GEOMETRY) {
+ /* From section 4.3.4 (Inputs) of the GLSL 1.50 spec:
+ *
+ * Geometry shader input variables get the per-vertex values
+ * written out by vertex shader output variables of the same
+ * names. Since a geometry shader operates on a set of
+ * vertices, each input varying variable (or input block, see
+ * interface blocks below) needs to be declared as an array.
+ */
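+ /* Illustrative sketch: a vertex shader "out vec3 normal;" is read by the
+ * geometry shader as "in vec3 normal[];", so a non-array declaration such
+ * as "in vec3 normal;" is rejected below.
+ */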
+ if (!var->type->is_array()) {
+ _mesa_glsl_error(&loc, state,
+ "geometry shader inputs must be arrays");
+ }
+
+ handle_geometry_shader_input_decl(state, loc, var);
+ } else if (state->stage == MESA_SHADER_FRAGMENT) {
+ /* From section 4.3.4 (Input Variables) of the GLSL ES 3.10 spec:
+ *
+ * It is a compile-time error to declare a fragment shader
+ * input with, or that contains, any of the following types:
+ *
+ * * A boolean type
+ * * An opaque type
+ * * An array of arrays
+ * * An array of structures
+ * * A structure containing an array
+ * * A structure containing a structure
+ */
+ if (state->es_shader) {
+ const glsl_type *check_type = var->type->without_array();
+ if (check_type->is_boolean() ||
+ check_type->contains_opaque()) {
+ _mesa_glsl_error(&loc, state,
+ "fragment shader input cannot have type %s",
+ check_type->name);
+ }
+ if (var->type->is_array() &&
+ var->type->fields.array->is_array()) {
+ _mesa_glsl_error(&loc, state,
+ "%s shader output "
+ "cannot have an array of arrays",
+ _mesa_shader_stage_to_string(state->stage));
+ }
+ if (var->type->is_array() &&
+ var->type->fields.array->is_record()) {
+ _mesa_glsl_error(&loc, state,
+ "fragment shader input "
+ "cannot have an array of structs");
+ }
+ if (var->type->is_record()) {
+ for (unsigned i = 0; i < var->type->length; i++) {
+ if (var->type->fields.structure[i].type->is_array() ||
+ var->type->fields.structure[i].type->is_record())
+ _mesa_glsl_error(&loc, state,
+ "fragement shader input cannot have "
+ "a struct that contains an "
+ "array or struct");
+ }
+ }
+ }
+ } else if (state->stage == MESA_SHADER_TESS_CTRL ||
+ state->stage == MESA_SHADER_TESS_EVAL) {
+ handle_tess_shader_input_decl(state, loc, var);
+ }
+ } else if (var->data.mode == ir_var_shader_out) {
+ const glsl_type *check_type = var->type->without_array();
+
+ /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec:
+ *
+ * It is a compile-time error to declare a vertex, tessellation
+ * evaluation, tessellation control, or geometry shader output
+ * that contains any of the following:
+ *
+ * * A Boolean type (bool, bvec2 ...)
+ * * An opaque type
+ */
+ if (check_type->is_boolean() || check_type->contains_opaque())
+ _mesa_glsl_error(&loc, state,
+ "%s shader output cannot have type %s",
+ _mesa_shader_stage_to_string(state->stage),
+ check_type->name);
+
+ /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec:
+ *
+ * It is a compile-time error to declare a fragment shader output
+ * that contains any of the following:
+ *
+ * * A Boolean type (bool, bvec2 ...)
+ * * A double-precision scalar or vector (double, dvec2 ...)
+ * * An opaque type
+ * * Any matrix type
+ * * A structure
+ */
+ if (state->stage == MESA_SHADER_FRAGMENT) {
+ if (check_type->is_record() || check_type->is_matrix())
+ _mesa_glsl_error(&loc, state,
+ "fragment shader output "
+ "cannot have struct or matrix type");
+ switch (check_type->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ break;
+ default:
+ _mesa_glsl_error(&loc, state,
+ "fragment shader output cannot have "
+ "type %s", check_type->name);
+ }
+ }
+
+ /* From section 4.3.6 (Output Variables) of the GLSL ES 3.10 spec:
+ *
+ * It is a compile-time error to declare a vertex shader output
+ * with, or that contains, any of the following types:
+ *
+ * * A boolean type
+ * * An opaque type
+ * * An array of arrays
+ * * An array of structures
+ * * A structure containing an array
+ * * A structure containing a structure
+ *
+ * It is a compile-time error to declare a fragment shader output
+ * with, or that contains, any of the following types:
+ *
+ * * A boolean type
+ * * An opaque type
+ * * A matrix
+ * * A structure
+ * * An array of arrays
+ */
+ if (state->es_shader) {
+ if (var->type->is_array() &&
+ var->type->fields.array->is_array()) {
+ _mesa_glsl_error(&loc, state,
+ "%s shader output "
+ "cannot have an array of arrays",
+ _mesa_shader_stage_to_string(state->stage));
+ }
+ if (state->stage == MESA_SHADER_VERTEX) {
+ if (var->type->is_array() &&
+ var->type->fields.array->is_record()) {
+ _mesa_glsl_error(&loc, state,
+ "vertex shader output "
+ "cannot have an array of structs");
+ }
+ if (var->type->is_record()) {
+ for (unsigned i = 0; i < var->type->length; i++) {
+ if (var->type->fields.structure[i].type->is_array() ||
+ var->type->fields.structure[i].type->is_record())
+ _mesa_glsl_error(&loc, state,
+ "vertex shader output cannot have a "
+ "struct that contains an "
+ "array or struct");
+ }
+ }
+ }
+ }
+
+ if (state->stage == MESA_SHADER_TESS_CTRL) {
+ handle_tess_ctrl_shader_output_decl(state, loc, var);
+ }
+ } else if (var->type->contains_subroutine()) {
+ /* declare subroutine uniforms as hidden */
+ var->data.how_declared = ir_var_hidden;
+ }
+
+ /* Integer fragment inputs must be qualified with 'flat'. In GLSL ES,
+ * so must integer vertex outputs.
+ *
+ * From section 4.3.4 ("Inputs") of the GLSL 1.50 spec:
+ * "Fragment shader inputs that are signed or unsigned integers or
+ * integer vectors must be qualified with the interpolation qualifier
+ * flat."
+ *
+ * From section 4.3.4 ("Input Variables") of the GLSL 3.00 ES spec:
+ * "Fragment shader inputs that are, or contain, signed or unsigned
+ * integers or integer vectors must be qualified with the
+ * interpolation qualifier flat."
+ *
+ * From section 4.3.6 ("Output Variables") of the GLSL 3.00 ES spec:
+ * "Vertex shader outputs that are, or contain, signed or unsigned
+ * integers or integer vectors must be qualified with the
+ * interpolation qualifier flat."
+ *
+ * Note that prior to GLSL 1.50, this requirement applied to vertex
+ * outputs rather than fragment inputs. That creates problems in the
+ * presence of geometry shaders, so we adopt the GLSL 1.50 rule for all
+ * desktop GL shaders. For GLSL ES shaders, we follow the spec and
+ * apply the restriction to both vertex outputs and fragment inputs.
+ *
+ * Note also that the desktop GLSL specs are missing the text "or
+ * contain"; this is presumably an oversight, since there is no
+ * reasonable way to interpolate a fragment shader input that contains
+ * an integer.
+ */
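+ /* Illustrative sketch: in a GLSL 1.50 fragment shader, "in ivec2 texel;"
+ * must instead be written "flat in ivec2 texel;".
+ */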
+ if (state->is_version(130, 300) &&
+ var->type->contains_integer() &&
+ var->data.interpolation != INTERP_QUALIFIER_FLAT &&
+ ((state->stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_in)
+ || (state->stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_out
+ && state->es_shader))) {
+ const char *var_type = (state->stage == MESA_SHADER_VERTEX) ?
+ "vertex output" : "fragment input";
+ _mesa_glsl_error(&loc, state, "if a %s is (or contains) "
+ "an integer, then it must be qualified with 'flat'",
+ var_type);
+ }
+
+ /* Double fragment inputs must be qualified with 'flat'. */
+ if (var->type->contains_double() &&
+ var->data.interpolation != INTERP_QUALIFIER_FLAT &&
+ state->stage == MESA_SHADER_FRAGMENT &&
+ var->data.mode == ir_var_shader_in) {
+ _mesa_glsl_error(&loc, state, "if a fragment input is (or contains) "
+ "a double, then it must be qualified with 'flat'",
+ var_type);
+ }
+
+ /* Interpolation qualifiers cannot be applied to 'centroid' and
+ * 'centroid varying'.
+ *
+ * From page 29 (page 35 of the PDF) of the GLSL 1.30 spec:
+ * "interpolation qualifiers may only precede the qualifiers in,
+ * centroid in, out, or centroid out in a declaration. They do not apply
+ * to the deprecated storage qualifiers varying or centroid varying."
+ *
+ * These deprecated storage qualifiers do not exist in GLSL ES 3.00.
+ */
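+ /* Illustrative sketch: "smooth centroid varying vec4 color;" is rejected
+ * here, while "smooth centroid out vec4 color;" is not.
+ */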
+ if (state->is_version(130, 0)
+ && this->type->qualifier.has_interpolation()
+ && this->type->qualifier.flags.q.varying) {
+
+ const char *i = this->type->qualifier.interpolation_string();
+ assert(i != NULL);
+ const char *s;
+ if (this->type->qualifier.flags.q.centroid)
+ s = "centroid varying";
+ else
+ s = "varying";
+
+ _mesa_glsl_error(&loc, state,
+ "qualifier '%s' cannot be applied to the "
+ "deprecated storage qualifier '%s'", i, s);
+ }
+
+
+ /* Interpolation qualifiers can only apply to vertex shader outputs and
+ * fragment shader inputs.
+ *
+ * From page 29 (page 35 of the PDF) of the GLSL 1.30 spec:
+ * "Outputs from a vertex shader (out) and inputs to a fragment
+ * shader (in) can be further qualified with one or more of these
+ * interpolation qualifiers"
+ *
+ * From page 31 (page 37 of the PDF) of the GLSL ES 3.00 spec:
+ * "These interpolation qualifiers may only precede the qualifiers
+ * in, centroid in, out, or centroid out in a declaration. They do
+ * not apply to inputs into a vertex shader or outputs from a
+ * fragment shader."
+ */
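+ /* Illustrative sketch: "flat in ivec4 index;" in a vertex shader and
+ * "noperspective out vec4 color;" in a fragment shader are both rejected
+ * below.
+ */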
+ if (state->is_version(130, 300)
+ && this->type->qualifier.has_interpolation()) {
+
+ const char *i = this->type->qualifier.interpolation_string();
+ assert(i != NULL);
+
+ switch (state->stage) {
+ case MESA_SHADER_VERTEX:
+ if (this->type->qualifier.flags.q.in) {
+ _mesa_glsl_error(&loc, state,
+ "qualifier '%s' cannot be applied to vertex "
+ "shader inputs", i);
+ }
+ break;
+ case MESA_SHADER_FRAGMENT:
+ if (this->type->qualifier.flags.q.out) {
+ _mesa_glsl_error(&loc, state,
+ "qualifier '%s' cannot be applied to fragment "
+ "shader outputs", i);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+
+ /* From section 4.3.4 of the GLSL 4.00 spec:
+ * "Input variables may not be declared using the patch in qualifier
+ * in tessellation control or geometry shaders."
+ *
+ * From section 4.3.6 of the GLSL 4.00 spec:
+ * "It is an error to use patch out in a vertex, tessellation
+ * evaluation, or geometry shader."
+ *
+ * This doesn't explicitly forbid using them in a fragment shader, but
+ * that's probably just an oversight.
+ */
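+ /* Illustrative sketch: "patch in vec4 data;" is accepted only in a
+ * tessellation evaluation shader, and "patch out vec4 data;" only in a
+ * tessellation control shader.
+ */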
+ if (state->stage != MESA_SHADER_TESS_EVAL
+ && this->type->qualifier.flags.q.patch
+ && this->type->qualifier.flags.q.in) {
+
+ _mesa_glsl_error(&loc, state, "'patch in' can only be used in a "
+ "tessellation evaluation shader");
+ }
+
+ if (state->stage != MESA_SHADER_TESS_CTRL
+ && this->type->qualifier.flags.q.patch
+ && this->type->qualifier.flags.q.out) {
+
+ _mesa_glsl_error(&loc, state, "'patch out' can only be used in a "
+ "tessellation control shader");
+ }
+
+ /* Precision qualifiers exist only in GLSL versions 1.00 and >= 1.30.
+ */
+ if (this->type->qualifier.precision != ast_precision_none) {
+ state->check_precision_qualifiers_allowed(&loc);
+ }
+
+
+ /* If a precision qualifier is allowed on a type, it is allowed on
+ * an array of that type.
+ */
+ if (!(this->type->qualifier.precision == ast_precision_none
+ || precision_qualifier_allowed(var->type->without_array()))) {
+
+ _mesa_glsl_error(&loc, state,
+ "precision qualifiers apply only to floating point"
+ ", integer and opaque types");
+ }
+
+ /* From section 4.1.7 of the GLSL 4.40 spec:
+ *
+ * "[Opaque types] can only be declared as function
+ * parameters or uniform-qualified variables."
+ */
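+ /* Illustrative sketch: a global "sampler2D tex;" is rejected here; it
+ * must be declared "uniform sampler2D tex;".
+ */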
+ if (var_type->contains_opaque() &&
+ !this->type->qualifier.flags.q.uniform) {
+ _mesa_glsl_error(&loc, state,
+ "opaque variables must be declared uniform");
+ }
+
+ /* Process the initializer and add its instructions to a temporary
+ * list. This list will be added to the instruction stream (below) after
+ * the declaration is added. This is done because in some cases (such as
+ * redeclarations) the declaration may not actually be added to the
+ * instruction stream.
+ */
+ exec_list initializer_instructions;
+
+ /* Examine var name here since var may get deleted in the next call */
+ bool var_is_gl_id = is_gl_identifier(var->name);
+
+ ir_variable *earlier =
+ get_variable_being_redeclared(var, decl->get_location(), state,
+ false /* allow_all_redeclarations */);
+ if (earlier != NULL) {
+ if (var_is_gl_id &&
+ earlier->data.how_declared == ir_var_declared_in_block) {
+ _mesa_glsl_error(&loc, state,
+ "`%s' has already been redeclared using "
+ "gl_PerVertex", earlier->name);
+ }
+ earlier->data.how_declared = ir_var_declared_normally;
+ }
+
+ if (decl->initializer != NULL) {
+ result = process_initializer((earlier == NULL) ? var : earlier,
+ decl, this->type,
+ &initializer_instructions, state);
+ } else {
+ validate_array_dimensions(var_type, state, &loc);
+ }
+
+ /* From page 23 (page 29 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "It is an error to write to a const variable outside of
+ * its declaration, so they must be initialized when
+ * declared."
+ */
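+ /* Illustrative sketch: "const float scale;" is rejected below, while
+ * "const float scale = 2.0;" is accepted.
+ */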
+ if (this->type->qualifier.flags.q.constant && decl->initializer == NULL) {
+ _mesa_glsl_error(& loc, state,
+ "const declaration of `%s' must be initialized",
+ decl->identifier);
+ }
+
+ if (state->es_shader) {
+ const glsl_type *const t = (earlier == NULL)
+ ? var->type : earlier->type;
+
+ if (t->is_unsized_array())
+ /* Section 10.17 of the GLSL ES 1.00 specification states that
+ * unsized array declarations have been removed from the language.
+ * Arrays that are sized using an initializer are still explicitly
+ * sized. However, GLSL ES 1.00 does not allow array
+ * initializers. That is only allowed in GLSL ES 3.00.
+ *
+ * Section 4.1.9 (Arrays) of the GLSL ES 3.00 spec says:
+ *
+ * "An array type can also be formed without specifying a size
+ * if the definition includes an initializer:
+ *
+ * float x[] = float[2] (1.0, 2.0); // declares an array of size 2
+ * float y[] = float[] (1.0, 2.0, 3.0); // declares an array of size 3
+ *
+ * float a[5];
+ * float b[] = a;"
+ */
+ _mesa_glsl_error(& loc, state,
+ "unsized array declarations are not allowed in "
+ "GLSL ES");
+ }
+
+ /* If the declaration is not a redeclaration, there are a few additional
+ * semantic checks that must be applied. In addition, the variable that was
+ * created for the declaration should be added to the IR stream.
+ */
+ if (earlier == NULL) {
+ validate_identifier(decl->identifier, loc, state);
+
+ /* Add the variable to the symbol table. Note that the initializer's
+ * IR was already processed earlier (though it hasn't been emitted
+ * yet), without the variable in scope.
+ *
+ * This differs from most C-like languages, but it follows the GLSL
+ * specification. From page 28 (page 34 of the PDF) of the GLSL 1.50
+ * spec:
+ *
+ * "Within a declaration, the scope of a name starts immediately
+ * after the initializer if present or immediately after the name
+ * being declared if not."
+ */
+ if (!state->symbols->add_variable(var)) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state, "name `%s' already taken in the "
+ "current scope", decl->identifier);
+ continue;
+ }
+
+ /* Push the variable declaration to the top. It means that all the
+ * variable declarations will appear in a funny last-to-first order,
+ * but otherwise we run into trouble if a function is prototyped, a
+ * global var is declared, then the function is defined with usage of
+ * the global var. See glslparsertest's CorrectModule.frag.
+ */
+ instructions->push_head(var);
+ }
+
+ instructions->append_list(&initializer_instructions);
+ }
+
+
+ /* Generally, variable declarations do not have r-values. However,
+ * one is used for the declaration in
+ *
+ * while (bool b = some_condition()) {
+ * ...
+ * }
+ *
+ * so we return the rvalue from the last seen declaration here.
+ */
+ return result;
+ }
+
+
+ ir_rvalue *
+ ast_parameter_declarator::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+ const struct glsl_type *type;
+ const char *name = NULL;
+ YYLTYPE loc = this->get_location();
+
+ type = this->type->glsl_type(& name, state);
+
+ if (type == NULL) {
+ if (name != NULL) {
+ _mesa_glsl_error(& loc, state,
+ "invalid type `%s' in declaration of `%s'",
+ name, this->identifier);
+ } else {
+ _mesa_glsl_error(& loc, state,
+ "invalid type in declaration of `%s'",
+ this->identifier);
+ }
+
+ type = glsl_type::error_type;
+ }
+
+ /* From page 62 (page 68 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "Functions that accept no input arguments need not use void in the
+ * argument list because prototypes (or definitions) are required and
+ * therefore there is no ambiguity when an empty argument list "( )" is
+ * declared. The idiom "(void)" as a parameter list is provided for
+ * convenience."
+ *
+ * Placing this check here prevents a void parameter being set up
+ * for a function, which avoids tripping up checks for main taking
+ * parameters and lookups of an unnamed symbol.
+ */
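+ /* Illustrative sketch: "float f()" and "float f(void)" both declare a
+ * zero-argument function, but "float f(void x)" names a void parameter
+ * and is rejected below.
+ */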
+ if (type->is_void()) {
+ if (this->identifier != NULL)
+ _mesa_glsl_error(& loc, state,
+ "named parameter cannot have type `void'");
+
+ is_void = true;
+ return NULL;
+ }
+
+ if (formal_parameter && (this->identifier == NULL)) {
+ _mesa_glsl_error(& loc, state, "formal parameter lacks a name");
+ return NULL;
+ }
+
+ /* This only handles "vec4 foo[..]". The earlier specifier->glsl_type(...)
+ * call already handled the "vec4[..] foo" case.
+ */
+ type = process_array_type(&loc, type, this->array_specifier, state);
+
+ if (!type->is_error() && type->is_unsized_array()) {
+ _mesa_glsl_error(&loc, state, "arrays passed as parameters must have "
+ "a declared size");
+ type = glsl_type::error_type;
+ }
+
+ is_void = false;
+ ir_variable *var = new(ctx)
+ ir_variable(type, this->identifier, ir_var_function_in);
+
+ /* Apply any specified qualifiers to the parameter declaration. Note that
+ * for function parameters the default mode is 'in'.
+ */
+ apply_type_qualifier_to_variable(& this->type->qualifier, var, state, & loc,
+ true);
+
+ /* From section 4.1.7 of the GLSL 4.40 spec:
+ *
+ * "Opaque variables cannot be treated as l-values; hence cannot
+ * be used as out or inout function parameters, nor can they be
+ * assigned into."
+ */
+ if ((var->data.mode == ir_var_function_inout || var->data.mode == ir_var_function_out)
+ && type->contains_opaque()) {
+ _mesa_glsl_error(&loc, state, "out and inout parameters cannot "
+ "contain opaque variables");
+ type = glsl_type::error_type;
+ }
+
+ /* From page 39 (page 45 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "When calling a function, expressions that do not evaluate to
+ * l-values cannot be passed to parameters declared as out or inout."
+ *
+ * From page 32 (page 38 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "Other binary or unary expressions, non-dereferenced arrays,
+ * function names, swizzles with repeated fields, and constants
+ * cannot be l-values."
+ *
+ * So for GLSL 1.10, passing an array as an out or inout parameter is not
+ * allowed. This restriction is removed in GLSL 1.20, and in GLSL ES.
+ */
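+ /* Illustrative sketch: "void f(out float a[4]);" is rejected in GLSL 1.10
+ * but accepted in GLSL 1.20 and in GLSL ES.
+ */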
+ if ((var->data.mode == ir_var_function_inout || var->data.mode == ir_var_function_out)
+ && type->is_array()
+ && !state->check_version(120, 100, &loc,
+ "arrays cannot be out or inout parameters")) {
+ type = glsl_type::error_type;
+ }
+
+ instructions->push_tail(var);
+
+ /* Parameter declarations do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ void
+ ast_parameter_declarator::parameters_to_hir(exec_list *ast_parameters,
+ bool formal,
+ exec_list *ir_parameters,
+ _mesa_glsl_parse_state *state)
+ {
+ ast_parameter_declarator *void_param = NULL;
+ unsigned count = 0;
+
+ foreach_list_typed (ast_parameter_declarator, param, link, ast_parameters) {
+ param->formal_parameter = formal;
+ param->hir(ir_parameters, state);
+
+ if (param->is_void)
+ void_param = param;
+
+ count++;
+ }
+
+ if ((void_param != NULL) && (count > 1)) {
+ YYLTYPE loc = void_param->get_location();
+
+ _mesa_glsl_error(& loc, state,
+ "`void' parameter must be only parameter");
+ }
+ }
+
+
+ void
+ emit_function(_mesa_glsl_parse_state *state, ir_function *f)
+ {
+ /* IR invariants disallow function declarations or definitions
+ * nested within other function definitions. But there is no
+ * requirement about the relative order of function declarations
+ * and definitions with respect to one another. So simply insert
+ * the new ir_function block at the end of the toplevel instruction
+ * list.
+ */
+ state->toplevel_ir->push_tail(f);
+ }
+
+
+ ir_rvalue *
+ ast_function::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+ ir_function *f = NULL;
+ ir_function_signature *sig = NULL;
+ exec_list hir_parameters;
+ YYLTYPE loc = this->get_location();
+
+ const char *const name = identifier;
+
+ /* New functions are always added to the top-level IR instruction stream,
+ * so this instruction list pointer is ignored. See also emit_function
+ * (called below).
+ */
+ (void) instructions;
+
+ /* From page 21 (page 27 of the PDF) of the GLSL 1.20 spec,
+ *
+ * "Function declarations (prototypes) cannot occur inside of functions;
+ * they must be at global scope, or for the built-in functions, outside
+ * the global scope."
+ *
+ * From page 27 (page 33 of the PDF) of the GLSL ES 1.00.16 spec,
+ *
+ * "User defined functions may only be defined within the global scope."
+ *
+ * Note that this language does not appear in GLSL 1.10.
+ */
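+ /* Illustrative sketch: declaring a prototype such as "float helper(float);"
+ * inside main() is rejected for GLSL >= 1.20 and for GLSL ES.
+ */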
+ if ((state->current_function != NULL) &&
+ state->is_version(120, 100)) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state,
+ "declaration of function `%s' not allowed within "
+ "function body", name);
+ }
+
+ validate_identifier(name, this->get_location(), state);
+
+ /* Convert the list of function parameters to HIR now so that they can be
+ * used below to compare this function's signature with previously seen
+ * signatures for functions with the same name.
+ */
+ ast_parameter_declarator::parameters_to_hir(& this->parameters,
+ is_definition,
+ & hir_parameters, state);
+
+ const char *return_type_name;
+ const glsl_type *return_type =
+ this->return_type->glsl_type(& return_type_name, state);
+
+ if (!return_type) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state,
+ "function `%s' has undeclared return type `%s'",
+ name, return_type_name);
+ return_type = glsl_type::error_type;
+ }
+
+ /* ARB_shader_subroutine states:
+ * "Subroutine declarations cannot be prototyped. It is an error to prepend
+ * subroutine(...) to a function declaration."
+ */
+ if (this->return_type->qualifier.flags.q.subroutine_def && !is_definition) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state,
+ "function declaration `%s' cannot have subroutine prepended",
+ name);
+ }
+
+ /* From page 56 (page 62 of the PDF) of the GLSL 1.30 spec:
+ * "No qualifier is allowed on the return type of a function."
+ */
+ if (this->return_type->has_qualifiers(state)) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(& loc, state,
+ "function `%s' return type has qualifiers", name);
+ }
+
+ /* Section 6.1 (Function Definitions) of the GLSL 1.20 spec says:
+ *
+ * "Arrays are allowed as arguments and as the return type. In both
+ * cases, the array must be explicitly sized."
+ */
+ if (return_type->is_unsized_array()) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(& loc, state,
+ "function `%s' return type array must be explicitly "
+ "sized", name);
+ }
+
+ /* From section 4.1.7 of the GLSL 4.40 spec:
+ *
+ * "[Opaque types] can only be declared as function parameters
+ * or uniform-qualified variables."
+ */
+ if (return_type->contains_opaque()) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state,
+ "function `%s' return type can't contain an opaque type",
+ name);
+ }
+
+ /* Create an ir_function if one doesn't already exist. */
+ f = state->symbols->get_function(name);
+ if (f == NULL) {
+ f = new(ctx) ir_function(name);
+ if (!this->return_type->qualifier.flags.q.subroutine) {
+ if (!state->symbols->add_function(f)) {
+ /* This function name shadows a non-function use of the same name. */
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state, "function name `%s' conflicts with "
+ "non-function", name);
+ return NULL;
+ }
+ }
+ emit_function(state, f);
+ }
+
+ /* From GLSL ES 3.0 spec, chapter 6.1 "Function Definitions", page 71:
+ *
+ * "A shader cannot redefine or overload built-in functions."
+ *
+ * While in GLSL ES 1.0 specification, chapter 8 "Built-in Functions":
+ *
+ * "User code can overload the built-in functions but cannot redefine
+ * them."
+ */
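+ /* Illustrative sketch: in GLSL ES 3.00, any user function sharing a name
+ * with a built-in, e.g. "float clamp(float x) { ... }", is rejected below;
+ * the lookup is by name only, so overloads are rejected as well.
+ */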
+ if (state->es_shader && state->language_version >= 300) {
+ /* Local shader has no exact candidates; check the built-ins. */
+ _mesa_glsl_initialize_builtin_functions();
+ if (_mesa_glsl_find_builtin_function_by_name(name)) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(& loc, state,
+ "A shader cannot redefine or overload built-in "
+ "function `%s' in GLSL ES 3.00", name);
+ return NULL;
+ }
+ }
+
+ /* Verify that this function's signature either doesn't match a previously
+ * seen signature for a function with the same name, or, if a match is found,
+ * that the previously seen signature does not have an associated definition.
+ */
+ if (state->es_shader || f->has_user_signature()) {
+ sig = f->exact_matching_signature(state, &hir_parameters);
+ if (sig != NULL) {
+ const char *badvar = sig->qualifiers_match(&hir_parameters);
+ if (badvar != NULL) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(&loc, state, "function `%s' parameter `%s' "
+ "qualifiers don't match prototype", name, badvar);
+ }
+
+ if (sig->return_type != return_type) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(&loc, state, "function `%s' return type doesn't "
+ "match prototype", name);
+ }
+
+ if (sig->is_defined) {
+ if (is_definition) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(& loc, state, "function `%s' redefined", name);
+ } else {
+ /* We just encountered a prototype that exactly matches a
+ * function that's already been defined. This is redundant,
+ * and we should ignore it.
+ */
+ return NULL;
+ }
+ }
+ }
+ }
+
+ /* Verify the return type of main() */
+ if (strcmp(name, "main") == 0) {
+ if (! return_type->is_void()) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state, "main() must return void");
+ }
+
+ if (!hir_parameters.is_empty()) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state, "main() must not take any parameters");
+ }
+ }
+
+ /* Finish storing the information about this new function in its signature.
+ */
+ if (sig == NULL) {
+ sig = new(ctx) ir_function_signature(return_type);
+ f->add_signature(sig);
+ }
+
+ sig->replace_parameters(&hir_parameters);
+ signature = sig;
+
+ if (this->return_type->qualifier.flags.q.subroutine_def) {
+ int idx;
+
+ if (this->return_type->qualifier.flags.q.explicit_index) {
+ unsigned qual_index;
+ if (process_qualifier_constant(state, &loc, "index",
+ this->return_type->qualifier.index,
+ &qual_index)) {
+ if (!state->has_explicit_uniform_location()) {
+ _mesa_glsl_error(&loc, state, "subroutine index requires "
+ "GL_ARB_explicit_uniform_location or "
+ "GLSL 4.30");
+ } else if (qual_index >= MAX_SUBROUTINES) {
+ _mesa_glsl_error(&loc, state,
+ "invalid subroutine index (%d) index must "
+ "be a number between 0 and "
+ "GL_MAX_SUBROUTINES - 1 (%d)", qual_index,
+ MAX_SUBROUTINES - 1);
+ } else {
+ f->subroutine_index = qual_index;
+ }
+ }
+ }
+
+ f->num_subroutine_types = this->return_type->qualifier.subroutine_list->declarations.length();
+ f->subroutine_types = ralloc_array(state, const struct glsl_type *,
+ f->num_subroutine_types);
+ idx = 0;
+ foreach_list_typed(ast_declaration, decl, link, &this->return_type->qualifier.subroutine_list->declarations) {
+ const struct glsl_type *type;
+ /* the subroutine type must be already declared */
+ type = state->symbols->get_type(decl->identifier);
+ if (!type) {
+ _mesa_glsl_error(& loc, state, "unknown type '%s' in subroutine function definition", decl->identifier);
+ }
+ f->subroutine_types[idx++] = type;
+ }
+ state->subroutines = (ir_function **)reralloc(state, state->subroutines,
+ ir_function *,
+ state->num_subroutines + 1);
+ state->subroutines[state->num_subroutines] = f;
+ state->num_subroutines++;
+
+ }
+
+ if (this->return_type->qualifier.flags.q.subroutine) {
+ if (!state->symbols->add_type(this->identifier, glsl_type::get_subroutine_instance(this->identifier))) {
+ _mesa_glsl_error(& loc, state, "type '%s' previously defined", this->identifier);
+ return NULL;
+ }
+ state->subroutine_types = (ir_function **)reralloc(state, state->subroutine_types,
+ ir_function *,
+ state->num_subroutine_types + 1);
+ state->subroutine_types[state->num_subroutine_types] = f;
+ state->num_subroutine_types++;
+
+ f->is_subroutine = true;
+ }
+
+ /* Function declarations (prototypes) do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_function_definition::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ prototype->is_definition = true;
+ prototype->hir(instructions, state);
+
+ ir_function_signature *signature = prototype->signature;
+ if (signature == NULL)
+ return NULL;
+
+ assert(state->current_function == NULL);
+ state->current_function = signature;
+ state->found_return = false;
+
+ /* Duplicate parameters declared in the prototype as concrete variables.
+ * Add these to the symbol table.
+ */
+ state->symbols->push_scope();
+ foreach_in_list(ir_variable, var, &signature->parameters) {
+ assert(var->as_variable() != NULL);
+
+ /* The only way a parameter would "exist" is if two parameters have
+ * the same name.
+ */
+ if (state->symbols->name_declared_this_scope(var->name)) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state, "parameter `%s' redeclared", var->name);
+ } else {
+ state->symbols->add_variable(var);
+ }
+ }
+
+ /* Convert the body of the function to HIR. */
+ this->body->hir(&signature->body, state);
+ signature->is_defined = true;
+
+ state->symbols->pop_scope();
+
+ assert(state->current_function == signature);
+ state->current_function = NULL;
+
+ if (!signature->return_type->is_void() && !state->found_return) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(& loc, state, "function `%s' has non-void return type "
+ "%s, but no return statement",
+ signature->function_name(),
+ signature->return_type->name);
+ }
+
+ /* Function definitions do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_jump_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+
+ switch (mode) {
+ case ast_return: {
+ ir_return *inst;
+ assert(state->current_function);
+
+ if (opt_return_value) {
+ ir_rvalue *ret = opt_return_value->hir(instructions, state);
+
+ /* The value of the return type can be NULL if the shader says
+ * 'return foo();' and foo() is a function that returns void.
+ *
+ * NOTE: The GLSL spec doesn't say that this is an error. The type
+ * of the return value is void. If the return type of the function is
+ * also void, then this should compile without error. Seriously.
+ */
+ const glsl_type *const ret_type =
+ (ret == NULL) ? glsl_type::void_type : ret->type;
+
+ /* Implicit conversions are not allowed for return values prior to
+ * ARB_shading_language_420pack.
+ */
+ if (state->current_function->return_type != ret_type) {
+ YYLTYPE loc = this->get_location();
+
+ if (state->has_420pack()) {
+ if (!apply_implicit_conversion(state->current_function->return_type,
+ ret, state)) {
+ _mesa_glsl_error(& loc, state,
+ "could not implicitly convert return value "
+ "to %s, in function `%s'",
+ state->current_function->return_type->name,
+ state->current_function->function_name());
+ }
+ } else {
+ _mesa_glsl_error(& loc, state,
+ "`return' with wrong type %s, in function `%s' "
+ "returning %s",
+ ret_type->name,
+ state->current_function->function_name(),
+ state->current_function->return_type->name);
+ }
+ } else if (state->current_function->return_type->base_type ==
+ GLSL_TYPE_VOID) {
+ YYLTYPE loc = this->get_location();
+
+ /* The ARB_shading_language_420pack, GLSL ES 3.0, and GLSL 4.20
+ * specs add a clarification:
+ *
+ * "A void function can only use return without a return argument, even if
+ * the return argument has void type. Return statements only accept values:
+ *
+ * void func1() { }
+ * void func2() { return func1(); } // illegal return statement"
+ */
+ _mesa_glsl_error(& loc, state,
+ "void functions can only use `return' without a "
+ "return argument");
+ }
+
+ inst = new(ctx) ir_return(ret);
+ } else {
+ if (state->current_function->return_type->base_type !=
+ GLSL_TYPE_VOID) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state,
+ "`return' with no value, in function %s returning "
+ "non-void",
+ state->current_function->function_name());
+ }
+ inst = new(ctx) ir_return;
+ }
+
+ state->found_return = true;
+ instructions->push_tail(inst);
+ break;
+ }
+
+ case ast_discard:
+ if (state->stage != MESA_SHADER_FRAGMENT) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state,
+ "`discard' may only appear in a fragment shader");
+ }
+ instructions->push_tail(new(ctx) ir_discard);
+ break;
+
+ case ast_break:
+ case ast_continue:
+ if (mode == ast_continue &&
+ state->loop_nesting_ast == NULL) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state, "continue may only appear in a loop");
+ } else if (mode == ast_break &&
+ state->loop_nesting_ast == NULL &&
+ state->switch_state.switch_nesting_ast == NULL) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state,
+ "break may only appear in a loop or a switch");
+ } else {
+ /* For a loop, inline the for loop expression again, since we don't
+ * know where near the end of the loop body the normal copy of it is
+ * going to be placed. Same goes for the condition for a do-while
+ * loop.
+ */
+ if (state->loop_nesting_ast != NULL &&
+ mode == ast_continue && !state->switch_state.is_switch_innermost) {
+ if (state->loop_nesting_ast->rest_expression) {
+ state->loop_nesting_ast->rest_expression->hir(instructions,
+ state);
+ }
+ if (state->loop_nesting_ast->mode ==
+ ast_iteration_statement::ast_do_while) {
+ state->loop_nesting_ast->condition_to_hir(instructions, state);
+ }
+ }
+
+ if (state->switch_state.is_switch_innermost &&
+ mode == ast_continue) {
+ /* Set 'continue_inside' to true. */
+ ir_rvalue *const true_val = new (ctx) ir_constant(true);
+ ir_dereference_variable *deref_continue_inside_var =
+ new(ctx) ir_dereference_variable(state->switch_state.continue_inside);
+ instructions->push_tail(new(ctx) ir_assignment(deref_continue_inside_var,
+ true_val));
+
+ /* Break out from the switch, continue for the loop will
+ * be called right after switch. */
+ ir_loop_jump *const jump =
+ new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
+ instructions->push_tail(jump);
+
+ } else if (state->switch_state.is_switch_innermost &&
+ mode == ast_break) {
+ /* Force break out of switch by inserting a break. */
+ ir_loop_jump *const jump =
+ new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
+ instructions->push_tail(jump);
+ } else {
+ ir_loop_jump *const jump =
+ new(ctx) ir_loop_jump((mode == ast_break)
+ ? ir_loop_jump::jump_break
+ : ir_loop_jump::jump_continue);
+ instructions->push_tail(jump);
+ }
+ }
+
+ break;
+ }
+
+ /* Jump instructions do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_selection_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+
+ ir_rvalue *const condition = this->condition->hir(instructions, state);
+
+ /* From page 66 (page 72 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "Any expression whose type evaluates to a Boolean can be used as the
+ * conditional expression bool-expression. Vector types are not accepted
+ * as the expression to if."
+ *
+ * The checks are separated so that higher quality diagnostics can be
+ * generated for cases where both rules are violated.
+ */
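+ /* Illustrative sketch: "if (bvec2(true, false)) ..." and "if (1) ..." are
+ * both rejected below; the condition must be a single bool.
+ */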
+ if (!condition->type->is_boolean() || !condition->type->is_scalar()) {
+ YYLTYPE loc = this->condition->get_location();
+
+ _mesa_glsl_error(& loc, state, "if-statement condition must be scalar "
+ "boolean");
+ }
+
+ ir_if *const stmt = new(ctx) ir_if(condition);
+
+ if (then_statement != NULL) {
+ state->symbols->push_scope();
+ then_statement->hir(& stmt->then_instructions, state);
+ state->symbols->pop_scope();
+ }
+
+ if (else_statement != NULL) {
+ state->symbols->push_scope();
+ else_statement->hir(& stmt->else_instructions, state);
+ state->symbols->pop_scope();
+ }
+
+ instructions->push_tail(stmt);
+
+ /* if-statements do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_switch_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+
+ ir_rvalue *const test_expression =
+ this->test_expression->hir(instructions, state);
+
+ /* From page 66 (page 55 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "The type of init-expression in a switch statement must be a
+ * scalar integer."
+ */
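+ /* Illustrative sketch: "switch (1.5)" and "switch (uvec2(0u))" are both
+ * rejected below; "switch (i)" with a scalar int or uint is accepted.
+ */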
+ if (!test_expression->type->is_scalar() ||
+ !test_expression->type->is_integer()) {
+ YYLTYPE loc = this->test_expression->get_location();
+
+ _mesa_glsl_error(& loc,
+ state,
+ "switch-statement expression must be scalar "
+ "integer");
+ }
+
+ /* Track the switch-statement nesting in a stack-like manner.
+ */
+ struct glsl_switch_state saved = state->switch_state;
+
+ state->switch_state.is_switch_innermost = true;
+ state->switch_state.switch_nesting_ast = this;
+ state->switch_state.labels_ht = hash_table_ctor(0, hash_table_pointer_hash,
+ hash_table_pointer_compare);
+ state->switch_state.previous_default = NULL;
+
+ /* Initialize is_fallthru state to false.
+ */
+ ir_rvalue *const is_fallthru_val = new (ctx) ir_constant(false);
+ state->switch_state.is_fallthru_var =
+ new(ctx) ir_variable(glsl_type::bool_type,
+ "switch_is_fallthru_tmp",
+ ir_var_temporary);
+ instructions->push_tail(state->switch_state.is_fallthru_var);
+
+ ir_dereference_variable *deref_is_fallthru_var =
+ new(ctx) ir_dereference_variable(state->switch_state.is_fallthru_var);
+ instructions->push_tail(new(ctx) ir_assignment(deref_is_fallthru_var,
+ is_fallthru_val));
+
+ /* Initialize continue_inside state to false.
+ */
+ state->switch_state.continue_inside =
+ new(ctx) ir_variable(glsl_type::bool_type,
+ "continue_inside_tmp",
+ ir_var_temporary);
+ instructions->push_tail(state->switch_state.continue_inside);
+
+ ir_rvalue *const false_val = new (ctx) ir_constant(false);
+ ir_dereference_variable *deref_continue_inside_var =
+ new(ctx) ir_dereference_variable(state->switch_state.continue_inside);
+ instructions->push_tail(new(ctx) ir_assignment(deref_continue_inside_var,
+ false_val));
+
+ state->switch_state.run_default =
+ new(ctx) ir_variable(glsl_type::bool_type,
+ "run_default_tmp",
+ ir_var_temporary);
+ instructions->push_tail(state->switch_state.run_default);
+
+ /* Loop around the switch is used for flow control. */
+ ir_loop * loop = new(ctx) ir_loop();
+ instructions->push_tail(loop);
+
+ /* Cache test expression.
+ */
+ test_to_hir(&loop->body_instructions, state);
+
+ /* Emit code for body of switch stmt.
+ */
+ body->hir(&loop->body_instructions, state);
+
+ /* Insert a break at the end to exit loop. */
+ ir_loop_jump *jump = new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
+ loop->body_instructions.push_tail(jump);
+
+ /* If we are inside loop, check if continue got called inside switch. */
+ if (state->loop_nesting_ast != NULL) {
+ ir_dereference_variable *deref_continue_inside =
+ new(ctx) ir_dereference_variable(state->switch_state.continue_inside);
+ ir_if *irif = new(ctx) ir_if(deref_continue_inside);
+ ir_loop_jump *jump = new(ctx) ir_loop_jump(ir_loop_jump::jump_continue);
+
+ if (state->loop_nesting_ast != NULL) {
+ if (state->loop_nesting_ast->rest_expression) {
+ state->loop_nesting_ast->rest_expression->hir(&irif->then_instructions,
+ state);
+ }
+ if (state->loop_nesting_ast->mode ==
+ ast_iteration_statement::ast_do_while) {
+ state->loop_nesting_ast->condition_to_hir(&irif->then_instructions, state);
+ }
+ }
+ irif->then_instructions.push_tail(jump);
+ instructions->push_tail(irif);
+ }
+
+ hash_table_dtor(state->switch_state.labels_ht);
+
+ state->switch_state = saved;
+
+ /* Switch statements do not have r-values. */
+ return NULL;
+ }
+
+
+ void
+ ast_switch_statement::test_to_hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+
+ /* Cache value of test expression. */
+ ir_rvalue *const test_val =
+ test_expression->hir(instructions,
+ state);
+
+ state->switch_state.test_var = new(ctx) ir_variable(test_val->type,
+ "switch_test_tmp",
+ ir_var_temporary);
+ ir_dereference_variable *deref_test_var =
+ new(ctx) ir_dereference_variable(state->switch_state.test_var);
+
+ instructions->push_tail(state->switch_state.test_var);
+ instructions->push_tail(new(ctx) ir_assignment(deref_test_var, test_val));
+ }
+
+
+ ir_rvalue *
+ ast_switch_body::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ if (stmts != NULL)
+ stmts->hir(instructions, state);
+
+ /* Switch bodies do not have r-values. */
+ return NULL;
+ }
+
+ ir_rvalue *
+ ast_case_statement_list::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ exec_list default_case, after_default, tmp;
+
+ foreach_list_typed (ast_case_statement, case_stmt, link, & this->cases) {
+ case_stmt->hir(&tmp, state);
+
+ /* Default case. */
+ if (state->switch_state.previous_default && default_case.is_empty()) {
+ default_case.append_list(&tmp);
+ continue;
+ }
+
+ /* If default case found, append 'after_default' list. */
+ if (!default_case.is_empty())
+ after_default.append_list(&tmp);
+ else
+ instructions->append_list(&tmp);
+ }
+
+ /* Handle the default case. This is done here because default might not be
+ * the last case. We need to add checks against following cases first to see
+ * if default should be chosen or not.
+ */
+ if (!default_case.is_empty()) {
+
+ ir_rvalue *const true_val = new (state) ir_constant(true);
+ ir_dereference_variable *deref_run_default_var =
+ new(state) ir_dereference_variable(state->switch_state.run_default);
+
+ /* Choose to run default case initially, following conditional
+ * assignments might change this.
+ */
+ ir_assignment *const init_var =
+ new(state) ir_assignment(deref_run_default_var, true_val);
+ instructions->push_tail(init_var);
+
+ /* Default case was the last one, no checks required. */
+ if (after_default.is_empty()) {
+ instructions->append_list(&default_case);
+ return NULL;
+ }
+
+ foreach_in_list(ir_instruction, ir, &after_default) {
+ ir_assignment *assign = ir->as_assignment();
+
+ if (!assign)
+ continue;
+
+ /* Clone the check between case label and init expression. */
+ ir_expression *exp = (ir_expression*) assign->condition;
+ ir_expression *clone = exp->clone(state, NULL);
+
+ ir_dereference_variable *deref_var =
+ new(state) ir_dereference_variable(state->switch_state.run_default);
+ ir_rvalue *const false_val = new (state) ir_constant(false);
+
+ ir_assignment *const set_false =
+ new(state) ir_assignment(deref_var, false_val, clone);
+
+ instructions->push_tail(set_false);
+ }
+
+ /* Append default case and all cases after it. */
+ instructions->append_list(&default_case);
+ instructions->append_list(&after_default);
+ }
+
+ /* Case statements do not have r-values. */
+ return NULL;
+ }
+
+ ir_rvalue *
+ ast_case_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ labels->hir(instructions, state);
+
+ /* Guard case statements depending on fallthru state. */
+ ir_dereference_variable *const deref_fallthru_guard =
+ new(state) ir_dereference_variable(state->switch_state.is_fallthru_var);
+ ir_if *const test_fallthru = new(state) ir_if(deref_fallthru_guard);
+
+ foreach_list_typed (ast_node, stmt, link, & this->stmts)
+ stmt->hir(& test_fallthru->then_instructions, state);
+
+ instructions->push_tail(test_fallthru);
+
+ /* Case statements do not have r-values. */
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_case_label_list::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ foreach_list_typed (ast_case_label, label, link, & this->labels)
+ label->hir(instructions, state);
+
+ /* Case labels do not have r-values. */
+ return NULL;
+ }
+
+ ir_rvalue *
+ ast_case_label::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+
+ ir_dereference_variable *deref_fallthru_var =
+ new(ctx) ir_dereference_variable(state->switch_state.is_fallthru_var);
+
+ ir_rvalue *const true_val = new(ctx) ir_constant(true);
+
+ /* If not default case, ... */
+ if (this->test_value != NULL) {
+ /* Conditionally set fallthru state based on
+ * comparison of cached test expression value to case label.
+ */
+ ir_rvalue *const label_rval = this->test_value->hir(instructions, state);
+ ir_constant *label_const = label_rval->constant_expression_value();
+
+ if (!label_const) {
+ YYLTYPE loc = this->test_value->get_location();
+
+ _mesa_glsl_error(& loc, state,
+ "switch statement case label must be a "
+ "constant expression");
+
+ /* Stuff a dummy value in to allow processing to continue. */
+ label_const = new(ctx) ir_constant(0);
+ } else {
+ ast_expression *previous_label = (ast_expression *)
+ hash_table_find(state->switch_state.labels_ht,
+ (void *)(uintptr_t)label_const->value.u[0]);
+
+ if (previous_label) {
+ YYLTYPE loc = this->test_value->get_location();
+ _mesa_glsl_error(& loc, state, "duplicate case value");
+
+ loc = previous_label->get_location();
+ _mesa_glsl_error(& loc, state, "this is the previous case label");
+ } else {
+ hash_table_insert(state->switch_state.labels_ht,
+ this->test_value,
+ (void *)(uintptr_t)label_const->value.u[0]);
+ }
+ }
+
+ ir_dereference_variable *deref_test_var =
+ new(ctx) ir_dereference_variable(state->switch_state.test_var);
+
+ ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal,
+ label_const,
+ deref_test_var);
+
+ /*
+ * From GLSL 4.40 specification section 6.2 ("Selection"):
+ *
+ * "The type of the init-expression value in a switch statement must
+ * be a scalar int or uint. The type of the constant-expression value
+ * in a case label also must be a scalar int or uint. When any pair
+ * of these values is tested for "equal value" and the types do not
+ * match, an implicit conversion will be done to convert the int to a
+ * uint (see section 4.1.10 “Implicit Conversions”) before the compare
+ * is done."
+ */
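+ /* Illustrative sketch: for "switch (u)" with a uint expression, a label
+ * "case 1:" has its int constant converted to uint before the comparison.
+ */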
+ if (label_const->type != state->switch_state.test_var->type) {
+ YYLTYPE loc = this->test_value->get_location();
+
+ const glsl_type *type_a = label_const->type;
+ const glsl_type *type_b = state->switch_state.test_var->type;
+
+ /* Check if int->uint implicit conversion is supported. */
+ bool integer_conversion_supported =
+ glsl_type::int_type->can_implicitly_convert_to(glsl_type::uint_type,
+ state);
+
+ if ((!type_a->is_integer() || !type_b->is_integer()) ||
+ !integer_conversion_supported) {
+ _mesa_glsl_error(&loc, state, "type mismatch with switch "
+ "init-expression and case label (%s != %s)",
+ type_a->name, type_b->name);
+ } else {
+ /* Conversion of the case label. */
+ if (type_a->base_type == GLSL_TYPE_INT) {
+ if (!apply_implicit_conversion(glsl_type::uint_type,
+ test_cond->operands[0], state))
+ _mesa_glsl_error(&loc, state, "implicit type conversion error");
+ } else {
+ /* Conversion of the init-expression value. */
+ if (!apply_implicit_conversion(glsl_type::uint_type,
+ test_cond->operands[1], state))
+ _mesa_glsl_error(&loc, state, "implicit type conversion error");
+ }
+ }
+ }
+
+ ir_assignment *set_fallthru_on_test =
+ new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond);
+
+ instructions->push_tail(set_fallthru_on_test);
+ } else { /* default case */
+ if (state->switch_state.previous_default) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(& loc, state,
+ "multiple default labels in one switch");
+
+ loc = state->switch_state.previous_default->get_location();
+ _mesa_glsl_error(& loc, state, "this is the first default label");
+ }
+ state->switch_state.previous_default = this;
+
+ /* Set fallthru condition on 'run_default' bool. */
+ ir_dereference_variable *deref_run_default =
+ new(ctx) ir_dereference_variable(state->switch_state.run_default);
+ ir_rvalue *const cond_true = new(ctx) ir_constant(true);
+ ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal,
+ cond_true,
+ deref_run_default);
+
+ /* Set fallthru state. */
+ ir_assignment *set_fallthru =
+ new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond);
+
+ instructions->push_tail(set_fallthru);
+ }
+
+ /* Case statements do not have r-values. */
+ return NULL;
+ }
+
+ void
+ ast_iteration_statement::condition_to_hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+
+ if (condition != NULL) {
+ ir_rvalue *const cond =
+ condition->hir(instructions, state);
+
+ if ((cond == NULL)
+ || !cond->type->is_boolean() || !cond->type->is_scalar()) {
+ YYLTYPE loc = condition->get_location();
+
+ _mesa_glsl_error(& loc, state,
+ "loop condition must be scalar boolean");
+ } else {
+ /* As the first code in the loop body, generate a block that looks
+ * like 'if (!condition) break;' as the loop termination condition.
+ */
+ ir_rvalue *const not_cond =
+ new(ctx) ir_expression(ir_unop_logic_not, cond);
+
+ ir_if *const if_stmt = new(ctx) ir_if(not_cond);
+
+ ir_jump *const break_stmt =
+ new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
+
+ if_stmt->then_instructions.push_tail(break_stmt);
+ instructions->push_tail(if_stmt);
+ }
+ }
+ }
+
+
+ ir_rvalue *
+ ast_iteration_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+
+ /* For-loops and while-loops start a new scope, but do-while loops do not.
+ */
+ if (mode != ast_do_while)
+ state->symbols->push_scope();
+
+ if (init_statement != NULL)
+ init_statement->hir(instructions, state);
+
+ ir_loop *const stmt = new(ctx) ir_loop();
+ instructions->push_tail(stmt);
+
+ /* Track the current loop nesting. */
+ ast_iteration_statement *nesting_ast = state->loop_nesting_ast;
+
+ state->loop_nesting_ast = this;
+
+ /* Likewise, indicate that following code is closest to a loop,
+ * NOT closest to a switch.
+ */
+ bool saved_is_switch_innermost = state->switch_state.is_switch_innermost;
+ state->switch_state.is_switch_innermost = false;
+
+ if (mode != ast_do_while)
+ condition_to_hir(&stmt->body_instructions, state);
+
+ if (body != NULL)
+ body->hir(& stmt->body_instructions, state);
+
+ if (rest_expression != NULL)
+ rest_expression->hir(& stmt->body_instructions, state);
+
+ if (mode == ast_do_while)
+ condition_to_hir(&stmt->body_instructions, state);
+
+ if (mode != ast_do_while)
+ state->symbols->pop_scope();
+
+ /* Restore previous nesting before returning. */
+ state->loop_nesting_ast = nesting_ast;
+ state->switch_state.is_switch_innermost = saved_is_switch_innermost;
+
+ /* Loops do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ /**
+ * Determine if the given type is valid for establishing a default precision
+ * qualifier.
+ *
+ * From GLSL ES 3.00 section 4.5.4 ("Default Precision Qualifiers"):
+ *
+ * "The precision statement
+ *
+ * precision precision-qualifier type;
+ *
+ * can be used to establish a default precision qualifier. The type field
+ * can be either int or float or any of the sampler types, and the
+ * precision-qualifier can be lowp, mediump, or highp."
+ *
+ * GLSL ES 1.00 has similar language. GLSL 1.30 doesn't allow precision
+ * qualifiers on sampler types, but this seems like an oversight (since the
+ * intention of including these in GLSL 1.30 is to allow compatibility with ES
+ * shaders). So we allow int, float, and all sampler types regardless of GLSL
+ * version.
+ */
+ static bool
+ is_valid_default_precision_type(const struct glsl_type *const type)
+ {
+ if (type == NULL)
+ return false;
+
+ switch (type->base_type) {
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ /* "int" and "float" are valid, but vectors and matrices are not. */
+ return type->vector_elements == 1 && type->matrix_columns == 1;
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_ATOMIC_UINT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+
+ ir_rvalue *
+ ast_type_specifier::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ if (this->default_precision == ast_precision_none && this->structure == NULL)
+ return NULL;
+
+ YYLTYPE loc = this->get_location();
+
+ /* If this is a precision statement, check that the type to which it is
+ * applied is either float or int.
+ *
+ * From section 4.5.3 of the GLSL 1.30 spec:
+ * "The precision statement
+ * precision precision-qualifier type;
+ * can be used to establish a default precision qualifier. The type
+ * field can be either int or float [...]. Any other types or
+ * qualifiers will result in an error.
+ */
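+ /* Illustrative sketch: "precision mediump float;" is accepted below,
+ * while "precision highp vec4;" or a precision statement naming a
+ * structure type is rejected.
+ */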
+ if (this->default_precision != ast_precision_none) {
+ if (!state->check_precision_qualifiers_allowed(&loc))
+ return NULL;
+
+ if (this->structure != NULL) {
+ _mesa_glsl_error(&loc, state,
+ "precision qualifiers do not apply to structures");
+ return NULL;
+ }
+
+ if (this->array_specifier != NULL) {
+ _mesa_glsl_error(&loc, state,
+ "default precision statements do not apply to "
+ "arrays");
+ return NULL;
+ }
+
+ const struct glsl_type *const type =
+ state->symbols->get_type(this->type_name);
+ if (!is_valid_default_precision_type(type)) {
+ _mesa_glsl_error(&loc, state,
+ "default precision statements apply only to "
+ "float, int, and opaque types");
+ return NULL;
+ }
+
+ if (state->es_shader) {
+ /* Section 4.5.3 (Default Precision Qualifiers) of the GLSL ES 1.00
+ * spec says:
+ *
+ * "Non-precision qualified declarations will use the precision
+ * qualifier specified in the most recent precision statement
+ * that is still in scope. The precision statement has the same
+ * scoping rules as variable declarations. If it is declared
+ * inside a compound statement, its effect stops at the end of
+ * the innermost statement it was declared in. Precision
+ * statements in nested scopes override precision statements in
+ * outer scopes. Multiple precision statements for the same basic
+ * type can appear inside the same scope, with later statements
+ * overriding earlier statements within that scope."
+ *
+ * Default precision specifications follow the same scope rules as
+ * variables. So, we can track the state of the default precision
+ * qualifiers in the symbol table, and the rules will just work. This
+ * is a slight abuse of the symbol table, but it has the semantics
+ * that we want.
+ */
+ state->symbols->add_default_precision_qualifier(this->type_name,
+ this->default_precision);
+ }
+
+ /* FINISHME: Translate precision statements into IR. */
+ return NULL;
+ }
+
+ /* _mesa_ast_set_aggregate_type() sets the <structure> field so that
+ * process_record_constructor() can do type-checking on C-style initializer
+ * expressions of structs, but ast_struct_specifier should only be translated
+ * to HIR if it is declaring the type of a structure.
+ *
+ * The ->is_declaration field is false for initializers of variables
+ * declared separately from the struct's type definition.
+ *
+ * struct S { ... }; (is_declaration = true)
+ * struct T { ... } t = { ... }; (is_declaration = true)
+ * S s = { ... }; (is_declaration = false)
+ */
+ if (this->structure != NULL && this->structure->is_declaration)
+ return this->structure->hir(instructions, state);
+
+ return NULL;
+ }
+
+
+ /**
+ * Process a structure or interface block tree into an array of structure fields
+ *
+ * After parsing, where there are some syntax differences, structures and
+ * interface blocks are almost identical. They are similar enough that the
+ * AST for each can be processed the same way into a set of
+ * \c glsl_struct_field to describe the members.
+ *
+ * If we're processing an interface block, var_mode should be the type of the
+ * interface block (ir_var_shader_in, ir_var_shader_out, ir_var_uniform or
+ * ir_var_shader_storage). If we're processing a structure, var_mode should be
+ * ir_var_auto.
+ *
+ * \return
+ * The number of fields processed. A pointer to the array structure fields is
+ * stored in \c *fields_ret.
+ */
+ static unsigned
+ ast_process_struct_or_iface_block_members(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state,
+ exec_list *declarations,
+ glsl_struct_field **fields_ret,
+ bool is_interface,
+ enum glsl_matrix_layout matrix_layout,
+ bool allow_reserved_names,
+ ir_variable_mode var_mode,
+ ast_type_qualifier *layout,
+ unsigned block_stream,
+ unsigned expl_location)
+ {
+ unsigned decl_count = 0;
+
+ /* Make an initial pass over the list of fields to determine how
+ * many there are. Each element in this list is an ast_declarator_list.
+ * This means that we actually need to count the number of elements in the
+ * 'declarations' list in each of the elements.
+ */
+ foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {
+ decl_count += decl_list->declarations.length();
+ }
+
+ /* Allocate storage for the fields and process the field
+ * declarations. As the declarations are processed, try to also convert
+ * the types to HIR. This ensures that structure definitions embedded in
+ * other structure definitions or in interface blocks are processed.
+ */
+ glsl_struct_field *const fields = ralloc_array(state, glsl_struct_field,
+ decl_count);
+
+ bool first_member = true;
+ bool first_member_has_explicit_location;
+
+ unsigned i = 0;
+ foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {
+ const char *type_name;
+ YYLTYPE loc = decl_list->get_location();
+
+ decl_list->type->specifier->hir(instructions, state);
+
+ /* Section 10.9 of the GLSL ES 1.00 specification states that
+ * embedded structure definitions have been removed from the language.
+ */
+ if (state->es_shader && decl_list->type->specifier->structure != NULL) {
+ _mesa_glsl_error(&loc, state, "embedded structure definitions are "
+ "not allowed in GLSL ES 1.00");
+ }
+
+ const glsl_type *decl_type =
+ decl_list->type->glsl_type(& type_name, state);
+
+ const struct ast_type_qualifier *const qual =
+ &decl_list->type->qualifier;
+
+ /* From section 4.3.9 of the GLSL 4.40 spec:
+ *
+ * "[In interface blocks] opaque types are not allowed."
+ *
+ * It should be impossible for decl_type to be NULL here. Cases that
+ * might naturally lead to decl_type being NULL, especially for the
+ * is_interface case, will have resulted in compilation having
+ * already halted due to a syntax error.
+ */
+ assert(decl_type);
+
+ if (is_interface && decl_type->contains_opaque()) {
+ _mesa_glsl_error(&loc, state,
+ "uniform/buffer in non-default interface block contains "
+ "opaque variable");
+ }
+
+ if (decl_type->contains_atomic()) {
+ /* From section 4.1.7.3 of the GLSL 4.40 spec:
+ *
+ * "Members of structures cannot be declared as atomic counter
+ * types."
+ */
+ _mesa_glsl_error(&loc, state, "atomic counter in structure, "
+ "shader storage block or uniform block");
+ }
+
+ if (decl_type->contains_image()) {
+ /* FINISHME: Same problem as with atomic counters.
+ * FINISHME: Request clarification from Khronos and add
+ * FINISHME: spec quotation here.
+ */
+ _mesa_glsl_error(&loc, state,
+ "image in structure, shader storage block or "
+ "uniform block");
+ }
+
+ if (qual->flags.q.explicit_binding) {
+ _mesa_glsl_error(&loc, state,
+ "binding layout qualifier cannot be applied "
+ "to struct or interface block members");
+ }
+
+ if (is_interface) {
+ if (!first_member) {
+ if (!layout->flags.q.explicit_location &&
+ ((first_member_has_explicit_location &&
+ !qual->flags.q.explicit_location) ||
+ (!first_member_has_explicit_location &&
+ qual->flags.q.explicit_location))) {
+ _mesa_glsl_error(&loc, state,
+ "when block-level location layout qualifier "
+ "is not supplied either all members must "
+ "have a location layout qualifier or all "
+ "members must not have a location layout "
+ "qualifier");
+ }
+ } else {
+ first_member = false;
+ first_member_has_explicit_location =
+ qual->flags.q.explicit_location;
+ }
+ }
+
+ if (qual->flags.q.std140 ||
+ qual->flags.q.std430 ||
+ qual->flags.q.packed ||
+ qual->flags.q.shared) {
+ _mesa_glsl_error(&loc, state,
+ "uniform/shader storage block layout qualifiers "
+ "std140, std430, packed, and shared can only be "
+ "applied to uniform/shader storage blocks, not "
+ "members");
+ }
+
+ if (qual->flags.q.constant) {
+ _mesa_glsl_error(&loc, state,
+ "const storage qualifier cannot be applied "
+ "to struct or interface block members");
+ }
+
+ /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec:
+ *
+ * "A block member may be declared with a stream identifier, but
+ * the specified stream must match the stream associated with the
+ * containing block."
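+ *
+ * For example (illustrative), with a block declared `layout(stream = 1) out B {...}`,
+ * a member qualified `layout(stream = 2)` triggers this error, while
+ * `layout(stream = 1)` on a member is accepted.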
+ */
+ if (qual->flags.q.explicit_stream) {
+ unsigned qual_stream;
+ if (process_qualifier_constant(state, &loc, "stream",
+ qual->stream, &qual_stream) &&
+ qual_stream != block_stream) {
+ _mesa_glsl_error(&loc, state, "stream layout qualifier on "
+ "interface block member does not match "
+ "the interface block (%u vs %u)", qual_stream,
+ block_stream);
+ }
+ }
+
+ if (qual->flags.q.uniform && qual->has_interpolation()) {
+ _mesa_glsl_error(&loc, state,
+ "interpolation qualifiers cannot be used "
+ "with uniform interface blocks");
+ }
+
+ if ((qual->flags.q.uniform || !is_interface) &&
+ qual->has_auxiliary_storage()) {
+ _mesa_glsl_error(&loc, state,
+ "auxiliary storage qualifiers cannot be used "
+ "in uniform blocks or structures.");
+ }
+
+ if (qual->flags.q.row_major || qual->flags.q.column_major) {
+ if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
+ _mesa_glsl_error(&loc, state,
+ "row_major and column_major can only be "
+ "applied to interface blocks");
+ } else
+ validate_matrix_layout_for_type(state, &loc, decl_type, NULL);
+ }
+
+ if (qual->flags.q.read_only && qual->flags.q.write_only) {
+ _mesa_glsl_error(&loc, state, "buffer variable can't be both "
+ "readonly and writeonly.");
+ }
+
+ foreach_list_typed (ast_declaration, decl, link,
+ &decl_list->declarations) {
+ YYLTYPE loc = decl->get_location();
+
+ if (!allow_reserved_names)
+ validate_identifier(decl->identifier, loc, state);
+
+ const struct glsl_type *field_type =
+ process_array_type(&loc, decl_type, decl->array_specifier, state);
+ validate_array_dimensions(field_type, state, &loc);
+ fields[i].type = field_type;
+ fields[i].name = decl->identifier;
+ fields[i].interpolation =
+ interpret_interpolation_qualifier(qual, var_mode, state, &loc);
+ fields[i].centroid = qual->flags.q.centroid ? 1 : 0;
+ fields[i].sample = qual->flags.q.sample ? 1 : 0;
+ fields[i].patch = qual->flags.q.patch ? 1 : 0;
+ fields[i].precision = qual->precision;
+
+ if (qual->flags.q.explicit_location) {
+ unsigned qual_location;
+ if (process_qualifier_constant(state, &loc, "location",
+ qual->location, &qual_location)) {
+ fields[i].location = VARYING_SLOT_VAR0 + qual_location;
+ expl_location = fields[i].location +
+ fields[i].type->count_attribute_slots(false);
+ }
+ } else {
+ if (layout && layout->flags.q.explicit_location) {
+ fields[i].location = expl_location;
+ expl_location += fields[i].type->count_attribute_slots(false);
+ } else {
+ fields[i].location = -1;
+ }
+ }
+
+ /* Propagate row- / column-major information down the fields of the
+ * structure or interface block. Structures need this data because
+ * the structure may contain a structure that contains ... a matrix
+ * that needs the proper layout.
+ */
+ if (field_type->without_array()->is_matrix()
+ || field_type->without_array()->is_record()) {
+ /* If no layout is specified for the field, inherit the layout
+ * from the block.
+ */
+ fields[i].matrix_layout = matrix_layout;
+
+ if (qual->flags.q.row_major)
+ fields[i].matrix_layout = GLSL_MATRIX_LAYOUT_ROW_MAJOR;
+ else if (qual->flags.q.column_major)
+ fields[i].matrix_layout = GLSL_MATRIX_LAYOUT_COLUMN_MAJOR;
+
+ /* If we're processing an interface block, the matrix layout must
+ * be decided by this point.
+ */
+ assert(!is_interface
+ || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR
+ || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR);
+ }
+
+ /* Image qualifiers are allowed on buffer variables, which can only
+ * be defined inside shader storage buffer objects
+ */
+ if (layout && var_mode == ir_var_shader_storage) {
+ /* For readonly and writeonly qualifiers the field definition,
+ * if set, overwrites the layout qualifier.
+ */
+ if (qual->flags.q.read_only) {
+ fields[i].image_read_only = true;
+ fields[i].image_write_only = false;
+ } else if (qual->flags.q.write_only) {
+ fields[i].image_read_only = false;
+ fields[i].image_write_only = true;
+ } else {
+ fields[i].image_read_only = layout->flags.q.read_only;
+ fields[i].image_write_only = layout->flags.q.write_only;
+ }
+
+ /* For other qualifiers, we set the flag if either the layout
+ * qualifier or the field qualifier are set
+ */
+ fields[i].image_coherent = qual->flags.q.coherent ||
+ layout->flags.q.coherent;
+ fields[i].image_volatile = qual->flags.q._volatile ||
+ layout->flags.q._volatile;
+ fields[i].image_restrict = qual->flags.q.restrict_flag ||
+ layout->flags.q.restrict_flag;
+ }
+
+ i++;
+ }
+ }
+
+ assert(i == decl_count);
+
+ *fields_ret = fields;
+ return decl_count;
+ }
+
+
+ ir_rvalue *
+ ast_struct_specifier::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ YYLTYPE loc = this->get_location();
+
+ /* Section 4.1.8 (Structures) of the GLSL 1.10 spec says:
+ *
+ * "Anonymous structures are not supported; so embedded structures must
+ * have a declarator. A name given to an embedded struct is scoped at
+ * the same level as the struct it is embedded in."
+ *
+ * The same section of the GLSL 1.20 spec says:
+ *
+ * "Anonymous structures are not supported. Embedded structures are not
+ * supported.
+ *
+ * struct S { float f; };
+ * struct T {
+ * S; // Error: anonymous structures disallowed
+ * struct { ... }; // Error: embedded structures disallowed
+ * S s; // Okay: nested structures with name are allowed
+ * };"
+ *
+ * The GLSL ES 1.00 and 3.00 specs have similar language and examples. So,
+ * we allow embedded structures in 1.10 only.
+ */
+ if (state->language_version != 110 && state->struct_specifier_depth != 0)
+ _mesa_glsl_error(&loc, state,
+ "embedded structure declarations are not allowed");
+
+ state->struct_specifier_depth++;
+
+ unsigned expl_location = 0;
+ if (layout && layout->flags.q.explicit_location) {
+ if (!process_qualifier_constant(state, &loc, "location",
+ layout->location, &expl_location)) {
+ return NULL;
+ } else {
+ expl_location = VARYING_SLOT_VAR0 + expl_location;
+ }
+ }
+
+ glsl_struct_field *fields;
+ unsigned decl_count =
+ ast_process_struct_or_iface_block_members(instructions,
+ state,
+ &this->declarations,
+ &fields,
+ false,
+ GLSL_MATRIX_LAYOUT_INHERITED,
+ false /* allow_reserved_names */,
+ ir_var_auto,
+ layout,
+ 0, /* for interface only */
+ expl_location);
+
+ validate_identifier(this->name, loc, state);
+
+ const glsl_type *t =
+ glsl_type::get_record_instance(fields, decl_count, this->name);
+
+ if (!state->symbols->add_type(name, t)) {
+ _mesa_glsl_error(& loc, state, "struct `%s' previously defined", name);
+ } else {
+ const glsl_type **s = reralloc(state, state->user_structures,
+ const glsl_type *,
+ state->num_user_structures + 1);
+ if (s != NULL) {
+ s[state->num_user_structures] = t;
+ state->user_structures = s;
+ state->num_user_structures++;
+ }
+ }
+
+ state->struct_specifier_depth--;
+
+ /* Structure type definitions do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ /**
+ * Visitor class which detects whether a given interface block has been used.
+ */
+ class interface_block_usage_visitor : public ir_hierarchical_visitor
+ {
+ public:
+ interface_block_usage_visitor(ir_variable_mode mode, const glsl_type *block)
+ : mode(mode), block(block), found(false)
+ {
+ }
+
+ virtual ir_visitor_status visit(ir_dereference_variable *ir)
+ {
+ if (ir->var->data.mode == mode && ir->var->get_interface_type() == block) {
+ found = true;
+ return visit_stop;
+ }
+ return visit_continue;
+ }
+
+ bool usage_found() const
+ {
+ return this->found;
+ }
+
+ private:
+ ir_variable_mode mode;
+ const glsl_type *block;
+ bool found;
+ };
+
+ static bool
+ is_unsized_array_last_element(ir_variable *v)
+ {
+ const glsl_type *interface_type = v->get_interface_type();
+ int length = interface_type->length;
+
+ assert(v->type->is_unsized_array());
+
+ /* Check if it is the last element of the interface */
+ if (strcmp(interface_type->fields.structure[length-1].name, v->name) == 0)
+ return true;
+ return false;
+ }
+
+ ir_rvalue *
+ ast_interface_block::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ YYLTYPE loc = this->get_location();
+
+ /* Interface blocks must be declared at global scope */
+ if (state->current_function != NULL) {
+ _mesa_glsl_error(&loc, state,
+ "Interface block `%s' must be declared "
+ "at global scope",
+ this->block_name);
+ }
+
+ if (!this->layout.flags.q.buffer &&
+ this->layout.flags.q.std430) {
+ _mesa_glsl_error(&loc, state,
+ "std430 storage block layout qualifier is supported "
+ "only for shader storage blocks");
+ }
+
+ /* The ast_interface_block has a list of ast_declarator_lists. We
+ * need to turn those into ir_variables with an association
+ * with this uniform block.
+ */
+ enum glsl_interface_packing packing;
+ if (this->layout.flags.q.shared) {
+ packing = GLSL_INTERFACE_PACKING_SHARED;
+ } else if (this->layout.flags.q.packed) {
+ packing = GLSL_INTERFACE_PACKING_PACKED;
+ } else if (this->layout.flags.q.std430) {
+ packing = GLSL_INTERFACE_PACKING_STD430;
+ } else {
+ /* The default layout is std140.
+ */
+ packing = GLSL_INTERFACE_PACKING_STD140;
+ }
+
+ ir_variable_mode var_mode;
+ const char *iface_type_name;
+ if (this->layout.flags.q.in) {
+ var_mode = ir_var_shader_in;
+ iface_type_name = "in";
+ } else if (this->layout.flags.q.out) {
+ var_mode = ir_var_shader_out;
+ iface_type_name = "out";
+ } else if (this->layout.flags.q.uniform) {
+ var_mode = ir_var_uniform;
+ iface_type_name = "uniform";
+ } else if (this->layout.flags.q.buffer) {
+ var_mode = ir_var_shader_storage;
+ iface_type_name = "buffer";
+ } else {
+ var_mode = ir_var_auto;
+ iface_type_name = "UNKNOWN";
+ assert(!"interface block layout qualifier not found!");
+ }
+
+ enum glsl_matrix_layout matrix_layout = GLSL_MATRIX_LAYOUT_INHERITED;
+ if (this->layout.flags.q.row_major)
+ matrix_layout = GLSL_MATRIX_LAYOUT_ROW_MAJOR;
+ else if (this->layout.flags.q.column_major)
+ matrix_layout = GLSL_MATRIX_LAYOUT_COLUMN_MAJOR;
+
+ bool redeclaring_per_vertex = strcmp(this->block_name, "gl_PerVertex") == 0;
+ exec_list declared_variables;
+ glsl_struct_field *fields;
+
+ /* Treat an interface block as one level of nesting, so that embedded struct
+ * specifiers will be disallowed.
+ */
+ state->struct_specifier_depth++;
+
+ /* For blocks that accept memory qualifiers (i.e. shader storage), verify
+ * that we don't have incompatible qualifiers
+ */
+ if (this->layout.flags.q.read_only && this->layout.flags.q.write_only) {
+ _mesa_glsl_error(&loc, state,
+ "Interface block sets both readonly and writeonly");
+ }
+
+ unsigned qual_stream;
+ if (!process_qualifier_constant(state, &loc, "stream", this->layout.stream,
+ &qual_stream) ||
+ !validate_stream_qualifier(&loc, state, qual_stream)) {
+ /* If the stream qualifier is invalid it doesn't make sense to continue
+ * on and try to compare stream layouts on member variables against it
+ * so just return early.
+ */
+ return NULL;
+ }
+
+ unsigned expl_location = 0;
+ if (layout.flags.q.explicit_location) {
+ if (!process_qualifier_constant(state, &loc, "location",
+ layout.location, &expl_location)) {
+ return NULL;
+ } else {
+ expl_location = VARYING_SLOT_VAR0 + expl_location;
+ }
+ }
+
+ unsigned int num_variables =
+ ast_process_struct_or_iface_block_members(&declared_variables,
+ state,
+ &this->declarations,
+ &fields,
+ true,
+ matrix_layout,
+ redeclaring_per_vertex,
+ var_mode,
+ &this->layout,
+ qual_stream,
+ expl_location);
+
+ state->struct_specifier_depth--;
+
+ if (!redeclaring_per_vertex) {
+ validate_identifier(this->block_name, loc, state);
+
+ /* From section 4.3.9 ("Interface Blocks") of the GLSL 4.50 spec:
+ *
+ * "Block names have no other use within a shader beyond interface
+ * matching; it is a compile-time error to use a block name at global
+ * scope for anything other than as a block name."
+ */
+ ir_variable *var = state->symbols->get_variable(this->block_name);
+ if (var && !var->type->is_interface()) {
+ _mesa_glsl_error(&loc, state, "Block name `%s' is "
+ "already used in the scope.",
+ this->block_name);
+ }
+ }
+
+ const glsl_type *earlier_per_vertex = NULL;
+ if (redeclaring_per_vertex) {
+ /* Find the previous declaration of gl_PerVertex. If we're redeclaring
+ * the named interface block gl_in, we can find it by looking at the
+ * previous declaration of gl_in. Otherwise we can find it by looking
+ * at the previous declaration of any of the built-in outputs,
+ * e.g. gl_Position.
+ *
+ * Also check that the instance name and array-ness of the redeclaration
+ * are correct.
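+ *
+ * For example (illustrative), a geometry shader redeclares the input block as
+ *
+ *    in gl_PerVertex { vec4 gl_Position; } gl_in[];
+ *
+ * and a vertex shader redeclares the output block with no instance name:
+ *
+ *    out gl_PerVertex { vec4 gl_Position; };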
+ */
+ switch (var_mode) {
+ case ir_var_shader_in:
+ if (ir_variable *earlier_gl_in =
+ state->symbols->get_variable("gl_in")) {
+ earlier_per_vertex = earlier_gl_in->get_interface_type();
+ } else {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of gl_PerVertex input not allowed "
+ "in the %s shader",
+ _mesa_shader_stage_to_string(state->stage));
+ }
+ if (this->instance_name == NULL ||
+ strcmp(this->instance_name, "gl_in") != 0 || this->array_specifier == NULL ||
+ !this->array_specifier->is_single_dimension()) {
+ _mesa_glsl_error(&loc, state,
+ "gl_PerVertex input must be redeclared as "
+ "gl_in[]");
+ }
+ break;
+ case ir_var_shader_out:
+ if (ir_variable *earlier_gl_Position =
+ state->symbols->get_variable("gl_Position")) {
+ earlier_per_vertex = earlier_gl_Position->get_interface_type();
+ } else if (ir_variable *earlier_gl_out =
+ state->symbols->get_variable("gl_out")) {
+ earlier_per_vertex = earlier_gl_out->get_interface_type();
+ } else {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of gl_PerVertex output not "
+ "allowed in the %s shader",
+ _mesa_shader_stage_to_string(state->stage));
+ }
+ if (state->stage == MESA_SHADER_TESS_CTRL) {
+ if (this->instance_name == NULL ||
+ strcmp(this->instance_name, "gl_out") != 0 || this->array_specifier == NULL) {
+ _mesa_glsl_error(&loc, state,
+ "gl_PerVertex output must be redeclared as "
+ "gl_out[]");
+ }
+ } else {
+ if (this->instance_name != NULL) {
+ _mesa_glsl_error(&loc, state,
+ "gl_PerVertex output may not be redeclared with "
+ "an instance name");
+ }
+ }
+ break;
+ default:
+ _mesa_glsl_error(&loc, state,
+ "gl_PerVertex must be declared as an input or an "
+ "output");
+ break;
+ }
+
+ if (earlier_per_vertex == NULL) {
+ /* An error has already been reported. Bail out to avoid null
+ * dereferences later in this function.
+ */
+ return NULL;
+ }
+
+ /* Copy locations from the old gl_PerVertex interface block. */
+ for (unsigned i = 0; i < num_variables; i++) {
+ int j = earlier_per_vertex->field_index(fields[i].name);
+ if (j == -1) {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of gl_PerVertex must be a subset "
+ "of the built-in members of gl_PerVertex");
+ } else {
+ fields[i].location =
+ earlier_per_vertex->fields.structure[j].location;
+ fields[i].interpolation =
+ earlier_per_vertex->fields.structure[j].interpolation;
+ fields[i].centroid =
+ earlier_per_vertex->fields.structure[j].centroid;
+ fields[i].sample =
+ earlier_per_vertex->fields.structure[j].sample;
+ fields[i].patch =
+ earlier_per_vertex->fields.structure[j].patch;
+ fields[i].precision =
+ earlier_per_vertex->fields.structure[j].precision;
+ }
+ }
+
+ /* From section 7.1 ("Built-in Language Variables") of the GLSL 4.10
+ * spec:
+ *
+ * If a built-in interface block is redeclared, it must appear in
+ * the shader before any use of any member included in the built-in
+ * declaration, or a compilation error will result.
+ *
+ * This appears to be a clarification to the behaviour established for
+ * gl_PerVertex by GLSL 1.50, therefore we implement this behaviour
+ * regardless of GLSL version.
+ */
+ interface_block_usage_visitor v(var_mode, earlier_per_vertex);
+ v.run(instructions);
+ if (v.usage_found()) {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of a built-in interface block must "
+ "appear before any use of any member of the "
+ "interface block");
+ }
+ }
+
+ const glsl_type *block_type =
+ glsl_type::get_interface_instance(fields,
+ num_variables,
+ packing,
+ this->block_name);
+
+ if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state, "interface block `%s' with type `%s' "
+ "already taken in the current scope",
+ this->block_name, iface_type_name);
+ }
+
+ /* Since interface blocks cannot contain statements, it should be
+ * impossible for the block to generate any instructions.
+ */
+ assert(declared_variables.is_empty());
+
+ /* From section 4.3.4 (Inputs) of the GLSL 1.50 spec:
+ *
+ * Geometry shader input variables get the per-vertex values written
+ * out by vertex shader output variables of the same names. Since a
+ * geometry shader operates on a set of vertices, each input varying
+ * variable (or input block, see interface blocks below) needs to be
+ * declared as an array.
+ */
+ if (state->stage == MESA_SHADER_GEOMETRY && this->array_specifier == NULL &&
+ var_mode == ir_var_shader_in) {
+ _mesa_glsl_error(&loc, state, "geometry shader inputs must be arrays");
+ } else if ((state->stage == MESA_SHADER_TESS_CTRL ||
+ state->stage == MESA_SHADER_TESS_EVAL) &&
+ this->array_specifier == NULL &&
+ var_mode == ir_var_shader_in) {
+ _mesa_glsl_error(&loc, state, "per-vertex tessellation shader inputs must be arrays");
+ } else if (state->stage == MESA_SHADER_TESS_CTRL &&
+ this->array_specifier == NULL &&
+ var_mode == ir_var_shader_out) {
+ _mesa_glsl_error(&loc, state, "tessellation control shader outputs must be arrays");
+ }
+
+
+ /* Page 39 (page 45 of the PDF) of section 4.3.7 in the GLSL ES 3.00 spec
+ * says:
+ *
+ * "If an instance name (instance-name) is used, then it puts all the
+ * members inside a scope within its own name space, accessed with the
+ * field selector ( . ) operator (analogously to structures)."
+ */
+ if (this->instance_name) {
+ if (redeclaring_per_vertex) {
+ /* When a built-in in an unnamed interface block is redeclared,
+ * get_variable_being_redeclared() calls
+ * check_builtin_array_max_size() to make sure that built-in array
+ * variables aren't redeclared to illegal sizes. But we're looking
+ * at a redeclaration of a named built-in interface block. So we
+ * have to manually call check_builtin_array_max_size() for all parts
+ * of the interface that are arrays.
+ */
+ for (unsigned i = 0; i < num_variables; i++) {
+ if (fields[i].type->is_array()) {
+ const unsigned size = fields[i].type->array_size();
+ check_builtin_array_max_size(fields[i].name, size, loc, state);
+ }
+ }
+ } else {
+ validate_identifier(this->instance_name, loc, state);
+ }
+
+ ir_variable *var;
+
+ if (this->array_specifier != NULL) {
+ const glsl_type *block_array_type =
+ process_array_type(&loc, block_type, this->array_specifier, state);
+
+ /* Section 4.3.7 (Interface Blocks) of the GLSL 1.50 spec says:
+ *
+ * For uniform blocks declared as an array, each individual array
+ * element corresponds to a separate buffer object backing one
+ * instance of the block. As the array size indicates the number
+ * of buffer objects needed, uniform block array declarations
+ * must specify an array size.
+ *
+ * And a few paragraphs later:
+ *
+ * Geometry shader input blocks must be declared as arrays and
+ * follow the array declaration and linking rules for all
+ * geometry shader inputs. All other input and output block
+ * arrays must specify an array size.
+ *
+ * The same applies to tessellation shaders.
+ *
+ * The upshot of this is that the only circumstance where an
+ * interface array size *doesn't* need to be specified is on a
+ * geometry shader input, tessellation control shader input,
+ * tessellation control shader output, and tessellation evaluation
+ * shader input.
+ */
+ if (block_array_type->is_unsized_array()) {
+ bool allow_inputs = state->stage == MESA_SHADER_GEOMETRY ||
+ state->stage == MESA_SHADER_TESS_CTRL ||
+ state->stage == MESA_SHADER_TESS_EVAL;
+ bool allow_outputs = state->stage == MESA_SHADER_TESS_CTRL;
+
+ if (this->layout.flags.q.in) {
+ if (!allow_inputs)
+ _mesa_glsl_error(&loc, state,
+ "unsized input block arrays not allowed in "
+ "%s shader",
+ _mesa_shader_stage_to_string(state->stage));
+ } else if (this->layout.flags.q.out) {
+ if (!allow_outputs)
+ _mesa_glsl_error(&loc, state,
+ "unsized output block arrays not allowed in "
+ "%s shader",
+ _mesa_shader_stage_to_string(state->stage));
+ } else {
+ /* by elimination, this is a uniform block array */
+ _mesa_glsl_error(&loc, state,
+ "unsized uniform block arrays not allowed in "
+ "%s shader",
+ _mesa_shader_stage_to_string(state->stage));
+ }
+ }
+
+ /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec:
+ *
+ * * Arrays of arrays of blocks are not allowed
+ */
+ if (state->es_shader && block_array_type->is_array() &&
+ block_array_type->fields.array->is_array()) {
+ _mesa_glsl_error(&loc, state,
+ "arrays of arrays of interface blocks are "
+ "not allowed");
+ }
+
+ var = new(state) ir_variable(block_array_type,
+ this->instance_name,
+ var_mode);
+ } else {
+ var = new(state) ir_variable(block_type,
+ this->instance_name,
+ var_mode);
+ }
+
+ var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED
+ ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout;
+
+ if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
+ var->data.read_only = true;
+
+ if (state->stage == MESA_SHADER_GEOMETRY && var_mode == ir_var_shader_in)
+ handle_geometry_shader_input_decl(state, loc, var);
+ else if ((state->stage == MESA_SHADER_TESS_CTRL ||
+ state->stage == MESA_SHADER_TESS_EVAL) && var_mode == ir_var_shader_in)
+ handle_tess_shader_input_decl(state, loc, var);
+ else if (state->stage == MESA_SHADER_TESS_CTRL && var_mode == ir_var_shader_out)
+ handle_tess_ctrl_shader_output_decl(state, loc, var);
+
+ for (unsigned i = 0; i < num_variables; i++) {
+ if (fields[i].type->is_unsized_array()) {
+ if (var_mode == ir_var_shader_storage) {
+ if (i != (num_variables - 1)) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ fields[i].name);
+ }
+ } else {
+ /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+ *
+ * "If an array is declared as the last member of a shader storage
+ * block and the size is not specified at compile-time, it is
+ * sized at run-time. In all other cases, arrays are sized only
+ * at compile-time."
+ */
+ if (state->es_shader) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ fields[i].name);
+ }
+ }
+ }
+ }
+
+ if (ir_variable *earlier =
+ state->symbols->get_variable(this->instance_name)) {
+ if (!redeclaring_per_vertex) {
+ _mesa_glsl_error(&loc, state, "`%s' redeclared",
+ this->instance_name);
+ }
+ earlier->data.how_declared = ir_var_declared_normally;
+ earlier->type = var->type;
+ earlier->reinit_interface_type(block_type);
+ delete var;
+ } else {
+ if (this->layout.flags.q.explicit_binding) {
+ apply_explicit_binding(state, &loc, var, var->type,
+ &this->layout);
+ }
+
+ var->data.stream = qual_stream;
+ if (layout.flags.q.explicit_location) {
+ var->data.location = expl_location;
+ var->data.explicit_location = true;
+ }
+
+ state->symbols->add_variable(var);
+ instructions->push_tail(var);
+ }
+ } else {
+ /* In order to have an array size, the block must also be declared with
+ * an instance name.
+ */
+ assert(this->array_specifier == NULL);
+
+ for (unsigned i = 0; i < num_variables; i++) {
+ ir_variable *var =
+ new(state) ir_variable(fields[i].type,
+ ralloc_strdup(state, fields[i].name),
+ var_mode);
+ var->data.interpolation = fields[i].interpolation;
+ var->data.centroid = fields[i].centroid;
+ var->data.sample = fields[i].sample;
+ var->data.patch = fields[i].patch;
+ var->data.stream = qual_stream;
+ var->data.location = fields[i].location;
+ if (fields[i].location != -1)
+ var->data.explicit_location = true;
+ var->init_interface_type(block_type);
+
+ if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
+ var->data.read_only = true;
+
+ /* Precision qualifiers do not have any meaning in Desktop GLSL */
+ if (state->es_shader) {
+ var->data.precision =
+ select_gles_precision(fields[i].precision, fields[i].type,
+ state, &loc);
+ }
+
+ if (fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED) {
+ var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED
+ ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout;
+ } else {
+ var->data.matrix_layout = fields[i].matrix_layout;
+ }
+
+ if (var->data.mode == ir_var_shader_storage) {
+ var->data.image_read_only = fields[i].image_read_only;
+ var->data.image_write_only = fields[i].image_write_only;
+ var->data.image_coherent = fields[i].image_coherent;
+ var->data.image_volatile = fields[i].image_volatile;
+ var->data.image_restrict = fields[i].image_restrict;
+ }
+
+ /* Examine var name here since var may get deleted in the next call */
+ bool var_is_gl_id = is_gl_identifier(var->name);
+
+ if (redeclaring_per_vertex) {
+ ir_variable *earlier =
+ get_variable_being_redeclared(var, loc, state,
+ true /* allow_all_redeclarations */);
+ if (!var_is_gl_id || earlier == NULL) {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of gl_PerVertex can only "
+ "include built-in variables");
+ } else if (earlier->data.how_declared == ir_var_declared_normally) {
+ _mesa_glsl_error(&loc, state,
+ "`%s' has already been redeclared",
+ earlier->name);
+ } else {
+ earlier->data.how_declared = ir_var_declared_in_block;
+ earlier->reinit_interface_type(block_type);
+ }
+ continue;
+ }
+
+ if (state->symbols->get_variable(var->name) != NULL)
+ _mesa_glsl_error(&loc, state, "`%s' redeclared", var->name);
+
+ /* Propagate the "binding" keyword into this UBO/SSBO's fields.
+ * The UBO declaration itself doesn't get an ir_variable unless it
+ * has an instance name. This is ugly.
+ */
+ if (this->layout.flags.q.explicit_binding) {
+ apply_explicit_binding(state, &loc, var,
+ var->get_interface_type(), &this->layout);
+ }
+
+ if (var->type->is_unsized_array()) {
+ if (var->is_in_shader_storage_block()) {
+ if (!is_unsized_array_last_element(var)) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ var->name);
+ }
+ var->data.from_ssbo_unsized_array = true;
+ } else {
+ /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+ *
+ * "If an array is declared as the last member of a shader storage
+ * block and the size is not specified at compile-time, it is
+ * sized at run-time. In all other cases, arrays are sized only
+ * at compile-time."
+ */
+ if (state->es_shader) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ var->name);
+ }
+ }
+ }
+
+ state->symbols->add_variable(var);
+ instructions->push_tail(var);
+ }
+
+ if (redeclaring_per_vertex && block_type != earlier_per_vertex) {
+ /* From section 7.1 ("Built-in Language Variables") of the GLSL 4.10 spec:
+ *
+ * It is also a compilation error ... to redeclare a built-in
+ * block and then use a member from that built-in block that was
+ * not included in the redeclaration.
+ *
+ * This appears to be a clarification to the behaviour established
+ * for gl_PerVertex by GLSL 1.50, therefore we implement this
+ * behaviour regardless of GLSL version.
+ *
+ * To prevent the shader from using a member that was not included in
+ * the redeclaration, we disable any ir_variables that are still
+ * associated with the old declaration of gl_PerVertex (since we've
+ * already updated all of the variables contained in the new
+ * gl_PerVertex to point to it).
+ *
+ * As a side effect this will prevent
+ * validate_intrastage_interface_blocks() from getting confused and
+ * thinking there are conflicting definitions of gl_PerVertex in the
+ * shader.
+ */
+ foreach_in_list_safe(ir_instruction, node, instructions) {
+ ir_variable *const var = node->as_variable();
+ if (var != NULL &&
+ var->get_interface_type() == earlier_per_vertex &&
+ var->data.mode == var_mode) {
+ if (var->data.how_declared == ir_var_declared_normally) {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of gl_PerVertex cannot "
+ "follow a redeclaration of `%s'",
+ var->name);
+ }
+ state->symbols->disable_variable(var->name);
+ var->remove();
+ }
+ }
+ }
+ }
+
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_tcs_output_layout::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ YYLTYPE loc = this->get_location();
+
+ unsigned num_vertices;
+ if (!state->out_qualifier->vertices->
+ process_qualifier_constant(state, "vertices", &num_vertices,
+ false)) {
+ /* return here to stop cascading incorrect error messages */
+ return NULL;
+ }
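+
+ /* For example (illustrative), `layout(vertices = 4) out;` sets num_vertices
+ * to 4; any unsized per-vertex outputs declared earlier are sized to 4 below.
+ */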
+
+ /* If any shader outputs occurred before this declaration and specified an
+ * array size, make sure the size they specified is consistent with the
+ * output vertex count declared here.
+ */
+ if (state->tcs_output_size != 0 && state->tcs_output_size != num_vertices) {
+ _mesa_glsl_error(&loc, state,
+ "this tessellation control shader output layout "
+ "specifies %u vertices, but a previous output "
+ "is declared with size %u",
+ num_vertices, state->tcs_output_size);
+ return NULL;
+ }
+
+ state->tcs_output_vertices_specified = true;
+
+ /* If any shader outputs occurred before this declaration and did not
+ * specify an array size, their size is determined now.
+ */
+ foreach_in_list (ir_instruction, node, instructions) {
+ ir_variable *var = node->as_variable();
+ if (var == NULL || var->data.mode != ir_var_shader_out)
+ continue;
+
+ /* Note: Not all tessellation control shader outputs are arrays. */
+ if (!var->type->is_unsized_array() || var->data.patch)
+ continue;
+
+ if (var->data.max_array_access >= num_vertices) {
+ _mesa_glsl_error(&loc, state,
+ "this tessellation control shader output layout "
+ "specifies %u vertices, but an access to element "
+ "%u of output `%s' already exists", num_vertices,
+ var->data.max_array_access, var->name);
+ } else {
+ var->type = glsl_type::get_array_instance(var->type->fields.array,
+ num_vertices);
+ }
+ }
+
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_gs_input_layout::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ YYLTYPE loc = this->get_location();
+
+ /* If any geometry input layout declaration preceded this one, make sure it
+ * was consistent with this one.
+ */
+ if (state->gs_input_prim_type_specified &&
+ state->in_qualifier->prim_type != this->prim_type) {
+ _mesa_glsl_error(&loc, state,
+ "geometry shader input layout does not match"
+ " previous declaration");
+ return NULL;
+ }
+
+ /* If any shader inputs occurred before this declaration and specified an
+ * array size, make sure the size they specified is consistent with the
+ * primitive type.
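+ *
+ * For example (illustrative), `layout(triangles) in;` implies 3 vertices per
+ * primitive, so an earlier input declared with size 4 is an error.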
+ */
+ unsigned num_vertices = vertices_per_prim(this->prim_type);
+ if (state->gs_input_size != 0 && state->gs_input_size != num_vertices) {
+ _mesa_glsl_error(&loc, state,
+ "this geometry shader input layout implies %u vertices"
+ " per primitive, but a previous input is declared"
+ " with size %u", num_vertices, state->gs_input_size);
+ return NULL;
+ }
+
+ state->gs_input_prim_type_specified = true;
+
+ /* If any shader inputs occurred before this declaration and did not
+ * specify an array size, their size is determined now.
+ */
+ foreach_in_list(ir_instruction, node, instructions) {
+ ir_variable *var = node->as_variable();
+ if (var == NULL || var->data.mode != ir_var_shader_in)
+ continue;
+
+ /* Note: gl_PrimitiveIDIn has mode ir_var_shader_in, but it's not an
+ * array; skip it.
+ */
+
+ if (var->type->is_unsized_array()) {
+ if (var->data.max_array_access >= num_vertices) {
+ _mesa_glsl_error(&loc, state,
+ "this geometry shader input layout implies %u"
+ " vertices, but an access to element %u of input"
+ " `%s' already exists", num_vertices,
+ var->data.max_array_access, var->name);
+ } else {
+ var->type = glsl_type::get_array_instance(var->type->fields.array,
+ num_vertices);
+ }
+ }
+ }
+
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_cs_input_layout::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ YYLTYPE loc = this->get_location();
+
+ /* From the ARB_compute_shader specification:
+ *
+ * If the local size of the shader in any dimension is greater
+ * than the maximum size supported by the implementation for that
+ * dimension, a compile-time error results.
+ *
+ * It is not clear from the spec how the error should be reported if
+ * the total size of the work group exceeds
+ * MAX_COMPUTE_WORK_GROUP_INVOCATIONS, but it seems reasonable to
+ * report it at compile time as well.
+ */
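+
+ /* For example (illustrative), `layout(local_size_x = 8, local_size_y = 8) in;`
+ * declares an 8x8x1 work group: the unspecified z dimension defaults to 1
+ * below, and the product 8*8*1 = 64 is checked against
+ * MAX_COMPUTE_WORK_GROUP_INVOCATIONS.
+ */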
+ GLuint64 total_invocations = 1;
+ unsigned qual_local_size[3];
+ for (int i = 0; i < 3; i++) {
+
+ char *local_size_str = ralloc_asprintf(NULL, "invalid local_size_%c",
+ 'x' + i);
+ /* Infer a local_size of 1 for unspecified dimensions */
+ if (this->local_size[i] == NULL) {
+ qual_local_size[i] = 1;
+ } else if (!this->local_size[i]->
+ process_qualifier_constant(state, local_size_str,
+ &qual_local_size[i], false)) {
+ ralloc_free(local_size_str);
+ return NULL;
+ }
+ ralloc_free(local_size_str);
+
+ if (qual_local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) {
+ _mesa_glsl_error(&loc, state,
+ "local_size_%c exceeds MAX_COMPUTE_WORK_GROUP_SIZE"
+ " (%d)", 'x' + i,
+ state->ctx->Const.MaxComputeWorkGroupSize[i]);
+ break;
+ }
+ total_invocations *= qual_local_size[i];
+ if (total_invocations >
+ state->ctx->Const.MaxComputeWorkGroupInvocations) {
+ _mesa_glsl_error(&loc, state,
+ "product of local_sizes exceeds "
+ "MAX_COMPUTE_WORK_GROUP_INVOCATIONS (%d)",
+ state->ctx->Const.MaxComputeWorkGroupInvocations);
+ break;
+ }
+ }
+
+ /* If any compute input layout declaration preceded this one, make sure it
+ * was consistent with this one.
+ */
+ if (state->cs_input_local_size_specified) {
+ for (int i = 0; i < 3; i++) {
+ if (state->cs_input_local_size[i] != qual_local_size[i]) {
+ _mesa_glsl_error(&loc, state,
+ "compute shader input layout does not match"
+ " previous declaration");
+ return NULL;
+ }
+ }
+ }
+
+ state->cs_input_local_size_specified = true;
+ for (int i = 0; i < 3; i++)
+ state->cs_input_local_size[i] = qual_local_size[i];
+
+ /* We may now declare the built-in constant gl_WorkGroupSize (see
+ * builtin_variable_generator::generate_constants() for why we didn't
+ * declare it earlier).
+ */
+ ir_variable *var = new(state->symbols)
+ ir_variable(glsl_type::uvec3_type, "gl_WorkGroupSize", ir_var_auto);
+ var->data.how_declared = ir_var_declared_implicitly;
+ var->data.read_only = true;
+ instructions->push_tail(var);
+ state->symbols->add_variable(var);
+ ir_constant_data data;
+ memset(&data, 0, sizeof(data));
+ for (int i = 0; i < 3; i++)
+ data.u[i] = qual_local_size[i];
+ var->constant_value = new(var) ir_constant(glsl_type::uvec3_type, &data);
+ var->constant_initializer =
+ new(var) ir_constant(glsl_type::uvec3_type, &data);
+ var->data.has_initializer = true;
+
+ return NULL;
+ }
+
+
+ static void
+ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
+ exec_list *instructions)
+ {
+ bool gl_FragColor_assigned = false;
+ bool gl_FragData_assigned = false;
+ bool gl_FragSecondaryColor_assigned = false;
+ bool gl_FragSecondaryData_assigned = false;
+ bool user_defined_fs_output_assigned = false;
+ ir_variable *user_defined_fs_output = NULL;
+
+ /* It would be nice to have proper location information. */
+ YYLTYPE loc;
+ memset(&loc, 0, sizeof(loc));
+
+ foreach_in_list(ir_instruction, node, instructions) {
+ ir_variable *var = node->as_variable();
+
+ if (!var || !var->data.assigned)
+ continue;
+
+ if (strcmp(var->name, "gl_FragColor") == 0)
+ gl_FragColor_assigned = true;
+ else if (strcmp(var->name, "gl_FragData") == 0)
+ gl_FragData_assigned = true;
+ else if (strcmp(var->name, "gl_SecondaryFragColorEXT") == 0)
+ gl_FragSecondaryColor_assigned = true;
+ else if (strcmp(var->name, "gl_SecondaryFragDataEXT") == 0)
+ gl_FragSecondaryData_assigned = true;
+ else if (!is_gl_identifier(var->name)) {
+ if (state->stage == MESA_SHADER_FRAGMENT &&
+ var->data.mode == ir_var_shader_out) {
+ user_defined_fs_output_assigned = true;
+ user_defined_fs_output = var;
+ }
+ }
+ }
+
+ /* From the GLSL 1.30 spec:
+ *
+ * "If a shader statically assigns a value to gl_FragColor, it
+ * may not assign a value to any element of gl_FragData. If a
+ * shader statically writes a value to any element of
+ * gl_FragData, it may not assign a value to
+ * gl_FragColor. That is, a shader may assign values to either
+ * gl_FragColor or gl_FragData, but not both. Multiple shaders
+ * linked together must also consistently write just one of
+ * these variables. Similarly, if user declared output
+ * variables are in use (statically assigned to), then the
+ * built-in variables gl_FragColor and gl_FragData may not be
+ * assigned to. These incorrect usages all generate compile
+ * time errors."
+ */
+ if (gl_FragColor_assigned && gl_FragData_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragColor' and `gl_FragData'");
+ } else if (gl_FragColor_assigned && user_defined_fs_output_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragColor' and `%s'",
+ user_defined_fs_output->name);
+ } else if (gl_FragSecondaryColor_assigned && gl_FragSecondaryData_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragSecondaryColorEXT' and"
+ " `gl_FragSecondaryDataEXT'");
+ } else if (gl_FragColor_assigned && gl_FragSecondaryData_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragColor' and"
+ " `gl_FragSecondaryDataEXT'");
+ } else if (gl_FragData_assigned && gl_FragSecondaryColor_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragData' and"
+ " `gl_FragSecondaryColorEXT'");
+ } else if (gl_FragData_assigned && user_defined_fs_output_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragData' and `%s'",
+ user_defined_fs_output->name);
+ }
+
+ if ((gl_FragSecondaryColor_assigned || gl_FragSecondaryData_assigned) &&
+ !state->EXT_blend_func_extended_enable) {
+ _mesa_glsl_error(&loc, state,
+ "Dual source blending requires EXT_blend_func_extended");
+ }
+ }
+
+
+ static void
+ remove_per_vertex_blocks(exec_list *instructions,
+ _mesa_glsl_parse_state *state, ir_variable_mode mode)
+ {
+ /* Find the gl_PerVertex interface block of the appropriate (in/out) mode,
+ * if it exists in this shader type.
+ */
+ const glsl_type *per_vertex = NULL;
+ switch (mode) {
+ case ir_var_shader_in:
+ if (ir_variable *gl_in = state->symbols->get_variable("gl_in"))
+ per_vertex = gl_in->get_interface_type();
+ break;
+ case ir_var_shader_out:
+ if (ir_variable *gl_Position =
+ state->symbols->get_variable("gl_Position")) {
+ per_vertex = gl_Position->get_interface_type();
+ }
+ break;
+ default:
+ assert(!"Unexpected mode");
+ break;
+ }
+
+ /* If we didn't find a built-in gl_PerVertex interface block, then we don't
+ * need to do anything.
+ */
+ if (per_vertex == NULL)
+ return;
+
+ /* If the interface block is used by the shader, then we don't need to do
+ * anything.
+ */
+ interface_block_usage_visitor v(mode, per_vertex);
+ v.run(instructions);
+ if (v.usage_found())
+ return;
+
+ /* Remove any ir_variable declarations that refer to the interface block
+ * we're removing.
+ */
+ foreach_in_list_safe(ir_instruction, node, instructions) {
+ ir_variable *const var = node->as_variable();
+ if (var != NULL && var->get_interface_type() == per_vertex &&
+ var->data.mode == mode) {
+ state->symbols->disable_variable(var->name);
+ var->remove();
+ }
+ }
+ }
--- /dev/null
+ /*
+ * Copyright © 2008, 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+ #include <stdio.h>
+ #include <stdarg.h>
+ #include <string.h>
+ #include <assert.h>
+
+ #include "main/core.h" /* for struct gl_context */
+ #include "main/context.h"
+ #include "main/shaderobj.h"
+ #include "util/u_atomic.h" /* for p_atomic_cmpxchg */
+ #include "util/ralloc.h"
+ #include "ast.h"
+ #include "glsl_parser_extras.h"
+ #include "glsl_parser.h"
+ #include "ir_optimization.h"
+ #include "loop_analysis.h"
+
+ /**
+ * Format a short human-readable description of the given GLSL version.
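+ *
+ * For example (illustrative), is_es=true with version=300 yields "GLSL ES 3.00".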
+ */
+ const char *
+ glsl_compute_version_string(void *mem_ctx, bool is_es, unsigned version)
+ {
+ return ralloc_asprintf(mem_ctx, "GLSL%s %d.%02d", is_es ? " ES" : "",
+ version / 100, version % 100);
+ }
+
+
+ static const unsigned known_desktop_glsl_versions[] =
+ { 110, 120, 130, 140, 150, 330, 400, 410, 420, 430, 440, 450 };
+
+
+ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
+ gl_shader_stage stage,
+ void *mem_ctx)
+ : ctx(_ctx), cs_input_local_size_specified(false), cs_input_local_size(),
+ switch_state()
+ {
+ assert(stage < MESA_SHADER_STAGES);
+ this->stage = stage;
+
+ this->scanner = NULL;
+ this->translation_unit.make_empty();
+ this->symbols = new(mem_ctx) glsl_symbol_table;
+
+ this->info_log = ralloc_strdup(mem_ctx, "");
+ this->error = false;
+ this->loop_nesting_ast = NULL;
+
+ this->struct_specifier_depth = 0;
+
+ this->uses_builtin_functions = false;
+
+ /* Set default language version and extensions */
+ this->language_version = 110;
+ this->forced_language_version = ctx->Const.ForceGLSLVersion;
+ this->es_shader = false;
+ this->ARB_texture_rectangle_enable = true;
+
+ /* OpenGL ES 2.0 has different defaults from desktop GL. */
+ if (ctx->API == API_OPENGLES2) {
+ this->language_version = 100;
+ this->es_shader = true;
+ this->ARB_texture_rectangle_enable = false;
+ }
+
+ this->extensions = &ctx->Extensions;
+
++ this->ARB_compute_shader_enable = true;
++
+ this->Const.MaxLights = ctx->Const.MaxLights;
+ this->Const.MaxClipPlanes = ctx->Const.MaxClipPlanes;
+ this->Const.MaxTextureUnits = ctx->Const.MaxTextureUnits;
+ this->Const.MaxTextureCoords = ctx->Const.MaxTextureCoordUnits;
+ this->Const.MaxVertexAttribs = ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs;
+ this->Const.MaxVertexUniformComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents;
+ this->Const.MaxVertexTextureImageUnits = ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits;
+ this->Const.MaxCombinedTextureImageUnits = ctx->Const.MaxCombinedTextureImageUnits;
+ this->Const.MaxTextureImageUnits = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;
+ this->Const.MaxFragmentUniformComponents = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents;
+ this->Const.MinProgramTexelOffset = ctx->Const.MinProgramTexelOffset;
+ this->Const.MaxProgramTexelOffset = ctx->Const.MaxProgramTexelOffset;
+
+ this->Const.MaxDrawBuffers = ctx->Const.MaxDrawBuffers;
+
+ this->Const.MaxDualSourceDrawBuffers = ctx->Const.MaxDualSourceDrawBuffers;
+
+ /* 1.50 constants */
+ this->Const.MaxVertexOutputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
+ this->Const.MaxGeometryInputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents;
+ this->Const.MaxGeometryOutputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents;
+ this->Const.MaxFragmentInputComponents = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents;
+ this->Const.MaxGeometryTextureImageUnits = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits;
+ this->Const.MaxGeometryOutputVertices = ctx->Const.MaxGeometryOutputVertices;
+ this->Const.MaxGeometryTotalOutputComponents = ctx->Const.MaxGeometryTotalOutputComponents;
+ this->Const.MaxGeometryUniformComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents;
+
+ this->Const.MaxVertexAtomicCounters = ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters;
+ this->Const.MaxTessControlAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicCounters;
+ this->Const.MaxTessEvaluationAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicCounters;
+ this->Const.MaxGeometryAtomicCounters = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters;
+ this->Const.MaxFragmentAtomicCounters = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters;
+ this->Const.MaxCombinedAtomicCounters = ctx->Const.MaxCombinedAtomicCounters;
+ this->Const.MaxAtomicBufferBindings = ctx->Const.MaxAtomicBufferBindings;
+ this->Const.MaxVertexAtomicCounterBuffers =
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers;
+ this->Const.MaxTessControlAtomicCounterBuffers =
+ ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers;
+ this->Const.MaxTessEvaluationAtomicCounterBuffers =
+ ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers;
+ this->Const.MaxGeometryAtomicCounterBuffers =
+ ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers;
+ this->Const.MaxFragmentAtomicCounterBuffers =
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
+ this->Const.MaxCombinedAtomicCounterBuffers =
+ ctx->Const.MaxCombinedAtomicBuffers;
+ this->Const.MaxAtomicCounterBufferSize =
+ ctx->Const.MaxAtomicBufferSize;
+
+ /* Compute shader constants */
+ for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupCount); i++)
+ this->Const.MaxComputeWorkGroupCount[i] = ctx->Const.MaxComputeWorkGroupCount[i];
+ for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupSize); i++)
+ this->Const.MaxComputeWorkGroupSize[i] = ctx->Const.MaxComputeWorkGroupSize[i];
+
+ this->Const.MaxImageUnits = ctx->Const.MaxImageUnits;
+ this->Const.MaxCombinedShaderOutputResources = ctx->Const.MaxCombinedShaderOutputResources;
+ this->Const.MaxImageSamples = ctx->Const.MaxImageSamples;
+ this->Const.MaxVertexImageUniforms = ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms;
+ this->Const.MaxTessControlImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxImageUniforms;
+ this->Const.MaxTessEvaluationImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms;
+ this->Const.MaxGeometryImageUniforms = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms;
+ this->Const.MaxFragmentImageUniforms = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms;
+ this->Const.MaxCombinedImageUniforms = ctx->Const.MaxCombinedImageUniforms;
+
+ /* ARB_viewport_array */
+ this->Const.MaxViewports = ctx->Const.MaxViewports;
+
+ /* tessellation shader constants */
+ this->Const.MaxPatchVertices = ctx->Const.MaxPatchVertices;
+ this->Const.MaxTessGenLevel = ctx->Const.MaxTessGenLevel;
+ this->Const.MaxTessControlInputComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents;
+ this->Const.MaxTessControlOutputComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents;
+ this->Const.MaxTessControlTextureImageUnits = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits;
+ this->Const.MaxTessEvaluationInputComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents;
+ this->Const.MaxTessEvaluationOutputComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents;
+ this->Const.MaxTessEvaluationTextureImageUnits = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits;
+ this->Const.MaxTessPatchComponents = ctx->Const.MaxTessPatchComponents;
+ this->Const.MaxTessControlTotalOutputComponents = ctx->Const.MaxTessControlTotalOutputComponents;
+ this->Const.MaxTessControlUniformComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxUniformComponents;
+ this->Const.MaxTessEvaluationUniformComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxUniformComponents;
+
+ this->current_function = NULL;
+ this->toplevel_ir = NULL;
+ this->found_return = false;
+ this->all_invariant = false;
+ this->user_structures = NULL;
+ this->num_user_structures = 0;
+ this->num_subroutines = 0;
+ this->subroutines = NULL;
+ this->num_subroutine_types = 0;
+ this->subroutine_types = NULL;
+
+ /* supported_versions should be large enough to support the known desktop
+ * GLSL versions plus 3 GLES versions (ES 1.00, ES 3.00, and ES 3.10)
+ */
+ STATIC_ASSERT((ARRAY_SIZE(known_desktop_glsl_versions) + 3) ==
+ ARRAY_SIZE(this->supported_versions));
+
+ /* Populate the list of supported GLSL versions */
+ /* FINISHME: Once the OpenGL 3.0 'forward compatible' context or
+ * the OpenGL 3.2 Core context is supported, this logic will need to
+ * change. Older versions of GLSL are no longer supported
+ * outside the compatibility contexts of 3.x.
+ */
+ this->num_supported_versions = 0;
+ if (_mesa_is_desktop_gl(ctx)) {
+ for (unsigned i = 0; i < ARRAY_SIZE(known_desktop_glsl_versions); i++) {
+ if (known_desktop_glsl_versions[i] <= ctx->Const.GLSLVersion) {
+ this->supported_versions[this->num_supported_versions].ver
+ = known_desktop_glsl_versions[i];
+ this->supported_versions[this->num_supported_versions].es = false;
+ this->num_supported_versions++;
+ }
+ }
+ }
+ if (ctx->API == API_OPENGLES2 || ctx->Extensions.ARB_ES2_compatibility) {
+ this->supported_versions[this->num_supported_versions].ver = 100;
+ this->supported_versions[this->num_supported_versions].es = true;
+ this->num_supported_versions++;
+ }
+ if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) {
+ this->supported_versions[this->num_supported_versions].ver = 300;
+ this->supported_versions[this->num_supported_versions].es = true;
+ this->num_supported_versions++;
+ }
+ if (_mesa_is_gles31(ctx)) {
+ this->supported_versions[this->num_supported_versions].ver = 310;
+ this->supported_versions[this->num_supported_versions].es = true;
+ this->num_supported_versions++;
+ }
+
+ /* Create a string for use in error messages to tell the user which GLSL
+ * versions are supported.
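+ * The result reads something like "1.10, 1.20, 1.30, and 1.00 ES" (illustrative).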
+ */
+ char *supported = ralloc_strdup(this, "");
+ for (unsigned i = 0; i < this->num_supported_versions; i++) {
+ unsigned ver = this->supported_versions[i].ver;
+ const char *const prefix = (i == 0)
+ ? ""
+ : ((i == this->num_supported_versions - 1) ? ", and " : ", ");
+ const char *const suffix = (this->supported_versions[i].es) ? " ES" : "";
+
+ ralloc_asprintf_append(& supported, "%s%u.%02u%s",
+ prefix,
+ ver / 100, ver % 100,
+ suffix);
+ }
+
+ this->supported_version_string = supported;
+
+ if (ctx->Const.ForceGLSLExtensionsWarn)
+ _mesa_glsl_process_extension("all", NULL, "warn", NULL, this);
+
+ this->default_uniform_qualifier = new(this) ast_type_qualifier();
+ this->default_uniform_qualifier->flags.q.shared = 1;
+ this->default_uniform_qualifier->flags.q.column_major = 1;
+ this->default_uniform_qualifier->is_default_qualifier = true;
+
+ this->default_shader_storage_qualifier = new(this) ast_type_qualifier();
+ this->default_shader_storage_qualifier->flags.q.shared = 1;
+ this->default_shader_storage_qualifier->flags.q.column_major = 1;
+ this->default_shader_storage_qualifier->is_default_qualifier = true;
+
+ this->fs_uses_gl_fragcoord = false;
+ this->fs_redeclares_gl_fragcoord = false;
+ this->fs_origin_upper_left = false;
+ this->fs_pixel_center_integer = false;
+ this->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers = false;
+
+ this->gs_input_prim_type_specified = false;
+ this->tcs_output_vertices_specified = false;
+ this->gs_input_size = 0;
+ this->in_qualifier = new(this) ast_type_qualifier();
+ this->out_qualifier = new(this) ast_type_qualifier();
+ this->fs_early_fragment_tests = false;
+ memset(this->atomic_counter_offsets, 0,
+ sizeof(this->atomic_counter_offsets));
+ this->allow_extension_directive_midshader =
+ ctx->Const.AllowGLSLExtensionDirectiveMidShader;
+ }
+
+ /**
+ * Determine whether the current GLSL version is sufficiently high to support
+ * a certain feature, and generate an error message if it isn't.
+ *
+ * \param required_glsl_version and \c required_glsl_es_version are
+ * interpreted as they are in _mesa_glsl_parse_state::is_version().
+ *
+ * \param locp is the parser location where the error should be reported.
+ *
+ * \param fmt (and additional arguments) constitute a printf-style error
+ * message to report if the version check fails. Information about the
+ * current and required GLSL versions will be appended. So, for example, if
+ * the GLSL version being compiled is 1.20, and check_version(130, 300, locp,
+ * "foo unsupported") is called, the error message will be "foo unsupported in
+ * GLSL 1.20 (GLSL 1.30 or GLSL 3.00 ES required)".
+ */
+ bool
+ _mesa_glsl_parse_state::check_version(unsigned required_glsl_version,
+ unsigned required_glsl_es_version,
+ YYLTYPE *locp, const char *fmt, ...)
+ {
+ if (this->is_version(required_glsl_version, required_glsl_es_version))
+ return true;
+
+ va_list args;
+ va_start(args, fmt);
+ char *problem = ralloc_vasprintf(this, fmt, args);
+ va_end(args);
+ const char *glsl_version_string
+ = glsl_compute_version_string(this, false, required_glsl_version);
+ const char *glsl_es_version_string
+ = glsl_compute_version_string(this, true, required_glsl_es_version);
+ const char *requirement_string = "";
+ if (required_glsl_version && required_glsl_es_version) {
+ requirement_string = ralloc_asprintf(this, " (%s or %s required)",
+ glsl_version_string,
+ glsl_es_version_string);
+ } else if (required_glsl_version) {
+ requirement_string = ralloc_asprintf(this, " (%s required)",
+ glsl_version_string);
+ } else if (required_glsl_es_version) {
+ requirement_string = ralloc_asprintf(this, " (%s required)",
+ glsl_es_version_string);
+ }
+ _mesa_glsl_error(locp, this, "%s in %s%s",
+ problem, this->get_version_string(),
+ requirement_string);
+
+ return false;
+ }
+
+ /**
+ * Process a GLSL #version directive.
+ *
+ * \param version is the integer that follows the #version token.
+ *
+ * \param ident is a string identifier that follows the integer, if any is
+ * present. Otherwise NULL.
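+ *
+ * For example, "#version 300 es" reaches this function with version = 300
+ * and ident = "es", while "#version 120" arrives with ident = NULL.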
+ */
+ void
+ _mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version,
+ const char *ident)
+ {
+ bool es_token_present = false;
+ if (ident) {
+ if (strcmp(ident, "es") == 0) {
+ es_token_present = true;
+ } else if (version >= 150) {
+ if (strcmp(ident, "core") == 0) {
+ /* Accept the token. There's no need to record that this is
+ * a core profile shader since that's the only profile we support.
+ */
+ } else if (strcmp(ident, "compatibility") == 0) {
+ _mesa_glsl_error(locp, this,
+ "the compatibility profile is not supported");
+ } else {
+ _mesa_glsl_error(locp, this,
+ "\"%s\" is not a valid shading language profile; "
+ "if present, it must be \"core\"", ident);
+ }
+ } else {
+ _mesa_glsl_error(locp, this,
+ "illegal text following version number");
+ }
+ }
+
+ this->es_shader = es_token_present;
+ if (version == 100) {
+ if (es_token_present) {
+ _mesa_glsl_error(locp, this,
+ "GLSL 1.00 ES should be selected using "
+ "`#version 100'");
+ } else {
+ this->es_shader = true;
+ }
+ }
+
+ if (this->es_shader) {
+ this->ARB_texture_rectangle_enable = false;
+ }
+
+ if (this->forced_language_version)
+ this->language_version = this->forced_language_version;
+ else
+ this->language_version = version;
+
+ bool supported = false;
+ for (unsigned i = 0; i < this->num_supported_versions; i++) {
+ if (this->supported_versions[i].ver == this->language_version
+ && this->supported_versions[i].es == this->es_shader) {
+ supported = true;
+ break;
+ }
+ }
+
+ if (!supported) {
+ _mesa_glsl_error(locp, this, "%s is not supported. "
+ "Supported versions are: %s",
+ this->get_version_string(),
+ this->supported_version_string);
+
+ /* On exit, the language_version must be set to a valid value.
+ * Later calls to _mesa_glsl_initialize_types will misbehave if
+ * the version is invalid.
+ */
+ switch (this->ctx->API) {
+ case API_OPENGL_COMPAT:
+ case API_OPENGL_CORE:
+ this->language_version = this->ctx->Const.GLSLVersion;
+ break;
+
+ case API_OPENGLES:
+ assert(!"Should not get here.");
+ /* FALLTHROUGH */
+
+ case API_OPENGLES2:
+ this->language_version = 100;
+ break;
+ }
+ }
+ }
+
+
+ /* This helper function appends the given message to the shader's info
+ * log and reports it via GL_ARB_debug_output. Per that extension, 'type'
+ * is one of the enum values classifying the message; the message ID is
+ * filled in when the message is forwarded to _mesa_shader_debug(). */
+ static void
+ _mesa_glsl_msg(const YYLTYPE *locp, _mesa_glsl_parse_state *state,
+ GLenum type, const char *fmt, va_list ap)
+ {
+ bool error = (type == MESA_DEBUG_TYPE_ERROR);
+ GLuint msg_id = 0;
+
+ assert(state->info_log != NULL);
+
+ /* Get the offset that the new message will be written to. */
+ int msg_offset = strlen(state->info_log);
+
+ ralloc_asprintf_append(&state->info_log, "%u:%u(%u): %s: ",
+ locp->source,
+ locp->first_line,
+ locp->first_column,
+ error ? "error" : "warning");
+ ralloc_vasprintf_append(&state->info_log, fmt, ap);
+
+ const char *const msg = &state->info_log[msg_offset];
+ struct gl_context *ctx = state->ctx;
+
+ /* Report the error via GL_ARB_debug_output. */
+ _mesa_shader_debug(ctx, type, &msg_id, msg);
+
+ ralloc_strcat(&state->info_log, "\n");
+ }
+
+ void
+ _mesa_glsl_error(YYLTYPE *locp, _mesa_glsl_parse_state *state,
+ const char *fmt, ...)
+ {
+ va_list ap;
+
+ state->error = true;
+
+ va_start(ap, fmt);
+ _mesa_glsl_msg(locp, state, MESA_DEBUG_TYPE_ERROR, fmt, ap);
+ va_end(ap);
+ }
+
+
+ void
+ _mesa_glsl_warning(const YYLTYPE *locp, _mesa_glsl_parse_state *state,
+ const char *fmt, ...)
+ {
+ va_list ap;
+
+ va_start(ap, fmt);
+ _mesa_glsl_msg(locp, state, MESA_DEBUG_TYPE_OTHER, fmt, ap);
+ va_end(ap);
+ }
+
+
+ /**
+ * Enum representing the possible behaviors that can be specified in
+ * an #extension directive.
+ */
+ enum ext_behavior {
+ extension_disable,
+ extension_enable,
+ extension_require,
+ extension_warn
+ };
+
+ /**
+ * Element type for _mesa_glsl_supported_extensions
+ */
+ struct _mesa_glsl_extension {
+ /**
+ * Name of the extension when referred to in a GLSL extension
+ * statement
+ */
+ const char *name;
+
+ /** True if this extension is available to desktop GL shaders */
+ bool avail_in_GL;
+
+ /** True if this extension is available to GLES shaders */
+ bool avail_in_ES;
+
+ /**
+ * Flag in the gl_extensions struct indicating whether this
+ * extension is supported by the driver, or
+ * &gl_extensions::dummy_true if supported by all drivers.
+ *
+ * Note: the type (GLboolean gl_extensions::*) is a "pointer to
+ * member" type, the type-safe alternative to the "offsetof" macro.
+ * In a nutshell:
+ *
+ * - foo bar::* p declares p to be an "offset" to a field of type
+ * foo that exists within struct bar
+ * - &bar::baz computes the "offset" of field baz within struct bar
+ * - x.*p accesses the field of x that exists at "offset" p
+ * - x->*p is equivalent to (*x).*p
+ */
+ const GLboolean gl_extensions::* supported_flag;
+
+ /**
+ * Flag in the _mesa_glsl_parse_state struct that should be set
+ * when this extension is enabled.
+ *
+ * See note in _mesa_glsl_extension::supported_flag about "pointer
+ * to member" types.
+ */
+ bool _mesa_glsl_parse_state::* enable_flag;
+
+ /**
+ * Flag in the _mesa_glsl_parse_state struct that should be set
+ * when the shader requests "warn" behavior for this extension.
+ *
+ * See note in _mesa_glsl_extension::supported_flag about "pointer
+ * to member" types.
+ */
+ bool _mesa_glsl_parse_state::* warn_flag;
+
+
+ bool compatible_with_state(const _mesa_glsl_parse_state *state) const;
+ void set_flags(_mesa_glsl_parse_state *state, ext_behavior behavior) const;
+ };
+
+ #define EXT(NAME, GL, ES, SUPPORTED_FLAG) \
+ { "GL_" #NAME, GL, ES, &gl_extensions::SUPPORTED_FLAG, \
+ &_mesa_glsl_parse_state::NAME##_enable, \
+ &_mesa_glsl_parse_state::NAME##_warn }
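+
+ /* As an illustration (not new behavior), the ARB_draw_buffers entry below
+ * expands to:
+ *
+ * { "GL_ARB_draw_buffers", true, false,
+ * &gl_extensions::dummy_true,
+ * &_mesa_glsl_parse_state::ARB_draw_buffers_enable,
+ * &_mesa_glsl_parse_state::ARB_draw_buffers_warn }
+ */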
+
+ /**
+ * Table of extensions that can be enabled/disabled within a shader,
+ * and the conditions under which they are supported.
+ */
+ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
+ /* API availability */
+ /* name GL ES supported flag */
+
+ /* ARB extensions go here, sorted alphabetically.
+ */
+ EXT(ARB_arrays_of_arrays, true, false, ARB_arrays_of_arrays),
+ EXT(ARB_compute_shader, true, false, ARB_compute_shader),
+ EXT(ARB_conservative_depth, true, false, ARB_conservative_depth),
+ EXT(ARB_derivative_control, true, false, ARB_derivative_control),
+ EXT(ARB_draw_buffers, true, false, dummy_true),
+ EXT(ARB_draw_instanced, true, false, ARB_draw_instanced),
+ EXT(ARB_enhanced_layouts, true, false, ARB_enhanced_layouts),
+ EXT(ARB_explicit_attrib_location, true, false, ARB_explicit_attrib_location),
+ EXT(ARB_explicit_uniform_location, true, false, ARB_explicit_uniform_location),
+ EXT(ARB_fragment_coord_conventions, true, false, ARB_fragment_coord_conventions),
+ EXT(ARB_fragment_layer_viewport, true, false, ARB_fragment_layer_viewport),
+ EXT(ARB_gpu_shader5, true, false, ARB_gpu_shader5),
+ EXT(ARB_gpu_shader_fp64, true, false, ARB_gpu_shader_fp64),
+ EXT(ARB_sample_shading, true, false, ARB_sample_shading),
+ EXT(ARB_separate_shader_objects, true, false, dummy_true),
+ EXT(ARB_shader_atomic_counters, true, false, ARB_shader_atomic_counters),
+ EXT(ARB_shader_bit_encoding, true, false, ARB_shader_bit_encoding),
+ EXT(ARB_shader_clock, true, false, ARB_shader_clock),
+ EXT(ARB_shader_draw_parameters, true, false, ARB_shader_draw_parameters),
+ EXT(ARB_shader_image_load_store, true, false, ARB_shader_image_load_store),
+ EXT(ARB_shader_image_size, true, false, ARB_shader_image_size),
+ EXT(ARB_shader_precision, true, false, ARB_shader_precision),
+ EXT(ARB_shader_stencil_export, true, false, ARB_shader_stencil_export),
+ EXT(ARB_shader_storage_buffer_object, true, true, ARB_shader_storage_buffer_object),
+ EXT(ARB_shader_subroutine, true, false, ARB_shader_subroutine),
+ EXT(ARB_shader_texture_image_samples, true, false, ARB_shader_texture_image_samples),
+ EXT(ARB_shader_texture_lod, true, false, ARB_shader_texture_lod),
+ EXT(ARB_shading_language_420pack, true, false, ARB_shading_language_420pack),
+ EXT(ARB_shading_language_packing, true, false, ARB_shading_language_packing),
+ EXT(ARB_tessellation_shader, true, false, ARB_tessellation_shader),
+ EXT(ARB_texture_cube_map_array, true, false, ARB_texture_cube_map_array),
+ EXT(ARB_texture_gather, true, false, ARB_texture_gather),
+ EXT(ARB_texture_multisample, true, false, ARB_texture_multisample),
+ EXT(ARB_texture_query_levels, true, false, ARB_texture_query_levels),
+ EXT(ARB_texture_query_lod, true, false, ARB_texture_query_lod),
+ EXT(ARB_texture_rectangle, true, false, dummy_true),
+ EXT(ARB_uniform_buffer_object, true, false, ARB_uniform_buffer_object),
+ EXT(ARB_vertex_attrib_64bit, true, false, ARB_vertex_attrib_64bit),
+ EXT(ARB_viewport_array, true, false, ARB_viewport_array),
+
+ /* KHR extensions go here, sorted alphabetically.
+ */
+
+ /* OES extensions go here, sorted alphabetically.
+ */
+ EXT(OES_EGL_image_external, false, true, OES_EGL_image_external),
+ EXT(OES_geometry_shader, false, true, OES_geometry_shader),
+ EXT(OES_standard_derivatives, false, true, OES_standard_derivatives),
+ EXT(OES_texture_3D, false, true, dummy_true),
+ EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample),
+
+ /* All other extensions go here, sorted alphabetically.
+ */
+ EXT(AMD_conservative_depth, true, false, ARB_conservative_depth),
+ EXT(AMD_shader_stencil_export, true, false, ARB_shader_stencil_export),
+ EXT(AMD_shader_trinary_minmax, true, false, dummy_true),
+ EXT(AMD_vertex_shader_layer, true, false, AMD_vertex_shader_layer),
+ EXT(AMD_vertex_shader_viewport_index, true, false, AMD_vertex_shader_viewport_index),
+ EXT(EXT_blend_func_extended, false, true, ARB_blend_func_extended),
+ EXT(EXT_draw_buffers, false, true, dummy_true),
+ EXT(EXT_separate_shader_objects, false, true, dummy_true),
+ EXT(EXT_shader_integer_mix, true, true, EXT_shader_integer_mix),
+ EXT(EXT_shader_samples_identical, true, true, EXT_shader_samples_identical),
+ EXT(EXT_texture_array, true, false, EXT_texture_array),
+ };
+
+ #undef EXT
+
+
+ /**
+ * Determine whether a given extension is compatible with the target,
+ * API, and extension information in the current parser state.
+ */
+ bool _mesa_glsl_extension::compatible_with_state(const _mesa_glsl_parse_state *
+ state) const
+ {
+ /* Check that this extension matches whether we are compiling
+ * for desktop GL or GLES.
+ */
+ if (state->es_shader) {
+ if (!this->avail_in_ES) return false;
+ } else {
+ if (!this->avail_in_GL) return false;
+ }
+
+ /* Check that this extension is supported by the OpenGL
+ * implementation.
+ *
+ * Note: the ->* operator indexes into state->extensions by the
+ * offset this->supported_flag. See
+ * _mesa_glsl_extension::supported_flag for more info.
+ */
+ return state->extensions->*(this->supported_flag);
+ }
+
+ /**
+ * Set the appropriate flags in the parser state to establish the
+ * given behavior for this extension.
+ */
+ void _mesa_glsl_extension::set_flags(_mesa_glsl_parse_state *state,
+ ext_behavior behavior) const
+ {
+ /* Note: the ->* operator indexes into state by the
+ * offsets this->enable_flag and this->warn_flag. See
+ * _mesa_glsl_extension::supported_flag for more info.
+ */
+ state->*(this->enable_flag) = (behavior != extension_disable);
+ state->*(this->warn_flag) = (behavior == extension_warn);
+ }
+
+ /**
+ * Find an extension by name in _mesa_glsl_supported_extensions. If
+ * the name is not found, return NULL.
+ */
+ static const _mesa_glsl_extension *find_extension(const char *name)
+ {
+ for (unsigned i = 0; i < ARRAY_SIZE(_mesa_glsl_supported_extensions); ++i) {
+ if (strcmp(name, _mesa_glsl_supported_extensions[i].name) == 0) {
+ return &_mesa_glsl_supported_extensions[i];
+ }
+ }
+ return NULL;
+ }
+
+
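+ /* Handle a "#extension <name> : <behavior>" directive. For example,
+ * "#extension GL_ARB_draw_buffers : enable" arrives with
+ * name = "GL_ARB_draw_buffers" and behavior_string = "enable". Returns
+ * false when the directive cannot be honored and an error has been
+ * reported.
+ */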
+ bool
+ _mesa_glsl_process_extension(const char *name, YYLTYPE *name_locp,
+ const char *behavior_string, YYLTYPE *behavior_locp,
+ _mesa_glsl_parse_state *state)
+ {
+ ext_behavior behavior;
+ if (strcmp(behavior_string, "warn") == 0) {
+ behavior = extension_warn;
+ } else if (strcmp(behavior_string, "require") == 0) {
+ behavior = extension_require;
+ } else if (strcmp(behavior_string, "enable") == 0) {
+ behavior = extension_enable;
+ } else if (strcmp(behavior_string, "disable") == 0) {
+ behavior = extension_disable;
+ } else {
+ _mesa_glsl_error(behavior_locp, state,
+ "unknown extension behavior `%s'",
+ behavior_string);
+ return false;
+ }
+
+ if (strcmp(name, "all") == 0) {
+ if ((behavior == extension_enable) || (behavior == extension_require)) {
+ _mesa_glsl_error(name_locp, state, "cannot %s all extensions",
+ (behavior == extension_enable)
+ ? "enable" : "require");
+ return false;
+ } else {
+ for (unsigned i = 0;
+ i < ARRAY_SIZE(_mesa_glsl_supported_extensions); ++i) {
+ const _mesa_glsl_extension *extension
+ = &_mesa_glsl_supported_extensions[i];
+ if (extension->compatible_with_state(state)) {
+ _mesa_glsl_supported_extensions[i].set_flags(state, behavior);
+ }
+ }
+ }
+ } else {
+ const _mesa_glsl_extension *extension = find_extension(name);
+ if (extension && extension->compatible_with_state(state)) {
+ extension->set_flags(state, behavior);
+ } else {
+ static const char fmt[] = "extension `%s' unsupported in %s shader";
+
+ if (behavior == extension_require) {
+ _mesa_glsl_error(name_locp, state, fmt,
+ name, _mesa_shader_stage_to_string(state->stage));
+ return false;
+ } else {
+ _mesa_glsl_warning(name_locp, state, fmt,
+ name, _mesa_shader_stage_to_string(state->stage));
+ }
+ }
+ }
+
+ return true;
+ }
+
+
+ /**
+ * Recurses through <type> and <expr> if <expr> is an aggregate initializer
+ * and sets <expr>'s <constructor_type> field to <type>. Gives later functions
+ * (process_array_constructor, et al) sufficient information to do type
+ * checking.
+ *
+ * Operates on assignments involving an aggregate initializer. E.g.,
+ *
+ * vec4 pos = {1.0, -1.0, 0.0, 1.0};
+ *
+ * or more ridiculously,
+ *
+ * struct S {
+ * vec4 v[2];
+ * };
+ *
+ * struct {
+ * S a[2], b;
+ * int c;
+ * } aggregate = {
+ * {
+ * {
+ * {
+ * {1.0, 2.0, 3.0, 4.0}, // a[0].v[0]
+ * {5.0, 6.0, 7.0, 8.0} // a[0].v[1]
+ * } // a[0].v
+ * }, // a[0]
+ * {
+ * {
+ * {1.0, 2.0, 3.0, 4.0}, // a[1].v[0]
+ * {5.0, 6.0, 7.0, 8.0} // a[1].v[1]
+ * } // a[1].v
+ * } // a[1]
+ * }, // a
+ * {
+ * {
+ * {1.0, 2.0, 3.0, 4.0}, // b.v[0]
+ * {5.0, 6.0, 7.0, 8.0} // b.v[1]
+ * } // b.v
+ * }, // b
+ * 4 // c
+ * };
+ *
+ * This pass is necessary because the right-hand side of <type> e = { ... }
+ * doesn't contain sufficient information to determine if the types match.
+ */
+ void
+ _mesa_ast_set_aggregate_type(const glsl_type *type,
+ ast_expression *expr)
+ {
+ ast_aggregate_initializer *ai = (ast_aggregate_initializer *)expr;
+ ai->constructor_type = type;
+
+ /* If the aggregate is an array, recursively set its elements' types. */
+ if (type->is_array()) {
+ /* Each array element has the type type->fields.array.
+ *
+ * E.g., if <type> is struct S[2], we want to set each element's type to
+ * struct S.
+ */
+ for (exec_node *expr_node = ai->expressions.head;
+ !expr_node->is_tail_sentinel();
+ expr_node = expr_node->next) {
+ ast_expression *expr = exec_node_data(ast_expression, expr_node,
+ link);
+
+ if (expr->oper == ast_aggregate)
+ _mesa_ast_set_aggregate_type(type->fields.array, expr);
+ }
+
+ /* If the aggregate is a struct, recursively set its fields' types. */
+ } else if (type->is_record()) {
+ exec_node *expr_node = ai->expressions.head;
+
+ /* Iterate through the struct's fields. */
+ for (unsigned i = 0; !expr_node->is_tail_sentinel() && i < type->length;
+ i++, expr_node = expr_node->next) {
+ ast_expression *expr = exec_node_data(ast_expression, expr_node,
+ link);
+
+ if (expr->oper == ast_aggregate) {
+ _mesa_ast_set_aggregate_type(type->fields.structure[i].type, expr);
+ }
+ }
+ /* If the aggregate is a matrix, set its columns' types. */
+ } else if (type->is_matrix()) {
+ for (exec_node *expr_node = ai->expressions.head;
+ !expr_node->is_tail_sentinel();
+ expr_node = expr_node->next) {
+ ast_expression *expr = exec_node_data(ast_expression, expr_node,
+ link);
+
+ if (expr->oper == ast_aggregate)
+ _mesa_ast_set_aggregate_type(type->column_type(), expr);
+ }
+ }
+ }
+
+ void
+ _mesa_ast_process_interface_block(YYLTYPE *locp,
+ _mesa_glsl_parse_state *state,
+ ast_interface_block *const block,
+ const struct ast_type_qualifier &q)
+ {
+ if (q.flags.q.buffer) {
+ if (!state->has_shader_storage_buffer_objects()) {
+ _mesa_glsl_error(locp, state,
+ "#version 430 / GL_ARB_shader_storage_buffer_object "
+ "required for defining shader storage blocks");
+ } else if (state->ARB_shader_storage_buffer_object_warn) {
+ _mesa_glsl_warning(locp, state,
+ "#version 430 / GL_ARB_shader_storage_buffer_object "
+ "required for defining shader storage blocks");
+ }
+ } else if (q.flags.q.uniform) {
+ if (!state->has_uniform_buffer_objects()) {
+ _mesa_glsl_error(locp, state,
+ "#version 140 / GL_ARB_uniform_buffer_object "
+ "required for defining uniform blocks");
+ } else if (state->ARB_uniform_buffer_object_warn) {
+ _mesa_glsl_warning(locp, state,
+ "#version 140 / GL_ARB_uniform_buffer_object "
+ "required for defining uniform blocks");
+ }
+ } else {
+ if (state->es_shader || state->language_version < 150) {
+ _mesa_glsl_error(locp, state,
+ "#version 150 required for using "
+ "interface blocks");
+ }
+ }
+
+ /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"):
+ * "It is illegal to have an input block in a vertex shader
+ * or an output block in a fragment shader"
+ */
+ if ((state->stage == MESA_SHADER_VERTEX) && q.flags.q.in) {
+ _mesa_glsl_error(locp, state,
+ "`in' interface block is not allowed for "
+ "a vertex shader");
+ } else if ((state->stage == MESA_SHADER_FRAGMENT) && q.flags.q.out) {
+ _mesa_glsl_error(locp, state,
+ "`out' interface block is not allowed for "
+ "a fragment shader");
+ }
+
+ /* Since block arrays require names, and both features are added in
+ * the same language versions, we don't have to explicitly
+ * version-check both things.
+ */
+ if (block->instance_name != NULL) {
+ state->check_version(150, 300, locp, "interface blocks with "
+ "an instance name are not allowed");
+ }
+
+ uint64_t interface_type_mask;
+ struct ast_type_qualifier temp_type_qualifier;
+
+ /* Get a bitmask containing only the in/out/uniform/buffer
+ * flags, allowing us to ignore other irrelevant flags like
+ * interpolation qualifiers.
+ */
+ temp_type_qualifier.flags.i = 0;
+ temp_type_qualifier.flags.q.uniform = true;
+ temp_type_qualifier.flags.q.in = true;
+ temp_type_qualifier.flags.q.out = true;
+ temp_type_qualifier.flags.q.buffer = true;
+ interface_type_mask = temp_type_qualifier.flags.i;
+
+ /* Get the block's interface qualifier. The interface_qualifier
+ * production rule guarantees that only one bit will be set (and
+ * it will be in/out/uniform).
+ */
+ uint64_t block_interface_qualifier = q.flags.i;
+
+ block->layout.flags.i |= block_interface_qualifier;
+
+ if (state->stage == MESA_SHADER_GEOMETRY &&
+ state->has_explicit_attrib_stream()) {
+ /* Assign global layout's stream value. */
+ block->layout.flags.q.stream = 1;
+ block->layout.flags.q.explicit_stream = 0;
+ block->layout.stream = state->out_qualifier->stream;
+ }
+
+ foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
+ ast_type_qualifier& qualifier = member->type->qualifier;
+ if ((qualifier.flags.i & interface_type_mask) == 0) {
+ /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+ * "If no optional qualifier is used in a member declaration, the
+ * qualifier of the variable is just in, out, or uniform as declared
+ * by interface-qualifier."
+ */
+ qualifier.flags.i |= block_interface_qualifier;
+ } else if ((qualifier.flags.i & interface_type_mask) !=
+ block_interface_qualifier) {
+ /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+ * "If optional qualifiers are used, they can include interpolation
+ * and storage qualifiers and they must declare an input, output,
+ * or uniform variable consistent with the interface qualifier of
+ * the block."
+ */
+ _mesa_glsl_error(locp, state,
+ "uniform/in/out qualifier on "
+ "interface block member does not match "
+ "the interface block");
+ }
+
+ /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
+ *
+ * "GLSL ES 3.0 does not support interface blocks for shader inputs or
+ * outputs."
+ *
+ * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":
+ *
+ * "Only variables output from a shader can be candidates for
+ * invariance."
+ *
+ * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
+ *
+ * "If optional qualifiers are used, they can include interpolation
+ * qualifiers, auxiliary storage qualifiers, and storage qualifiers
+ * and they must declare an input, output, or uniform member
+ * consistent with the interface qualifier of the block"
+ */
+ if (qualifier.flags.q.invariant)
+ _mesa_glsl_error(locp, state,
+ "invariant qualifiers cannot be used "
+ "with interface blocks members");
+ }
+ }
+
+ void
+ _mesa_ast_type_qualifier_print(const struct ast_type_qualifier *q)
+ {
+ if (q->flags.q.subroutine)
+ printf("subroutine ");
+
+ if (q->flags.q.subroutine_def) {
+ printf("subroutine (");
+ q->subroutine_list->print();
+ printf(")");
+ }
+
+ if (q->flags.q.constant)
+ printf("const ");
+
+ if (q->flags.q.invariant)
+ printf("invariant ");
+
+ if (q->flags.q.attribute)
+ printf("attribute ");
+
+ if (q->flags.q.varying)
+ printf("varying ");
+
+ if (q->flags.q.in && q->flags.q.out)
+ printf("inout ");
+ else {
+ if (q->flags.q.in)
+ printf("in ");
+
+ if (q->flags.q.out)
+ printf("out ");
+ }
+
+ if (q->flags.q.centroid)
+ printf("centroid ");
+ if (q->flags.q.sample)
+ printf("sample ");
+ if (q->flags.q.patch)
+ printf("patch ");
+ if (q->flags.q.uniform)
+ printf("uniform ");
+ if (q->flags.q.buffer)
+ printf("buffer ");
+ if (q->flags.q.smooth)
+ printf("smooth ");
+ if (q->flags.q.flat)
+ printf("flat ");
+ if (q->flags.q.noperspective)
+ printf("noperspective ");
+ }
+
+
+ void
+ ast_node::print(void) const
+ {
+ printf("unhandled node ");
+ }
+
+
+ ast_node::ast_node(void)
+ {
+ this->location.source = 0;
+ this->location.first_line = 0;
+ this->location.first_column = 0;
+ this->location.last_line = 0;
+ this->location.last_column = 0;
+ }
+
+
+ static void
+ ast_opt_array_dimensions_print(const ast_array_specifier *array_specifier)
+ {
+ if (array_specifier)
+ array_specifier->print();
+ }
+
+
+ void
+ ast_compound_statement::print(void) const
+ {
+ printf("{\n");
+
+ foreach_list_typed(ast_node, ast, link, &this->statements) {
+ ast->print();
+ }
+
+ printf("}\n");
+ }
+
+
+ ast_compound_statement::ast_compound_statement(int new_scope,
+ ast_node *statements)
+ {
+ this->new_scope = new_scope;
+
+ if (statements != NULL) {
+ this->statements.push_degenerate_list_at_head(&statements->link);
+ }
+ }
+
+
+ void
+ ast_expression::print(void) const
+ {
+ switch (oper) {
+ case ast_assign:
+ case ast_mul_assign:
+ case ast_div_assign:
+ case ast_mod_assign:
+ case ast_add_assign:
+ case ast_sub_assign:
+ case ast_ls_assign:
+ case ast_rs_assign:
+ case ast_and_assign:
+ case ast_xor_assign:
+ case ast_or_assign:
+ subexpressions[0]->print();
+ printf("%s ", operator_string(oper));
+ subexpressions[1]->print();
+ break;
+
+ case ast_field_selection:
+ subexpressions[0]->print();
+ printf(". %s ", primary_expression.identifier);
+ break;
+
+ case ast_plus:
+ case ast_neg:
+ case ast_bit_not:
+ case ast_logic_not:
+ case ast_pre_inc:
+ case ast_pre_dec:
+ printf("%s ", operator_string(oper));
+ subexpressions[0]->print();
+ break;
+
+ case ast_post_inc:
+ case ast_post_dec:
+ subexpressions[0]->print();
+ printf("%s ", operator_string(oper));
+ break;
+
+ case ast_conditional:
+ subexpressions[0]->print();
+ printf("? ");
+ subexpressions[1]->print();
+ printf(": ");
+ subexpressions[2]->print();
+ break;
+
+ case ast_array_index:
+ subexpressions[0]->print();
+ printf("[ ");
+ subexpressions[1]->print();
+ printf("] ");
+ break;
+
+ case ast_function_call: {
+ subexpressions[0]->print();
+ printf("( ");
+
+ foreach_list_typed (ast_node, ast, link, &this->expressions) {
+ if (&ast->link != this->expressions.get_head())
+ printf(", ");
+
+ ast->print();
+ }
+
+ printf(") ");
+ break;
+ }
+
+ case ast_identifier:
+ printf("%s ", primary_expression.identifier);
+ break;
+
+ case ast_int_constant:
+ printf("%d ", primary_expression.int_constant);
+ break;
+
+ case ast_uint_constant:
+ printf("%u ", primary_expression.uint_constant);
+ break;
+
+ case ast_float_constant:
+ printf("%f ", primary_expression.float_constant);
+ break;
+
+ case ast_double_constant:
+ printf("%f ", primary_expression.double_constant);
+ break;
+
+ case ast_bool_constant:
+ printf("%s ",
+ primary_expression.bool_constant
+ ? "true" : "false");
+ break;
+
+ case ast_sequence: {
+ printf("( ");
+ foreach_list_typed (ast_node, ast, link, & this->expressions) {
+ if (&ast->link != this->expressions.get_head())
+ printf(", ");
+
+ ast->print();
+ }
+ printf(") ");
+ break;
+ }
+
+ case ast_aggregate: {
+ printf("{ ");
+ foreach_list_typed (ast_node, ast, link, & this->expressions) {
+ if (&ast->link != this->expressions.get_head())
+ printf(", ");
+
+ ast->print();
+ }
+ printf("} ");
+ break;
+ }
+
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ ast_expression::ast_expression(int oper,
+ ast_expression *ex0,
+ ast_expression *ex1,
+ ast_expression *ex2) :
+ primary_expression()
+ {
+ this->oper = ast_operators(oper);
+ this->subexpressions[0] = ex0;
+ this->subexpressions[1] = ex1;
+ this->subexpressions[2] = ex2;
+ this->non_lvalue_description = NULL;
+ }
+
+
+ void
+ ast_expression_statement::print(void) const
+ {
+ if (expression)
+ expression->print();
+
+ printf("; ");
+ }
+
+
+ ast_expression_statement::ast_expression_statement(ast_expression *ex) :
+ expression(ex)
+ {
+ /* empty */
+ }
+
+
+ void
+ ast_function::print(void) const
+ {
+ return_type->print();
+ printf(" %s (", identifier);
+
+ foreach_list_typed(ast_node, ast, link, & this->parameters) {
+ ast->print();
+ }
+
+ printf(")");
+ }
+
+
+ ast_function::ast_function(void)
+ : return_type(NULL), identifier(NULL), is_definition(false),
+ signature(NULL)
+ {
+ /* empty */
+ }
+
+
+ void
+ ast_fully_specified_type::print(void) const
+ {
+ _mesa_ast_type_qualifier_print(& qualifier);
+ specifier->print();
+ }
+
+
+ void
+ ast_parameter_declarator::print(void) const
+ {
+ type->print();
+ if (identifier)
+ printf("%s ", identifier);
+ ast_opt_array_dimensions_print(array_specifier);
+ }
+
+
+ void
+ ast_function_definition::print(void) const
+ {
+ prototype->print();
+ body->print();
+ }
+
+
+ void
+ ast_declaration::print(void) const
+ {
+ printf("%s ", identifier);
+ ast_opt_array_dimensions_print(array_specifier);
+
+ if (initializer) {
+ printf("= ");
+ initializer->print();
+ }
+ }
+
+
+ ast_declaration::ast_declaration(const char *identifier,
+ ast_array_specifier *array_specifier,
+ ast_expression *initializer)
+ {
+ this->identifier = identifier;
+ this->array_specifier = array_specifier;
+ this->initializer = initializer;
+ }
+
+
+ void
+ ast_declarator_list::print(void) const
+ {
+ assert(type || invariant);
+
+ if (type)
+ type->print();
+ else if (invariant)
+ printf("invariant ");
+ else
+ printf("precise ");
+
+ foreach_list_typed (ast_node, ast, link, & this->declarations) {
+ if (&ast->link != this->declarations.get_head())
+ printf(", ");
+
+ ast->print();
+ }
+
+ printf("; ");
+ }
+
+
+ ast_declarator_list::ast_declarator_list(ast_fully_specified_type *type)
+ {
+ this->type = type;
+ this->invariant = false;
+ this->precise = false;
+ }
+
+ void
+ ast_jump_statement::print(void) const
+ {
+ switch (mode) {
+ case ast_continue:
+ printf("continue; ");
+ break;
+ case ast_break:
+ printf("break; ");
+ break;
+ case ast_return:
+ printf("return ");
+ if (opt_return_value)
+ opt_return_value->print();
+
+ printf("; ");
+ break;
+ case ast_discard:
+ printf("discard; ");
+ break;
+ }
+ }
+
+
+ ast_jump_statement::ast_jump_statement(int mode, ast_expression *return_value)
+ : opt_return_value(NULL)
+ {
+ this->mode = ast_jump_modes(mode);
+
+ if (mode == ast_return)
+ opt_return_value = return_value;
+ }
+
+
+ void
+ ast_selection_statement::print(void) const
+ {
+ printf("if ( ");
+ condition->print();
+ printf(") ");
+
+ then_statement->print();
+
+ if (else_statement) {
+ printf("else ");
+ else_statement->print();
+ }
+ }
+
+
+ ast_selection_statement::ast_selection_statement(ast_expression *condition,
+ ast_node *then_statement,
+ ast_node *else_statement)
+ {
+ this->condition = condition;
+ this->then_statement = then_statement;
+ this->else_statement = else_statement;
+ }
+
+
+ void
+ ast_switch_statement::print(void) const
+ {
+ printf("switch ( ");
+ test_expression->print();
+ printf(") ");
+
+ body->print();
+ }
+
+
+ ast_switch_statement::ast_switch_statement(ast_expression *test_expression,
+ ast_node *body)
+ {
+ this->test_expression = test_expression;
+ this->body = body;
+ }
+
+
+ void
+ ast_switch_body::print(void) const
+ {
+ printf("{\n");
+ if (stmts != NULL) {
+ stmts->print();
+ }
+ printf("}\n");
+ }
+
+
+ ast_switch_body::ast_switch_body(ast_case_statement_list *stmts)
+ {
+ this->stmts = stmts;
+ }
+
+
+ void ast_case_label::print(void) const
+ {
+ if (test_value != NULL) {
+ printf("case ");
+ test_value->print();
+ printf(": ");
+ } else {
+ printf("default: ");
+ }
+ }
+
+
+ ast_case_label::ast_case_label(ast_expression *test_value)
+ {
+ this->test_value = test_value;
+ }
+
+
+ void ast_case_label_list::print(void) const
+ {
+ foreach_list_typed(ast_node, ast, link, & this->labels) {
+ ast->print();
+ }
+ printf("\n");
+ }
+
+
+ ast_case_label_list::ast_case_label_list(void)
+ {
+ }
+
+
+ void ast_case_statement::print(void) const
+ {
+ labels->print();
+ foreach_list_typed(ast_node, ast, link, & this->stmts) {
+ ast->print();
+ printf("\n");
+ }
+ }
+
+
+ ast_case_statement::ast_case_statement(ast_case_label_list *labels)
+ {
+ this->labels = labels;
+ }
+
+
+ void ast_case_statement_list::print(void) const
+ {
+ foreach_list_typed(ast_node, ast, link, & this->cases) {
+ ast->print();
+ }
+ }
+
+
+ ast_case_statement_list::ast_case_statement_list(void)
+ {
+ }
+
+
+ void
+ ast_iteration_statement::print(void) const
+ {
+ switch (mode) {
+ case ast_for:
+ printf("for( ");
+ if (init_statement)
+ init_statement->print();
+ printf("; ");
+
+ if (condition)
+ condition->print();
+ printf("; ");
+
+ if (rest_expression)
+ rest_expression->print();
+ printf(") ");
+
+ body->print();
+ break;
+
+ case ast_while:
+ printf("while ( ");
+ if (condition)
+ condition->print();
+ printf(") ");
+ body->print();
+ break;
+
+ case ast_do_while:
+ printf("do ");
+ body->print();
+ printf("while ( ");
+ if (condition)
+ condition->print();
+ printf("); ");
+ break;
+ }
+ }
+
+
+ ast_iteration_statement::ast_iteration_statement(int mode,
+ ast_node *init,
+ ast_node *condition,
+ ast_expression *rest_expression,
+ ast_node *body)
+ {
+ this->mode = ast_iteration_modes(mode);
+ this->init_statement = init;
+ this->condition = condition;
+ this->rest_expression = rest_expression;
+ this->body = body;
+ }
+
+
+ void
+ ast_struct_specifier::print(void) const
+ {
+ printf("struct %s { ", name);
+ foreach_list_typed(ast_node, ast, link, &this->declarations) {
+ ast->print();
+ }
+ printf("} ");
+ }
+
+
+ ast_struct_specifier::ast_struct_specifier(const char *identifier,
+ ast_declarator_list *declarator_list)
+ {
+ if (identifier == NULL) {
+ static mtx_t mutex = _MTX_INITIALIZER_NP;
+ static unsigned anon_count = 1;
+ unsigned count;
+
+ mtx_lock(&mutex);
+ count = anon_count++;
+ mtx_unlock(&mutex);
+
+ identifier = ralloc_asprintf(this, "#anon_struct_%04x", count);
+ }
+ name = identifier;
+ this->declarations.push_degenerate_list_at_head(&declarator_list->link);
+ is_declaration = true;
+ }
+
+ void ast_subroutine_list::print(void) const
+ {
+ foreach_list_typed (ast_node, ast, link, & this->declarations) {
+ if (&ast->link != this->declarations.get_head())
+ printf(", ");
+ ast->print();
+ }
+ }
+
+ static void
+ set_shader_inout_layout(struct gl_shader *shader,
+ struct _mesa_glsl_parse_state *state)
+ {
+ /* Should have been prevented by the parser. */
+ if (shader->Stage == MESA_SHADER_TESS_CTRL) {
+ assert(!state->in_qualifier->flags.i);
+ } else if (shader->Stage == MESA_SHADER_TESS_EVAL) {
+ assert(!state->out_qualifier->flags.i);
+ } else if (shader->Stage != MESA_SHADER_GEOMETRY) {
+ assert(!state->in_qualifier->flags.i);
+ assert(!state->out_qualifier->flags.i);
+ }
+
+ if (shader->Stage != MESA_SHADER_COMPUTE) {
+ /* Should have been prevented by the parser. */
+ assert(!state->cs_input_local_size_specified);
+ }
+
+ if (shader->Stage != MESA_SHADER_FRAGMENT) {
+ /* Should have been prevented by the parser. */
+ assert(!state->fs_uses_gl_fragcoord);
+ assert(!state->fs_redeclares_gl_fragcoord);
+ assert(!state->fs_pixel_center_integer);
+ assert(!state->fs_origin_upper_left);
+ assert(!state->fs_early_fragment_tests);
+ }
+
+ switch (shader->Stage) {
+ case MESA_SHADER_TESS_CTRL:
+ shader->TessCtrl.VerticesOut = 0;
+ if (state->tcs_output_vertices_specified) {
+ unsigned vertices;
+ if (state->out_qualifier->vertices->
+ process_qualifier_constant(state, "vertices", &vertices,
+ false)) {
+
+ YYLTYPE loc = state->out_qualifier->vertices->get_location();
+ if (vertices > state->Const.MaxPatchVertices) {
+ _mesa_glsl_error(&loc, state, "vertices (%d) exceeds "
+ "GL_MAX_PATCH_VERTICES", vertices);
+ }
+ shader->TessCtrl.VerticesOut = vertices;
+ }
+ }
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ shader->TessEval.PrimitiveMode = PRIM_UNKNOWN;
+ if (state->in_qualifier->flags.q.prim_type)
+ shader->TessEval.PrimitiveMode = state->in_qualifier->prim_type;
+
+ shader->TessEval.Spacing = 0;
+ if (state->in_qualifier->flags.q.vertex_spacing)
+ shader->TessEval.Spacing = state->in_qualifier->vertex_spacing;
+
+ shader->TessEval.VertexOrder = 0;
+ if (state->in_qualifier->flags.q.ordering)
+ shader->TessEval.VertexOrder = state->in_qualifier->ordering;
+
+ shader->TessEval.PointMode = -1;
+ if (state->in_qualifier->flags.q.point_mode)
+ shader->TessEval.PointMode = state->in_qualifier->point_mode;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ shader->Geom.VerticesOut = 0;
+ if (state->out_qualifier->flags.q.max_vertices) {
+ unsigned qual_max_vertices;
+ if (state->out_qualifier->max_vertices->
+ process_qualifier_constant(state, "max_vertices",
+ &qual_max_vertices, true)) {
+ shader->Geom.VerticesOut = qual_max_vertices;
+ }
+ }
+
+ if (state->gs_input_prim_type_specified) {
+ shader->Geom.InputType = state->in_qualifier->prim_type;
+ } else {
+ shader->Geom.InputType = PRIM_UNKNOWN;
+ }
+
+ if (state->out_qualifier->flags.q.prim_type) {
+ shader->Geom.OutputType = state->out_qualifier->prim_type;
+ } else {
+ shader->Geom.OutputType = PRIM_UNKNOWN;
+ }
+
+ shader->Geom.Invocations = 0;
+ if (state->in_qualifier->flags.q.invocations) {
+ unsigned invocations;
+ if (state->in_qualifier->invocations->
+ process_qualifier_constant(state, "invocations",
+ &invocations, false)) {
+
+ YYLTYPE loc = state->in_qualifier->invocations->get_location();
+ if (invocations > MAX_GEOMETRY_SHADER_INVOCATIONS) {
+ _mesa_glsl_error(&loc, state,
+ "invocations (%d) exceeds "
+ "GL_MAX_GEOMETRY_SHADER_INVOCATIONS",
+ invocations);
+ }
+ shader->Geom.Invocations = invocations;
+ }
+ }
+ break;
+
+ case MESA_SHADER_COMPUTE:
+ if (state->cs_input_local_size_specified) {
+ for (int i = 0; i < 3; i++)
+ shader->Comp.LocalSize[i] = state->cs_input_local_size[i];
+ } else {
+ for (int i = 0; i < 3; i++)
+ shader->Comp.LocalSize[i] = 0;
+ }
+ break;
+
+ case MESA_SHADER_FRAGMENT:
+ shader->redeclares_gl_fragcoord = state->fs_redeclares_gl_fragcoord;
+ shader->uses_gl_fragcoord = state->fs_uses_gl_fragcoord;
+ shader->pixel_center_integer = state->fs_pixel_center_integer;
+ shader->origin_upper_left = state->fs_origin_upper_left;
+ shader->ARB_fragment_coord_conventions_enable =
+ state->ARB_fragment_coord_conventions_enable;
+ shader->EarlyFragmentTests = state->fs_early_fragment_tests;
+ break;
+
+ default:
+ /* Nothing to do. */
+ break;
+ }
+ }
+
+ extern "C" {
+
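+ /* Compile a single GLSL shader: preprocess the source, lex and parse it
+ * into an AST (optionally dumped), lower the AST to IR, validate and run
+ * the common optimizations, then rebuild the shader's symbol table from
+ * the IR that survives.
+ */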
+ void
+ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
+ bool dump_ast, bool dump_hir)
+ {
+ struct _mesa_glsl_parse_state *state =
+ new(shader) _mesa_glsl_parse_state(ctx, shader->Stage, shader);
+ const char *source = shader->Source;
+
+ if (ctx->Const.GenerateTemporaryNames)
+ (void) p_atomic_cmpxchg(&ir_variable::temporaries_allocate_names,
+ false, true);
+
+ state->error = glcpp_preprocess(state, &source, &state->info_log,
+ &ctx->Extensions, ctx);
+
+ if (!state->error) {
+ _mesa_glsl_lexer_ctor(state, source);
+ _mesa_glsl_parse(state);
+ _mesa_glsl_lexer_dtor(state);
+ }
+
+ if (dump_ast) {
+ foreach_list_typed(ast_node, ast, link, &state->translation_unit) {
+ ast->print();
+ }
+ printf("\n\n");
+ }
+
+ ralloc_free(shader->ir);
+ shader->ir = new(shader) exec_list;
+ if (!state->error && !state->translation_unit.is_empty())
+ _mesa_ast_to_hir(shader->ir, state);
+
+ if (!state->error) {
+ validate_ir_tree(shader->ir);
+
+ /* Print out the unoptimized IR. */
+ if (dump_hir) {
+ _mesa_print_ir(stdout, shader->ir, state);
+ }
+ }
+
+
+ if (!state->error && !shader->ir->is_empty()) {
+ struct gl_shader_compiler_options *options =
+ &ctx->Const.ShaderCompilerOptions[shader->Stage];
+
+ lower_subroutine(shader->ir, state);
+ /* Do some optimization at compile time to reduce shader IR size
+ * and reduce later work if the same shader is linked multiple times
+ */
+ while (do_common_optimization(shader->ir, false, false, options,
+ ctx->Const.NativeIntegers))
+ ;
+
+ validate_ir_tree(shader->ir);
+
+ enum ir_variable_mode other;
+ switch (shader->Stage) {
+ case MESA_SHADER_VERTEX:
+ other = ir_var_shader_in;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ other = ir_var_shader_out;
+ break;
+ default:
+ /* Something invalid to ensure optimize_dead_builtin_uniforms
+ * doesn't remove anything other than uniforms or constants.
+ */
+ other = ir_var_mode_count;
+ break;
+ }
+
+ optimize_dead_builtin_variables(shader->ir, other);
+
+ validate_ir_tree(shader->ir);
+ }
+
+ if (shader->InfoLog)
+ ralloc_free(shader->InfoLog);
+
+ if (!state->error)
+ set_shader_inout_layout(shader, state);
+
+ shader->symbols = new(shader->ir) glsl_symbol_table;
+ shader->CompileStatus = !state->error;
+ shader->InfoLog = state->info_log;
+ shader->Version = state->language_version;
+ shader->IsES = state->es_shader;
+ shader->uses_builtin_functions = state->uses_builtin_functions;
+
+ /* Retain any live IR, but trash the rest. */
+ reparent_ir(shader->ir, shader->ir);
+
+ /* Destroy the symbol table. Create a new symbol table that contains only
+ * the variables and functions that still exist in the IR. The symbol
+ * table will be used later during linking.
+ *
+ * There must NOT be any freed objects still referenced by the symbol
+ * table. That could cause the linker to dereference freed memory.
+ *
+ * We don't have to worry about types or interface-types here because those
+ * are fly-weights that are looked up by glsl_type.
+ */
+ foreach_in_list (ir_instruction, ir, shader->ir) {
+ switch (ir->ir_type) {
+ case ir_type_function:
+ shader->symbols->add_function((ir_function *) ir);
+ break;
+ case ir_type_variable: {
+ ir_variable *const var = (ir_variable *) ir;
+
+ if (var->data.mode != ir_var_temporary)
+ shader->symbols->add_variable(var);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ _mesa_glsl_initialize_derived_variables(shader);
+
+ delete state->symbols;
+ ralloc_free(state);
+ }
+
+ } /* extern "C" */
+ /**
+ * Do the set of common optimizations passes
+ *
+ * \param ir List of instructions to be optimized
+ * \param linked Is the shader linked? This enables
+ * optimizations passes that remove code at
+ * global scope and could cause linking to
+ * fail.
+ * \param uniform_locations_assigned Have locations already been assigned for
+ * uniforms? This prevents the declarations
+ * of unused uniforms from being removed.
+ * The setting of this flag only matters if
+ * \c linked is \c true.
+ * \param native_integers Whether the driver natively supports integer
+ * operations; this is passed through to the
+ * algebraic optimization pass.
+ * \param options The driver's preferred shader options.
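+ *
+ * Callers typically run this pass to a fixed point; for example, the
+ * compile path above does:
+ *
+ * while (do_common_optimization(shader->ir, false, false, options,
+ * ctx->Const.NativeIntegers))
+ * ;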
+ */
+ bool
+ do_common_optimization(exec_list *ir, bool linked,
+ bool uniform_locations_assigned,
+ const struct gl_shader_compiler_options *options,
+ bool native_integers)
+ {
+ GLboolean progress = GL_FALSE;
+
+ progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress;
+
+ if (linked) {
+ progress = do_function_inlining(ir) || progress;
+ progress = do_dead_functions(ir) || progress;
+ progress = do_structure_splitting(ir) || progress;
+ }
+ progress = do_if_simplification(ir) || progress;
+ progress = opt_flatten_nested_if_blocks(ir) || progress;
+ progress = opt_conditional_discard(ir) || progress;
+ progress = do_copy_propagation(ir) || progress;
+ progress = do_copy_propagation_elements(ir) || progress;
+
+ if (options->OptimizeForAOS && !linked)
+ progress = opt_flip_matrices(ir) || progress;
+
+ if (linked && options->OptimizeForAOS) {
+ progress = do_vectorize(ir) || progress;
+ }
+
+ if (linked)
+ progress = do_dead_code(ir, uniform_locations_assigned) || progress;
+ else
+ progress = do_dead_code_unlinked(ir) || progress;
+ progress = do_dead_code_local(ir) || progress;
+ progress = do_tree_grafting(ir) || progress;
+ progress = do_constant_propagation(ir) || progress;
+ if (linked)
+ progress = do_constant_variable(ir) || progress;
+ else
+ progress = do_constant_variable_unlinked(ir) || progress;
+ progress = do_constant_folding(ir) || progress;
+ progress = do_minmax_prune(ir) || progress;
+ progress = do_rebalance_tree(ir) || progress;
+ progress = do_algebraic(ir, native_integers, options) || progress;
+ progress = do_lower_jumps(ir) || progress;
+ progress = do_vec_index_to_swizzle(ir) || progress;
+ progress = lower_vector_insert(ir, false) || progress;
+ progress = do_swizzle_swizzle(ir) || progress;
+ progress = do_noop_swizzle(ir) || progress;
+
+ progress = optimize_split_arrays(ir, linked) || progress;
+ progress = optimize_redundant_jumps(ir) || progress;
+
+ loop_state *ls = analyze_loop_variables(ir);
+ if (ls->loop_found) {
+ progress = set_loop_controls(ir, ls) || progress;
+ progress = unroll_loops(ir, ls, options) || progress;
+ }
+ delete ls;
+
+ return progress;
+ }
+
+ extern "C" {
+
+ /**
+ * To be called at GL teardown time, this frees compiler datastructures.
+ *
+ * After calling this, any previously compiled shaders and shader
+ * programs would be invalid. So this should happen at approximately
+ * program exit.
+ */
+ void
+ _mesa_destroy_shader_compiler(void)
+ {
+ _mesa_destroy_shader_compiler_caches();
+
+ _mesa_glsl_release_types();
+ }
+
+ /**
+ * Releases compiler caches to trade off performance for memory.
+ *
+ * Intended to be used with glReleaseShaderCompiler().
+ */
+ void
+ _mesa_destroy_shader_compiler_caches(void)
+ {
+ _mesa_glsl_release_builtin_functions();
+ }
+
+ }
--- /dev/null
- case ir_unop_unpack_half_2x16_split_x:
- case ir_unop_unpack_half_2x16_split_y:
+ /*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+ #include <string.h>
+ #include "main/core.h" /* for MAX2 */
+ #include "ir.h"
+ #include "compiler/glsl_types.h"
+
+ ir_rvalue::ir_rvalue(enum ir_node_type t)
+ : ir_instruction(t)
+ {
+ this->type = glsl_type::error_type;
+ }
+
+ bool ir_rvalue::is_zero() const
+ {
+ return false;
+ }
+
+ bool ir_rvalue::is_one() const
+ {
+ return false;
+ }
+
+ bool ir_rvalue::is_negative_one() const
+ {
+ return false;
+ }
+
+ /**
+ * Modify the swizzle mask to move one component to another
+ *
+ * \param m IR swizzle to be modified
+ * \param from Component in the RHS that is to be swizzled
+ * \param to Desired swizzle location of \c from
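+ *
+ * For example, update_rhs_swizzle(m, 2, 0) records that the RHS's third
+ * component should land in the destination's x channel (m.x = 2).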
+ */
+ static void
+ update_rhs_swizzle(ir_swizzle_mask &m, unsigned from, unsigned to)
+ {
+ switch (to) {
+ case 0: m.x = from; break;
+ case 1: m.y = from; break;
+ case 2: m.z = from; break;
+ case 3: m.w = from; break;
+ default: assert(!"Should not get here.");
+ }
+ }
+
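+ /* Set the LHS of the assignment, peeling any swizzles off of it. For a
+ * swizzled LHS such as v.zy, the write mask is remapped onto the
+ * underlying variable's channels and the RHS is re-swizzled so that each
+ * of its components lands in the channel it actually writes.
+ */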
+ void
+ ir_assignment::set_lhs(ir_rvalue *lhs)
+ {
+ void *mem_ctx = this;
+ bool swizzled = false;
+
+ while (lhs != NULL) {
+ ir_swizzle *swiz = lhs->as_swizzle();
+
+ if (swiz == NULL)
+ break;
+
+ unsigned write_mask = 0;
+ ir_swizzle_mask rhs_swiz = { 0, 0, 0, 0, 0, 0 };
+
+ for (unsigned i = 0; i < swiz->mask.num_components; i++) {
+ unsigned c = 0;
+
+ switch (i) {
+ case 0: c = swiz->mask.x; break;
+ case 1: c = swiz->mask.y; break;
+ case 2: c = swiz->mask.z; break;
+ case 3: c = swiz->mask.w; break;
+ default: assert(!"Should not get here.");
+ }
+
+ write_mask |= (((this->write_mask >> i) & 1) << c);
+ update_rhs_swizzle(rhs_swiz, i, c);
+ rhs_swiz.num_components = swiz->val->type->vector_elements;
+ }
+
+ this->write_mask = write_mask;
+ lhs = swiz->val;
+
+ this->rhs = new(mem_ctx) ir_swizzle(this->rhs, rhs_swiz);
+ swizzled = true;
+ }
+
+ if (swizzled) {
+ /* Now, RHS channels line up with the LHS writemask. Collapse it
+ * to just the channels that will be written.
+ */
+ ir_swizzle_mask rhs_swiz = { 0, 0, 0, 0, 0, 0 };
+ int rhs_chan = 0;
+ for (int i = 0; i < 4; i++) {
+ if (write_mask & (1 << i))
+ update_rhs_swizzle(rhs_swiz, i, rhs_chan++);
+ }
+ rhs_swiz.num_components = rhs_chan;
+ this->rhs = new(mem_ctx) ir_swizzle(this->rhs, rhs_swiz);
+ }
+
+ assert((lhs == NULL) || lhs->as_dereference());
+
+ this->lhs = (ir_dereference *) lhs;
+ }
+
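+ /* Return the variable that is entirely written by this assignment, or
+ * NULL if only part of it (e.g. a subset of a vector's components) is
+ * written.
+ */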
+ ir_variable *
+ ir_assignment::whole_variable_written()
+ {
+ ir_variable *v = this->lhs->whole_variable_referenced();
+
+ if (v == NULL)
+ return NULL;
+
+ if (v->type->is_scalar())
+ return v;
+
+ if (v->type->is_vector()) {
+ const unsigned mask = (1U << v->type->vector_elements) - 1;
+
+ if (mask != this->write_mask)
+ return NULL;
+ }
+
+ /* Either all the vector components are assigned or the variable is some
+ * composite type (and the whole thing is assigned).
+ */
+ return v;
+ }
+
+ ir_assignment::ir_assignment(ir_dereference *lhs, ir_rvalue *rhs,
+ ir_rvalue *condition, unsigned write_mask)
+ : ir_instruction(ir_type_assignment)
+ {
+ this->condition = condition;
+ this->rhs = rhs;
+ this->lhs = lhs;
+ this->write_mask = write_mask;
+
+ if (lhs->type->is_scalar() || lhs->type->is_vector()) {
+ int lhs_components = 0;
+ for (int i = 0; i < 4; i++) {
+ if (write_mask & (1 << i))
+ lhs_components++;
+ }
+
+ assert(lhs_components == this->rhs->type->vector_elements);
+ }
+ }
+
+ ir_assignment::ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs,
+ ir_rvalue *condition)
+ : ir_instruction(ir_type_assignment)
+ {
+ this->condition = condition;
+ this->rhs = rhs;
+
+ /* If the RHS is a vector type, assume that all components of the vector
+ * type are being written to the LHS. The write mask comes from the RHS
+ * because we can have a case where the LHS is a vec4 and the RHS is a
+ * vec3. In that case, the assignment is:
+ *
+ * (assign (...) (xyz) (var_ref lhs) (var_ref rhs))
+ */
+ if (rhs->type->is_vector())
+ this->write_mask = (1U << rhs->type->vector_elements) - 1;
+ else if (rhs->type->is_scalar())
+ this->write_mask = 1;
+ else
+ this->write_mask = 0;
+
+ this->set_lhs(lhs);
+ }
+
+ ir_expression::ir_expression(int op, const struct glsl_type *type,
+ ir_rvalue *op0, ir_rvalue *op1,
+ ir_rvalue *op2, ir_rvalue *op3)
+ : ir_rvalue(ir_type_expression)
+ {
+ this->type = type;
+ this->operation = ir_expression_operation(op);
+ this->operands[0] = op0;
+ this->operands[1] = op1;
+ this->operands[2] = op2;
+ this->operands[3] = op3;
+ #ifndef NDEBUG
+ int num_operands = get_num_operands(this->operation);
+ for (int i = num_operands; i < 4; i++) {
+ assert(this->operands[i] == NULL);
+ }
+ #endif
+ }
+
+ ir_expression::ir_expression(int op, ir_rvalue *op0)
+ : ir_rvalue(ir_type_expression)
+ {
+ this->operation = ir_expression_operation(op);
+ this->operands[0] = op0;
+ this->operands[1] = NULL;
+ this->operands[2] = NULL;
+ this->operands[3] = NULL;
+
+ assert(op <= ir_last_unop);
+
+ switch (this->operation) {
+ case ir_unop_bit_not:
+ case ir_unop_logic_not:
+ case ir_unop_neg:
+ case ir_unop_abs:
+ case ir_unop_sign:
+ case ir_unop_rcp:
+ case ir_unop_rsq:
+ case ir_unop_sqrt:
+ case ir_unop_exp:
+ case ir_unop_log:
+ case ir_unop_exp2:
+ case ir_unop_log2:
+ case ir_unop_trunc:
+ case ir_unop_ceil:
+ case ir_unop_floor:
+ case ir_unop_fract:
+ case ir_unop_round_even:
+ case ir_unop_sin:
+ case ir_unop_cos:
+ case ir_unop_dFdx:
+ case ir_unop_dFdx_coarse:
+ case ir_unop_dFdx_fine:
+ case ir_unop_dFdy:
+ case ir_unop_dFdy_coarse:
+ case ir_unop_dFdy_fine:
+ case ir_unop_bitfield_reverse:
+ case ir_unop_interpolate_at_centroid:
+ case ir_unop_saturate:
+ this->type = op0->type;
+ break;
+
+ case ir_unop_f2i:
+ case ir_unop_b2i:
+ case ir_unop_u2i:
+ case ir_unop_d2i:
+ case ir_unop_bitcast_f2i:
+ case ir_unop_bit_count:
+ case ir_unop_find_msb:
+ case ir_unop_find_lsb:
+ case ir_unop_subroutine_to_int:
+ this->type = glsl_type::get_instance(GLSL_TYPE_INT,
+ op0->type->vector_elements, 1);
+ break;
+
+ case ir_unop_b2f:
+ case ir_unop_i2f:
+ case ir_unop_u2f:
+ case ir_unop_d2f:
+ case ir_unop_bitcast_i2f:
+ case ir_unop_bitcast_u2f:
+ this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+ op0->type->vector_elements, 1);
+ break;
+
+ case ir_unop_f2b:
+ case ir_unop_i2b:
+ case ir_unop_d2b:
+ this->type = glsl_type::get_instance(GLSL_TYPE_BOOL,
+ op0->type->vector_elements, 1);
+ break;
+
+ case ir_unop_f2d:
+ case ir_unop_i2d:
+ case ir_unop_u2d:
+ this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE,
+ op0->type->vector_elements, 1);
+ break;
+
+ case ir_unop_i2u:
+ case ir_unop_f2u:
+ case ir_unop_d2u:
+ case ir_unop_bitcast_f2u:
+ this->type = glsl_type::get_instance(GLSL_TYPE_UINT,
+ op0->type->vector_elements, 1);
+ break;
+
+ case ir_unop_noise:
- case ir_binop_pack_half_2x16_split:
- this->type = glsl_type::uint_type;
- break;
-
+ this->type = glsl_type::float_type;
+ break;
+
+ case ir_unop_unpack_double_2x32:
+ this->type = glsl_type::uvec2_type;
+ break;
+
+ case ir_unop_pack_snorm_2x16:
+ case ir_unop_pack_snorm_4x8:
+ case ir_unop_pack_unorm_2x16:
+ case ir_unop_pack_unorm_4x8:
+ case ir_unop_pack_half_2x16:
+ this->type = glsl_type::uint_type;
+ break;
+
+ case ir_unop_pack_double_2x32:
+ this->type = glsl_type::double_type;
+ break;
+
+ case ir_unop_unpack_snorm_2x16:
+ case ir_unop_unpack_unorm_2x16:
+ case ir_unop_unpack_half_2x16:
+ this->type = glsl_type::vec2_type;
+ break;
+
+ case ir_unop_unpack_snorm_4x8:
+ case ir_unop_unpack_unorm_4x8:
+ this->type = glsl_type::vec4_type;
+ break;
+
+ case ir_unop_frexp_sig:
+ this->type = op0->type;
+ break;
+ case ir_unop_frexp_exp:
+ this->type = glsl_type::get_instance(GLSL_TYPE_INT,
+ op0->type->vector_elements, 1);
+ break;
+
+ case ir_unop_get_buffer_size:
+ case ir_unop_ssbo_unsized_array_length:
+ this->type = glsl_type::int_type;
+ break;
+
+ default:
+ assert(!"not reached: missing automatic type setup for ir_expression");
+ this->type = op0->type;
+ break;
+ }
+ }
+
+ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1)
+ : ir_rvalue(ir_type_expression)
+ {
+ this->operation = ir_expression_operation(op);
+ this->operands[0] = op0;
+ this->operands[1] = op1;
+ this->operands[2] = NULL;
+ this->operands[3] = NULL;
+
+ assert(op > ir_last_unop);
+
+ switch (this->operation) {
+ case ir_binop_all_equal:
+ case ir_binop_any_nequal:
+ this->type = glsl_type::bool_type;
+ break;
+
+ case ir_binop_add:
+ case ir_binop_sub:
+ case ir_binop_min:
+ case ir_binop_max:
+ case ir_binop_pow:
+ case ir_binop_mul:
+ case ir_binop_div:
+ case ir_binop_mod:
+ if (op0->type->is_scalar()) {
+ this->type = op1->type;
+ } else if (op1->type->is_scalar()) {
+ this->type = op0->type;
+ } else {
+ if (this->operation == ir_binop_mul) {
+ this->type = glsl_type::get_mul_type(op0->type, op1->type);
+ } else {
+ assert(op0->type == op1->type);
+ this->type = op0->type;
+ }
+ }
+ break;
+
+ case ir_binop_logic_and:
+ case ir_binop_logic_xor:
+ case ir_binop_logic_or:
+ case ir_binop_bit_and:
+ case ir_binop_bit_xor:
+ case ir_binop_bit_or:
+ assert(!op0->type->is_matrix());
+ assert(!op1->type->is_matrix());
+ if (op0->type->is_scalar()) {
+ this->type = op1->type;
+ } else if (op1->type->is_scalar()) {
+ this->type = op0->type;
+ } else {
+ assert(op0->type->vector_elements == op1->type->vector_elements);
+ this->type = op0->type;
+ }
+ break;
+
+ case ir_binop_equal:
+ case ir_binop_nequal:
+ case ir_binop_lequal:
+ case ir_binop_gequal:
+ case ir_binop_less:
+ case ir_binop_greater:
+ assert(op0->type == op1->type);
+ this->type = glsl_type::get_instance(GLSL_TYPE_BOOL,
+ op0->type->vector_elements, 1);
+ break;
+
+ case ir_binop_dot:
+ this->type = op0->type->get_base_type();
+ break;
+
- "unpackHalf2x16_split_x",
- "unpackHalf2x16_split_y",
+ case ir_binop_imul_high:
+ case ir_binop_carry:
+ case ir_binop_borrow:
+ case ir_binop_lshift:
+ case ir_binop_rshift:
+ case ir_binop_ldexp:
+ case ir_binop_interpolate_at_offset:
+ case ir_binop_interpolate_at_sample:
+ this->type = op0->type;
+ break;
+
+ case ir_binop_vector_extract:
+ this->type = op0->type->get_scalar_type();
+ break;
+
+ default:
+ assert(!"not reached: missing automatic type setup for ir_expression");
+ this->type = glsl_type::float_type;
+ }
+ }
+
+ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1,
+ ir_rvalue *op2)
+ : ir_rvalue(ir_type_expression)
+ {
+ this->operation = ir_expression_operation(op);
+ this->operands[0] = op0;
+ this->operands[1] = op1;
+ this->operands[2] = op2;
+ this->operands[3] = NULL;
+
+ assert(op > ir_last_binop && op <= ir_last_triop);
+
+ switch (this->operation) {
+ case ir_triop_fma:
+ case ir_triop_lrp:
+ case ir_triop_bitfield_extract:
+ case ir_triop_vector_insert:
+ this->type = op0->type;
+ break;
+
+ case ir_triop_csel:
+ this->type = op1->type;
+ break;
+
+ default:
+ assert(!"not reached: missing automatic type setup for ir_expression");
+ this->type = glsl_type::float_type;
+ }
+ }
+
+ unsigned int
+ ir_expression::get_num_operands(ir_expression_operation op)
+ {
+ assert(op <= ir_last_opcode);
+
+ if (op <= ir_last_unop)
+ return 1;
+
+ if (op <= ir_last_binop)
+ return 2;
+
+ if (op <= ir_last_triop)
+ return 3;
+
+ if (op <= ir_last_quadop)
+ return 4;
+
+ assert(false);
+ return 0;
+ }
+
+ static const char *const operator_strs[] = {
+ "~",
+ "!",
+ "neg",
+ "abs",
+ "sign",
+ "rcp",
+ "rsq",
+ "sqrt",
+ "exp",
+ "log",
+ "exp2",
+ "log2",
+ "f2i",
+ "f2u",
+ "i2f",
+ "f2b",
+ "b2f",
+ "i2b",
+ "b2i",
+ "u2f",
+ "i2u",
+ "u2i",
+ "d2f",
+ "f2d",
+ "d2i",
+ "i2d",
+ "d2u",
+ "u2d",
+ "d2b",
+ "bitcast_i2f",
+ "bitcast_f2i",
+ "bitcast_u2f",
+ "bitcast_f2u",
+ "trunc",
+ "ceil",
+ "floor",
+ "fract",
+ "round_even",
+ "sin",
+ "cos",
+ "dFdx",
+ "dFdxCoarse",
+ "dFdxFine",
+ "dFdy",
+ "dFdyCoarse",
+ "dFdyFine",
+ "packSnorm2x16",
+ "packSnorm4x8",
+ "packUnorm2x16",
+ "packUnorm4x8",
+ "packHalf2x16",
+ "unpackSnorm2x16",
+ "unpackSnorm4x8",
+ "unpackUnorm2x16",
+ "unpackUnorm4x8",
+ "unpackHalf2x16",
- "packHalf2x16_split",
+ "bitfield_reverse",
+ "bit_count",
+ "find_msb",
+ "find_lsb",
+ "sat",
+ "packDouble2x32",
+ "unpackDouble2x32",
+ "frexp_sig",
+ "frexp_exp",
+ "noise",
+ "subroutine_to_int",
+ "interpolate_at_centroid",
+ "get_buffer_size",
+ "ssbo_unsized_array_length",
+ "+",
+ "-",
+ "*",
+ "imul_high",
+ "/",
+ "carry",
+ "borrow",
+ "%",
+ "<",
+ ">",
+ "<=",
+ ">=",
+ "==",
+ "!=",
+ "all_equal",
+ "any_nequal",
+ "<<",
+ ">>",
+ "&",
+ "^",
+ "|",
+ "&&",
+ "^^",
+ "||",
+ "dot",
+ "min",
+ "max",
+ "pow",
+ "ubo_load",
+ "ldexp",
+ "vector_extract",
+ "interpolate_at_offset",
+ "interpolate_at_sample",
+ "fma",
+ "lrp",
+ "csel",
+ "bitfield_extract",
+ "vector_insert",
+ "bitfield_insert",
+ "vector",
+ };
+
+ const char *ir_expression::operator_string(ir_expression_operation op)
+ {
+ assert((unsigned int) op < ARRAY_SIZE(operator_strs));
+ assert(ARRAY_SIZE(operator_strs) == (ir_quadop_vector + 1));
+ return operator_strs[op];
+ }
+
+ const char *ir_expression::operator_string()
+ {
+ return operator_string(this->operation);
+ }
+
+ const char*
+ depth_layout_string(ir_depth_layout layout)
+ {
+ switch(layout) {
+ case ir_depth_layout_none: return "";
+ case ir_depth_layout_any: return "depth_any";
+ case ir_depth_layout_greater: return "depth_greater";
+ case ir_depth_layout_less: return "depth_less";
+ case ir_depth_layout_unchanged: return "depth_unchanged";
+
+ default:
+ assert(0);
+ return "";
+ }
+ }
+
+ ir_expression_operation
+ ir_expression::get_operator(const char *str)
+ {
+ const int operator_count = sizeof(operator_strs) / sizeof(operator_strs[0]);
+ for (int op = 0; op < operator_count; op++) {
+ if (strcmp(str, operator_strs[op]) == 0)
+ return (ir_expression_operation) op;
+ }
+ return (ir_expression_operation) -1;
+ }
+
+ ir_variable *
+ ir_expression::variable_referenced() const
+ {
+ switch (operation) {
+ case ir_binop_vector_extract:
+ case ir_triop_vector_insert:
+ /* We get these for things like a[0] where a is a vector type. In these
+ * cases we want variable_referenced() to return the actual vector
+ * variable this is wrapping.
+ */
+ return operands[0]->variable_referenced();
+ default:
+ return ir_rvalue::variable_referenced();
+ }
+ }
+
+ ir_constant::ir_constant()
+ : ir_rvalue(ir_type_constant)
+ {
+ }
+
+ ir_constant::ir_constant(const struct glsl_type *type,
+ const ir_constant_data *data)
+ : ir_rvalue(ir_type_constant)
+ {
+ assert((type->base_type >= GLSL_TYPE_UINT)
+ && (type->base_type <= GLSL_TYPE_BOOL));
+
+ this->type = type;
+ memcpy(& this->value, data, sizeof(this->value));
+ }
+
+ ir_constant::ir_constant(float f, unsigned vector_elements)
+ : ir_rvalue(ir_type_constant)
+ {
+ assert(vector_elements <= 4);
+ this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT, vector_elements, 1);
+ for (unsigned i = 0; i < vector_elements; i++) {
+ this->value.f[i] = f;
+ }
+ for (unsigned i = vector_elements; i < 16; i++) {
+ this->value.f[i] = 0;
+ }
+ }
+
+ ir_constant::ir_constant(double d, unsigned vector_elements)
+ : ir_rvalue(ir_type_constant)
+ {
+ assert(vector_elements <= 4);
+ this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE, vector_elements, 1);
+ for (unsigned i = 0; i < vector_elements; i++) {
+ this->value.d[i] = d;
+ }
+ for (unsigned i = vector_elements; i < 16; i++) {
+ this->value.d[i] = 0.0;
+ }
+ }
+
+ ir_constant::ir_constant(unsigned int u, unsigned vector_elements)
+ : ir_rvalue(ir_type_constant)
+ {
+ assert(vector_elements <= 4);
+ this->type = glsl_type::get_instance(GLSL_TYPE_UINT, vector_elements, 1);
+ for (unsigned i = 0; i < vector_elements; i++) {
+ this->value.u[i] = u;
+ }
+ for (unsigned i = vector_elements; i < 16; i++) {
+ this->value.u[i] = 0;
+ }
+ }
+
+ ir_constant::ir_constant(int integer, unsigned vector_elements)
+ : ir_rvalue(ir_type_constant)
+ {
+ assert(vector_elements <= 4);
+ this->type = glsl_type::get_instance(GLSL_TYPE_INT, vector_elements, 1);
+ for (unsigned i = 0; i < vector_elements; i++) {
+ this->value.i[i] = integer;
+ }
+ for (unsigned i = vector_elements; i < 16; i++) {
+ this->value.i[i] = 0;
+ }
+ }
+
+ ir_constant::ir_constant(bool b, unsigned vector_elements)
+ : ir_rvalue(ir_type_constant)
+ {
+ assert(vector_elements <= 4);
+ this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, vector_elements, 1);
+ for (unsigned i = 0; i < vector_elements; i++) {
+ this->value.b[i] = b;
+ }
+ for (unsigned i = vector_elements; i < 16; i++) {
+ this->value.b[i] = false;
+ }
+ }
+
+ ir_constant::ir_constant(const ir_constant *c, unsigned i)
+ : ir_rvalue(ir_type_constant)
+ {
+ this->type = c->type->get_base_type();
+
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT: this->value.u[0] = c->value.u[i]; break;
+ case GLSL_TYPE_INT: this->value.i[0] = c->value.i[i]; break;
+ case GLSL_TYPE_FLOAT: this->value.f[0] = c->value.f[i]; break;
+ case GLSL_TYPE_BOOL: this->value.b[0] = c->value.b[i]; break;
+ case GLSL_TYPE_DOUBLE: this->value.d[0] = c->value.d[i]; break;
+ default: assert(!"Should not get here."); break;
+ }
+ }
+
+ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
+ : ir_rvalue(ir_type_constant)
+ {
+ this->type = type;
+
+ assert(type->is_scalar() || type->is_vector() || type->is_matrix()
+ || type->is_record() || type->is_array());
+
+ if (type->is_array()) {
+ this->array_elements = ralloc_array(this, ir_constant *, type->length);
+ unsigned i = 0;
+ foreach_in_list(ir_constant, value, value_list) {
+ assert(value->as_constant() != NULL);
+
+ this->array_elements[i++] = value;
+ }
+ return;
+ }
+
+ /* If the constant is a record, the types of each of the entries in
+ * value_list must be a 1-for-1 match with the structure components. Each
+ * entry must also be a constant. Just move the nodes from the value_list
+ * to the list in the ir_constant.
+ */
+ /* FINISHME: Should there be some type checking and / or assertions here? */
+ /* FINISHME: Should the new constant take ownership of the nodes from
+ * FINISHME: value_list, or should it make copies?
+ */
+ if (type->is_record()) {
+ value_list->move_nodes_to(& this->components);
+ return;
+ }
+
+ for (unsigned i = 0; i < 16; i++) {
+ this->value.u[i] = 0;
+ }
+
+ ir_constant *value = (ir_constant *) (value_list->head);
+
+ /* Constructors with exactly one scalar argument are special for vectors
+ * and matrices. For vectors, the scalar value is replicated to fill all
+ * the components. For matrices, the scalar fills the components of the
+ * diagonal while the rest is filled with 0.
+ */
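+ /* For example, under these rules the GLSL constructors vec4(2.0) and
+ * mat3(1.0) yield, respectively, a vector with every component equal to
+ * 2.0 and a 3x3 identity matrix.
+ */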
+ if (value->type->is_scalar() && value->next->is_tail_sentinel()) {
+ if (type->is_matrix()) {
+ /* Matrix - fill diagonal (rest is already set to 0) */
+ assert(type->base_type == GLSL_TYPE_FLOAT ||
+ type->base_type == GLSL_TYPE_DOUBLE);
+ for (unsigned i = 0; i < type->matrix_columns; i++) {
+ if (type->base_type == GLSL_TYPE_FLOAT)
+ this->value.f[i * type->vector_elements + i] =
+ value->value.f[0];
+ else
+ this->value.d[i * type->vector_elements + i] =
+ value->value.d[0];
+ }
+ } else {
+ /* Vector or scalar - fill all components */
+ switch (type->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ for (unsigned i = 0; i < type->components(); i++)
+ this->value.u[i] = value->value.u[0];
+ break;
+ case GLSL_TYPE_FLOAT:
+ for (unsigned i = 0; i < type->components(); i++)
+ this->value.f[i] = value->value.f[0];
+ break;
+ case GLSL_TYPE_DOUBLE:
+ for (unsigned i = 0; i < type->components(); i++)
+ this->value.d[i] = value->value.d[0];
+ break;
+ case GLSL_TYPE_BOOL:
+ for (unsigned i = 0; i < type->components(); i++)
+ this->value.b[i] = value->value.b[0];
+ break;
+ default:
+ assert(!"Should not get here.");
+ break;
+ }
+ }
+ return;
+ }
+
+ if (type->is_matrix() && value->type->is_matrix()) {
+ assert(value->next->is_tail_sentinel());
+
+ /* From section 5.4.2 of the GLSL 1.20 spec:
+ * "If a matrix is constructed from a matrix, then each component
+ * (column i, row j) in the result that has a corresponding component
+ * (column i, row j) in the argument will be initialized from there."
+ */
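+ /* For example, constructing a mat4 from a mat2 copies the 2x2 upper-left
+ * corner from the argument; the remaining columns get 1.0 on the diagonal
+ * and 0.0 elsewhere, matching the loops below.
+ */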
+ unsigned cols = MIN2(type->matrix_columns, value->type->matrix_columns);
+ unsigned rows = MIN2(type->vector_elements, value->type->vector_elements);
+ for (unsigned i = 0; i < cols; i++) {
+ for (unsigned j = 0; j < rows; j++) {
+ const unsigned src = i * value->type->vector_elements + j;
+ const unsigned dst = i * type->vector_elements + j;
+ this->value.f[dst] = value->value.f[src];
+ }
+ }
+
+ /* "All other components will be initialized to the identity matrix." */
+ for (unsigned i = cols; i < type->matrix_columns; i++)
+ this->value.f[i * type->vector_elements + i] = 1.0;
+
+ return;
+ }
+
+ /* Use each component from each entry in the value_list to initialize one
+ * component of the constant being constructed.
+ */
+ for (unsigned i = 0; i < type->components(); /* empty */) {
+ assert(value->as_constant() != NULL);
+ assert(!value->is_tail_sentinel());
+
+ for (unsigned j = 0; j < value->type->components(); j++) {
+ switch (type->base_type) {
+ case GLSL_TYPE_UINT:
+ this->value.u[i] = value->get_uint_component(j);
+ break;
+ case GLSL_TYPE_INT:
+ this->value.i[i] = value->get_int_component(j);
+ break;
+ case GLSL_TYPE_FLOAT:
+ this->value.f[i] = value->get_float_component(j);
+ break;
+ case GLSL_TYPE_BOOL:
+ this->value.b[i] = value->get_bool_component(j);
+ break;
+ case GLSL_TYPE_DOUBLE:
+ this->value.d[i] = value->get_double_component(j);
+ break;
+ default:
+ /* FINISHME: What to do? Exceptions are not the answer.
+ */
+ break;
+ }
+
+ i++;
+ if (i >= type->components())
+ break;
+ }
+
+ value = (ir_constant *) value->next;
+ }
+ }
+
+ ir_constant *
+ ir_constant::zero(void *mem_ctx, const glsl_type *type)
+ {
+ assert(type->is_scalar() || type->is_vector() || type->is_matrix()
+ || type->is_record() || type->is_array());
+
+ ir_constant *c = new(mem_ctx) ir_constant;
+ c->type = type;
+ memset(&c->value, 0, sizeof(c->value));
+
+ if (type->is_array()) {
+ c->array_elements = ralloc_array(c, ir_constant *, type->length);
+
+ for (unsigned i = 0; i < type->length; i++)
+ c->array_elements[i] = ir_constant::zero(c, type->fields.array);
+ }
+
+ if (type->is_record()) {
+ for (unsigned i = 0; i < type->length; i++) {
+ ir_constant *comp = ir_constant::zero(mem_ctx, type->fields.structure[i].type);
+ c->components.push_tail(comp);
+ }
+ }
+
+ return c;
+ }
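+ /* For example, ir_constant::zero(mem_ctx, glsl_type::vec4_type) builds a
+ * vec4 constant with every component set to 0.0; for array and struct
+ * types it recursively builds a zero constant for each element or field.
+ * (mem_ctx is a placeholder for whatever ralloc context owns the result.)
+ */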
+
+ bool
+ ir_constant::get_bool_component(unsigned i) const
+ {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT: return this->value.u[i] != 0;
+ case GLSL_TYPE_INT: return this->value.i[i] != 0;
+ case GLSL_TYPE_FLOAT: return ((int)this->value.f[i]) != 0;
+ case GLSL_TYPE_BOOL: return this->value.b[i];
+ case GLSL_TYPE_DOUBLE: return this->value.d[i] != 0.0;
+ default: assert(!"Should not get here."); break;
+ }
+
+ /* Must return something to make the compiler happy. This is clearly an
+ * error case.
+ */
+ return false;
+ }
+
+ float
+ ir_constant::get_float_component(unsigned i) const
+ {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT: return (float) this->value.u[i];
+ case GLSL_TYPE_INT: return (float) this->value.i[i];
+ case GLSL_TYPE_FLOAT: return this->value.f[i];
+ case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0f : 0.0f;
+ case GLSL_TYPE_DOUBLE: return (float) this->value.d[i];
+ default: assert(!"Should not get here."); break;
+ }
+
+ /* Must return something to make the compiler happy. This is clearly an
+ * error case.
+ */
+ return 0.0;
+ }
+
+ double
+ ir_constant::get_double_component(unsigned i) const
+ {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT: return (double) this->value.u[i];
+ case GLSL_TYPE_INT: return (double) this->value.i[i];
+ case GLSL_TYPE_FLOAT: return (double) this->value.f[i];
+ case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0 : 0.0;
+ case GLSL_TYPE_DOUBLE: return this->value.d[i];
+ default: assert(!"Should not get here."); break;
+ }
+
+ /* Must return something to make the compiler happy. This is clearly an
+ * error case.
+ */
+ return 0.0;
+ }
+
+ int
+ ir_constant::get_int_component(unsigned i) const
+ {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT: return this->value.u[i];
+ case GLSL_TYPE_INT: return this->value.i[i];
+ case GLSL_TYPE_FLOAT: return (int) this->value.f[i];
+ case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0;
+ case GLSL_TYPE_DOUBLE: return (int) this->value.d[i];
+ default: assert(!"Should not get here."); break;
+ }
+
+ /* Must return something to make the compiler happy. This is clearly an
+ * error case.
+ */
+ return 0;
+ }
+
+ unsigned
+ ir_constant::get_uint_component(unsigned i) const
+ {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT: return this->value.u[i];
+ case GLSL_TYPE_INT: return this->value.i[i];
+ case GLSL_TYPE_FLOAT: return (unsigned) this->value.f[i];
+ case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0;
+ case GLSL_TYPE_DOUBLE: return (unsigned) this->value.d[i];
+ default: assert(!"Should not get here."); break;
+ }
+
+ /* Must return something to make the compiler happy. This is clearly an
+ * error case.
+ */
+ return 0;
+ }
+
+ ir_constant *
+ ir_constant::get_array_element(unsigned i) const
+ {
+ assert(this->type->is_array());
+
+ /* From page 35 (page 41 of the PDF) of the GLSL 1.20 spec:
+ *
+ * "Behavior is undefined if a shader subscripts an array with an index
+ * less than 0 or greater than or equal to the size the array was
+ * declared with."
+ *
+ * Most out-of-bounds accesses are removed before execution gets this far,
+ * but a non-constant array index can still be constant-folded into an
+ * out-of-range value, so clamp the index to the valid range here.
+ */
+ if (int(i) < 0)
+ i = 0;
+ else if (i >= this->type->length)
+ i = this->type->length - 1;
+
+ return array_elements[i];
+ }
+
+ ir_constant *
+ ir_constant::get_record_field(const char *name)
+ {
+ int idx = this->type->field_index(name);
+
+ if (idx < 0)
+ return NULL;
+
+ if (this->components.is_empty())
+ return NULL;
+
+ exec_node *node = this->components.head;
+ for (int i = 0; i < idx; i++) {
+ node = node->next;
+
+ /* If the end of the list is encountered before the element matching the
+ * requested field is found, return NULL.
+ */
+ if (node->is_tail_sentinel())
+ return NULL;
+ }
+
+ return (ir_constant *) node;
+ }
+
+ void
+ ir_constant::copy_offset(ir_constant *src, int offset)
+ {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_BOOL: {
+ unsigned int size = src->type->components();
+ assert (size <= this->type->components() - offset);
+ for (unsigned int i=0; i<size; i++) {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT:
+ value.u[i+offset] = src->get_uint_component(i);
+ break;
+ case GLSL_TYPE_INT:
+ value.i[i+offset] = src->get_int_component(i);
+ break;
+ case GLSL_TYPE_FLOAT:
+ value.f[i+offset] = src->get_float_component(i);
+ break;
+ case GLSL_TYPE_BOOL:
+ value.b[i+offset] = src->get_bool_component(i);
+ break;
+ case GLSL_TYPE_DOUBLE:
+ value.d[i+offset] = src->get_double_component(i);
+ break;
+ default: // Shut up the compiler
+ break;
+ }
+ }
+ break;
+ }
+
+ case GLSL_TYPE_STRUCT: {
+ assert (src->type == this->type);
+ this->components.make_empty();
+ foreach_in_list(ir_constant, orig, &src->components) {
+ this->components.push_tail(orig->clone(this, NULL));
+ }
+ break;
+ }
+
+ case GLSL_TYPE_ARRAY: {
+ assert (src->type == this->type);
+ for (unsigned i = 0; i < this->type->length; i++) {
+ this->array_elements[i] = src->array_elements[i]->clone(this, NULL);
+ }
+ break;
+ }
+
+ default:
+ assert(!"Should not get here.");
+ break;
+ }
+ }
+
+ void
+ ir_constant::copy_masked_offset(ir_constant *src, int offset, unsigned int mask)
+ {
+ assert (!type->is_array() && !type->is_record());
+
+ if (!type->is_vector() && !type->is_matrix()) {
+ offset = 0;
+ mask = 1;
+ }
+
+ int id = 0;
+ for (int i=0; i<4; i++) {
+ if (mask & (1 << i)) {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT:
+ value.u[i+offset] = src->get_uint_component(id++);
+ break;
+ case GLSL_TYPE_INT:
+ value.i[i+offset] = src->get_int_component(id++);
+ break;
+ case GLSL_TYPE_FLOAT:
+ value.f[i+offset] = src->get_float_component(id++);
+ break;
+ case GLSL_TYPE_BOOL:
+ value.b[i+offset] = src->get_bool_component(id++);
+ break;
+ case GLSL_TYPE_DOUBLE:
+ value.d[i+offset] = src->get_double_component(id++);
+ break;
+ default:
+ assert(!"Should not get here.");
+ return;
+ }
+ }
+ }
+ }
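+ /* Usage sketch: the mask is a component bitfield, so a call such as
+ *
+ *    dst->copy_masked_offset(src, 0, 0x5);
+ *
+ * copies src components 0 and 1 into dst components 0 and 2 (bits 0 and 2
+ * of the mask). dst and src are placeholders for two vector ir_constants.
+ */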
+
+ bool
+ ir_constant::has_value(const ir_constant *c) const
+ {
+ if (this->type != c->type)
+ return false;
+
+ if (this->type->is_array()) {
+ for (unsigned i = 0; i < this->type->length; i++) {
+ if (!this->array_elements[i]->has_value(c->array_elements[i]))
+ return false;
+ }
+ return true;
+ }
+
+ if (this->type->base_type == GLSL_TYPE_STRUCT) {
+ const exec_node *a_node = this->components.head;
+ const exec_node *b_node = c->components.head;
+
+ while (!a_node->is_tail_sentinel()) {
+ assert(!b_node->is_tail_sentinel());
+
+ const ir_constant *const a_field = (ir_constant *) a_node;
+ const ir_constant *const b_field = (ir_constant *) b_node;
+
+ if (!a_field->has_value(b_field))
+ return false;
+
+ a_node = a_node->next;
+ b_node = b_node->next;
+ }
+
+ return true;
+ }
+
+ for (unsigned i = 0; i < this->type->components(); i++) {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT:
+ if (this->value.u[i] != c->value.u[i])
+ return false;
+ break;
+ case GLSL_TYPE_INT:
+ if (this->value.i[i] != c->value.i[i])
+ return false;
+ break;
+ case GLSL_TYPE_FLOAT:
+ if (this->value.f[i] != c->value.f[i])
+ return false;
+ break;
+ case GLSL_TYPE_BOOL:
+ if (this->value.b[i] != c->value.b[i])
+ return false;
+ break;
+ case GLSL_TYPE_DOUBLE:
+ if (this->value.d[i] != c->value.d[i])
+ return false;
+ break;
+ default:
+ assert(!"Should not get here.");
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ bool
+ ir_constant::is_value(float f, int i) const
+ {
+ if (!this->type->is_scalar() && !this->type->is_vector())
+ return false;
+
+ /* Only accept boolean values for 0/1. */
+ if (int(bool(i)) != i && this->type->is_boolean())
+ return false;
+
+ for (unsigned c = 0; c < this->type->vector_elements; c++) {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ if (this->value.f[c] != f)
+ return false;
+ break;
+ case GLSL_TYPE_INT:
+ if (this->value.i[c] != i)
+ return false;
+ break;
+ case GLSL_TYPE_UINT:
+ if (this->value.u[c] != unsigned(i))
+ return false;
+ break;
+ case GLSL_TYPE_BOOL:
+ if (this->value.b[c] != bool(i))
+ return false;
+ break;
+ case GLSL_TYPE_DOUBLE:
+ if (this->value.d[c] != double(f))
+ return false;
+ break;
+ default:
+ /* The only other base types are structures, arrays, and samplers.
+ * Samplers cannot be constants, and the others should have been
+ * filtered out above.
+ */
+ assert(!"Should not get here.");
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ bool
+ ir_constant::is_zero() const
+ {
+ return is_value(0.0, 0);
+ }
+
+ bool
+ ir_constant::is_one() const
+ {
+ return is_value(1.0, 1);
+ }
+
+ bool
+ ir_constant::is_negative_one() const
+ {
+ return is_value(-1.0, -1);
+ }
+
+ bool
+ ir_constant::is_uint16_constant() const
+ {
+ if (!type->is_integer())
+ return false;
+
+ return value.u[0] < (1 << 16);
+ }
+
+ ir_loop::ir_loop()
+ : ir_instruction(ir_type_loop)
+ {
+ }
+
+
+ ir_dereference_variable::ir_dereference_variable(ir_variable *var)
+ : ir_dereference(ir_type_dereference_variable)
+ {
+ assert(var != NULL);
+
+ this->var = var;
+ this->type = var->type;
+ }
+
+
+ ir_dereference_array::ir_dereference_array(ir_rvalue *value,
+ ir_rvalue *array_index)
+ : ir_dereference(ir_type_dereference_array)
+ {
+ this->array_index = array_index;
+ this->set_array(value);
+ }
+
+
+ ir_dereference_array::ir_dereference_array(ir_variable *var,
+ ir_rvalue *array_index)
+ : ir_dereference(ir_type_dereference_array)
+ {
+ void *ctx = ralloc_parent(var);
+
+ this->array_index = array_index;
+ this->set_array(new(ctx) ir_dereference_variable(var));
+ }
+
+
+ void
+ ir_dereference_array::set_array(ir_rvalue *value)
+ {
+ assert(value != NULL);
+
+ this->array = value;
+
+ const glsl_type *const vt = this->array->type;
+
+ if (vt->is_array()) {
+ type = vt->fields.array;
+ } else if (vt->is_matrix()) {
+ type = vt->column_type();
+ } else if (vt->is_vector()) {
+ type = vt->get_base_type();
+ }
+ }
+
+
+ ir_dereference_record::ir_dereference_record(ir_rvalue *value,
+ const char *field)
+ : ir_dereference(ir_type_dereference_record)
+ {
+ assert(value != NULL);
+
+ this->record = value;
+ this->field = ralloc_strdup(this, field);
+ this->type = this->record->type->field_type(field);
+ }
+
+
+ ir_dereference_record::ir_dereference_record(ir_variable *var,
+ const char *field)
+ : ir_dereference(ir_type_dereference_record)
+ {
+ void *ctx = ralloc_parent(var);
+
+ this->record = new(ctx) ir_dereference_variable(var);
+ this->field = ralloc_strdup(this, field);
+ this->type = this->record->type->field_type(field);
+ }
+
+ bool
+ ir_dereference::is_lvalue() const
+ {
+ ir_variable *var = this->variable_referenced();
+
+ /* Every l-value dereference chain eventually ends in a variable.
+ */
+ if ((var == NULL) || var->data.read_only)
+ return false;
+
+ /* From section 4.1.7 of the GLSL 4.40 spec:
+ *
+ * "Opaque variables cannot be treated as l-values; hence cannot
+ * be used as out or inout function parameters, nor can they be
+ * assigned into."
+ */
+ if (this->type->contains_opaque())
+ return false;
+
+ return true;
+ }
+
+
+ static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples", "samples_identical" };
+
+ const char *ir_texture::opcode_string()
+ {
+ assert((unsigned int) op < ARRAY_SIZE(tex_opcode_strs));
+ return tex_opcode_strs[op];
+ }
+
+ ir_texture_opcode
+ ir_texture::get_opcode(const char *str)
+ {
+ const int count = sizeof(tex_opcode_strs) / sizeof(tex_opcode_strs[0]);
+ for (int op = 0; op < count; op++) {
+ if (strcmp(str, tex_opcode_strs[op]) == 0)
+ return (ir_texture_opcode) op;
+ }
+ return (ir_texture_opcode) -1;
+ }
+
+
+ void
+ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type)
+ {
+ assert(sampler != NULL);
+ assert(type != NULL);
+ this->sampler = sampler;
+ this->type = type;
+
+ if (this->op == ir_txs || this->op == ir_query_levels ||
+ this->op == ir_texture_samples) {
+ assert(type->base_type == GLSL_TYPE_INT);
+ } else if (this->op == ir_lod) {
+ assert(type->vector_elements == 2);
+ assert(type->base_type == GLSL_TYPE_FLOAT);
+ } else if (this->op == ir_samples_identical) {
+ assert(type == glsl_type::bool_type);
+ assert(sampler->type->base_type == GLSL_TYPE_SAMPLER);
+ assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS);
+ } else {
+ assert(sampler->type->sampler_type == (int) type->base_type);
+ if (sampler->type->sampler_shadow)
+ assert(type->vector_elements == 4 || type->vector_elements == 1);
+ else
+ assert(type->vector_elements == 4);
+ }
+ }
+
+
+ void
+ ir_swizzle::init_mask(const unsigned *comp, unsigned count)
+ {
+ assert((count >= 1) && (count <= 4));
+
+ memset(&this->mask, 0, sizeof(this->mask));
+ this->mask.num_components = count;
+
+ unsigned dup_mask = 0;
+ switch (count) {
+ case 4:
+ assert(comp[3] <= 3);
+ dup_mask |= (1U << comp[3])
+ & ((1U << comp[0]) | (1U << comp[1]) | (1U << comp[2]));
+ this->mask.w = comp[3];
+
+ case 3:
+ assert(comp[2] <= 3);
+ dup_mask |= (1U << comp[2])
+ & ((1U << comp[0]) | (1U << comp[1]));
+ this->mask.z = comp[2];
+
+ case 2:
+ assert(comp[1] <= 3);
+ dup_mask |= (1U << comp[1])
+ & ((1U << comp[0]));
+ this->mask.y = comp[1];
+
+ case 1:
+ assert(comp[0] <= 3);
+ this->mask.x = comp[0];
+ }
+
+ this->mask.has_duplicates = dup_mask != 0;
+
+ /* Based on the number of elements in the swizzle and the base type
+ * (i.e., float, int, unsigned, or bool) of the vector being swizzled,
+ * generate the type of the resulting value.
+ */
+ type = glsl_type::get_instance(val->type->base_type, mask.num_components, 1);
+ }
+
+ ir_swizzle::ir_swizzle(ir_rvalue *val, unsigned x, unsigned y, unsigned z,
+ unsigned w, unsigned count)
+ : ir_rvalue(ir_type_swizzle), val(val)
+ {
+ const unsigned components[4] = { x, y, z, w };
+ this->init_mask(components, count);
+ }
+
+ ir_swizzle::ir_swizzle(ir_rvalue *val, const unsigned *comp,
+ unsigned count)
+ : ir_rvalue(ir_type_swizzle), val(val)
+ {
+ this->init_mask(comp, count);
+ }
+
+ ir_swizzle::ir_swizzle(ir_rvalue *val, ir_swizzle_mask mask)
+ : ir_rvalue(ir_type_swizzle)
+ {
+ this->val = val;
+ this->mask = mask;
+ this->type = glsl_type::get_instance(val->type->base_type,
+ mask.num_components, 1);
+ }
+
+ #define X 1
+ #define R 5
+ #define S 9
+ #define I 13
+
+ ir_swizzle *
+ ir_swizzle::create(ir_rvalue *val, const char *str, unsigned vector_length)
+ {
+ void *ctx = ralloc_parent(val);
+
+ /* For each possible swizzle character, this table encodes the value in
+ * \c idx_map that represents the 0th element of the vector. For invalid
+ * swizzle characters (e.g., 'k'), a special value is used that will allow
+ * detection of errors.
+ */
+ static const unsigned char base_idx[26] = {
+ /* a b c d e f g h i j k l m */
+ R, R, I, I, I, I, R, I, I, I, I, I, I,
+ /* n o p q r s t u v w x y z */
+ I, I, S, S, R, S, S, I, I, X, X, X, X
+ };
+
+ /* Each valid swizzle character has an entry in the previous table. This
+ * table encodes the base index encoded in the previous table plus the actual
+ * index of the swizzle character. When processing swizzles, the first
+ * character in the string is indexed in the previous table. Each character
+ * in the string is indexed in this table, and the value found there has the
+ * value from the first table subtracted. The result must be in the range
+ * [0,3].
+ *
+ * For example, the string "wzyx" will get X from the first table. Each of
+ * the characters will get X+3, X+2, X+1, and X+0 from this table. After
+ * subtraction, the swizzle values are { 3, 2, 1, 0 }.
+ *
+ * The string "wzrg" will get X from the first table. Each of the characters
+ * will get X+3, X+2, R+0, and R+1 from this table. After subtraction, the
+ * swizzle values are { 3, 2, 4, 5 }. Since 4 and 5 are outside the range
+ * [0,3], the error is detected.
+ */
+ static const unsigned char idx_map[26] = {
+ /* a b c d e f g h i j k l m */
+ R+3, R+2, 0, 0, 0, 0, R+1, 0, 0, 0, 0, 0, 0,
+ /* n o p q r s t u v w x y z */
+ 0, 0, S+2, S+3, R+0, S+0, S+1, 0, 0, X+3, X+0, X+1, X+2
+ };
+
+ int swiz_idx[4] = { 0, 0, 0, 0 };
+ unsigned i;
+
+
+ /* Validate the first character in the swizzle string and look up the base
+ * index value as described above.
+ */
+ if ((str[0] < 'a') || (str[0] > 'z'))
+ return NULL;
+
+ const unsigned base = base_idx[str[0] - 'a'];
+
+
+ for (i = 0; (i < 4) && (str[i] != '\0'); i++) {
+ /* Validate the next character, and, as described above, convert it to a
+ * swizzle index.
+ */
+ if ((str[i] < 'a') || (str[i] > 'z'))
+ return NULL;
+
+ swiz_idx[i] = idx_map[str[i] - 'a'] - base;
+ if ((swiz_idx[i] < 0) || (swiz_idx[i] >= (int) vector_length))
+ return NULL;
+ }
+
+ if (str[i] != '\0')
+ return NULL;
+
+ return new(ctx) ir_swizzle(val, swiz_idx[0], swiz_idx[1], swiz_idx[2],
+ swiz_idx[3], i);
+ }
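+ /* For example, ir_swizzle::create(val, "wzyx", 4) yields the reversal
+ * swizzle { 3, 2, 1, 0 }, while a string that mixes naming sets or indexes
+ * past the vector length (e.g. "wzrg", or "z" on a vec2) returns NULL, as
+ * described in the tables above. val stands for the r-value being swizzled.
+ */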
+
+ #undef X
+ #undef R
+ #undef S
+ #undef I
+
+ ir_variable *
+ ir_swizzle::variable_referenced() const
+ {
+ return this->val->variable_referenced();
+ }
+
+
+ bool ir_variable::temporaries_allocate_names = false;
+
+ const char ir_variable::tmp_name[] = "compiler_temp";
+
+ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
+ ir_variable_mode mode)
+ : ir_instruction(ir_type_variable)
+ {
+ this->type = type;
+
+ if (mode == ir_var_temporary && !ir_variable::temporaries_allocate_names)
+ name = NULL;
+
+ /* The ir_variable clone method may call this constructor with name set to
+ * tmp_name.
+ */
+ assert(name != NULL
+ || mode == ir_var_temporary
+ || mode == ir_var_function_in
+ || mode == ir_var_function_out
+ || mode == ir_var_function_inout);
+ assert(name != ir_variable::tmp_name
+ || mode == ir_var_temporary);
+ if (mode == ir_var_temporary
+ && (name == NULL || name == ir_variable::tmp_name)) {
+ this->name = ir_variable::tmp_name;
+ } else {
+ this->name = ralloc_strdup(this, name);
+ }
+
+ this->u.max_ifc_array_access = NULL;
+
+ this->data.explicit_location = false;
+ this->data.has_initializer = false;
+ this->data.location = -1;
+ this->data.location_frac = 0;
+ this->data.binding = 0;
+ this->data.warn_extension_index = 0;
+ this->constant_value = NULL;
+ this->constant_initializer = NULL;
+ this->data.origin_upper_left = false;
+ this->data.pixel_center_integer = false;
+ this->data.depth_layout = ir_depth_layout_none;
+ this->data.used = false;
+ this->data.always_active_io = false;
+ this->data.read_only = false;
+ this->data.centroid = false;
+ this->data.sample = false;
+ this->data.patch = false;
+ this->data.invariant = false;
+ this->data.how_declared = ir_var_declared_normally;
+ this->data.mode = mode;
+ this->data.interpolation = INTERP_QUALIFIER_NONE;
+ this->data.max_array_access = 0;
+ this->data.offset = 0;
+ this->data.precision = GLSL_PRECISION_NONE;
+ this->data.image_read_only = false;
+ this->data.image_write_only = false;
+ this->data.image_coherent = false;
+ this->data.image_volatile = false;
+ this->data.image_restrict = false;
+ this->data.from_ssbo_unsized_array = false;
+
+ if (type != NULL) {
+ if (type->base_type == GLSL_TYPE_SAMPLER)
+ this->data.read_only = true;
+
+ if (type->is_interface())
+ this->init_interface_type(type);
+ else if (type->without_array()->is_interface())
+ this->init_interface_type(type->without_array());
+ }
+ }
+
+
+ const char *
+ interpolation_string(unsigned interpolation)
+ {
+ switch (interpolation) {
+ case INTERP_QUALIFIER_NONE: return "no";
+ case INTERP_QUALIFIER_SMOOTH: return "smooth";
+ case INTERP_QUALIFIER_FLAT: return "flat";
+ case INTERP_QUALIFIER_NOPERSPECTIVE: return "noperspective";
+ }
+
+ assert(!"Should not get here.");
+ return "";
+ }
+
+
+ glsl_interp_qualifier
+ ir_variable::determine_interpolation_mode(bool flat_shade)
+ {
+ if (this->data.interpolation != INTERP_QUALIFIER_NONE)
+ return (glsl_interp_qualifier) this->data.interpolation;
+ int location = this->data.location;
+ bool is_gl_Color =
+ location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1;
+ if (flat_shade && is_gl_Color)
+ return INTERP_QUALIFIER_FLAT;
+ else
+ return INTERP_QUALIFIER_SMOOTH;
+ }
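+ /* For example, an undecorated gl_Color input resolves to
+ * INTERP_QUALIFIER_FLAT when flat_shade is true and to
+ * INTERP_QUALIFIER_SMOOTH otherwise; an explicit interpolation qualifier
+ * on the variable always takes precedence.
+ */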
+
+ const char *const ir_variable::warn_extension_table[] = {
+ "",
+ "GL_ARB_shader_stencil_export",
+ "GL_AMD_shader_stencil_export",
+ };
+
+ void
+ ir_variable::enable_extension_warning(const char *extension)
+ {
+ for (unsigned i = 0; i < ARRAY_SIZE(warn_extension_table); i++) {
+ if (strcmp(warn_extension_table[i], extension) == 0) {
+ this->data.warn_extension_index = i;
+ return;
+ }
+ }
+
+ assert(!"Should not get here.");
+ this->data.warn_extension_index = 0;
+ }
+
+ const char *
+ ir_variable::get_extension_warning() const
+ {
+ return this->data.warn_extension_index == 0
+ ? NULL : warn_extension_table[this->data.warn_extension_index];
+ }
+
+ ir_function_signature::ir_function_signature(const glsl_type *return_type,
+ builtin_available_predicate b)
+ : ir_instruction(ir_type_function_signature),
+ return_type(return_type), is_defined(false), is_intrinsic(false),
+ builtin_avail(b), _function(NULL)
+ {
+ this->origin = NULL;
+ }
+
+
+ bool
+ ir_function_signature::is_builtin() const
+ {
+ return builtin_avail != NULL;
+ }
+
+
+ bool
+ ir_function_signature::is_builtin_available(const _mesa_glsl_parse_state *state) const
+ {
+ /* We can't call the predicate without a state pointer, so just say that
+ * the signature is available. At compile time, we need the filtering,
+ * but also receive a valid state pointer. At link time, we're resolving
+ * imported built-in prototypes to their definitions, which will always
+ * be an exact match. So we can skip the filtering.
+ */
+ if (state == NULL)
+ return true;
+
+ assert(builtin_avail != NULL);
+ return builtin_avail(state);
+ }
+
+
+ static bool
+ modes_match(unsigned a, unsigned b)
+ {
+ if (a == b)
+ return true;
+
+ /* Accept "in" vs. "const in" */
+ if ((a == ir_var_const_in && b == ir_var_function_in) ||
+ (b == ir_var_const_in && a == ir_var_function_in))
+ return true;
+
+ return false;
+ }
+
+
+ const char *
+ ir_function_signature::qualifiers_match(exec_list *params)
+ {
+ /* check that the qualifiers match. */
+ foreach_two_lists(a_node, &this->parameters, b_node, params) {
+ ir_variable *a = (ir_variable *) a_node;
+ ir_variable *b = (ir_variable *) b_node;
+
+ if (a->data.read_only != b->data.read_only ||
+ !modes_match(a->data.mode, b->data.mode) ||
+ a->data.interpolation != b->data.interpolation ||
+ a->data.centroid != b->data.centroid ||
+ a->data.sample != b->data.sample ||
+ a->data.patch != b->data.patch ||
+ a->data.image_read_only != b->data.image_read_only ||
+ a->data.image_write_only != b->data.image_write_only ||
+ a->data.image_coherent != b->data.image_coherent ||
+ a->data.image_volatile != b->data.image_volatile ||
+ a->data.image_restrict != b->data.image_restrict) {
+
+ /* parameter a's qualifiers don't match */
+ return a->name;
+ }
+ }
+ return NULL;
+ }
+
+
+ void
+ ir_function_signature::replace_parameters(exec_list *new_params)
+ {
+ /* Destroy all of the previous parameter information. If the previous
+ * parameter information comes from the function prototype, it may either
+ * specify incorrect parameter names or not have names at all.
+ */
+ new_params->move_nodes_to(&parameters);
+ }
+
+
+ ir_function::ir_function(const char *name)
+ : ir_instruction(ir_type_function)
+ {
+ this->subroutine_index = -1;
+ this->name = ralloc_strdup(this, name);
+ }
+
+
+ bool
+ ir_function::has_user_signature()
+ {
+ foreach_in_list(ir_function_signature, sig, &this->signatures) {
+ if (!sig->is_builtin())
+ return true;
+ }
+ return false;
+ }
+
+
+ ir_rvalue *
+ ir_rvalue::error_value(void *mem_ctx)
+ {
+ ir_rvalue *v = new(mem_ctx) ir_rvalue(ir_type_unset);
+
+ v->type = glsl_type::error_type;
+ return v;
+ }
+
+
+ void
+ visit_exec_list(exec_list *list, ir_visitor *visitor)
+ {
+ foreach_in_list_safe(ir_instruction, node, list) {
+ node->accept(visitor);
+ }
+ }
+
+
+ static void
+ steal_memory(ir_instruction *ir, void *new_ctx)
+ {
+ ir_variable *var = ir->as_variable();
+ ir_function *fn = ir->as_function();
+ ir_constant *constant = ir->as_constant();
+ if (var != NULL && var->constant_value != NULL)
+ steal_memory(var->constant_value, ir);
+
+ if (var != NULL && var->constant_initializer != NULL)
+ steal_memory(var->constant_initializer, ir);
+
+ if (fn != NULL && fn->subroutine_types)
+ ralloc_steal(new_ctx, fn->subroutine_types);
+
+ /* The components of aggregate constants are not visited by the normal
+ * visitor, so steal their values by hand.
+ */
+ if (constant != NULL) {
+ if (constant->type->is_record()) {
+ foreach_in_list(ir_constant, field, &constant->components) {
+ steal_memory(field, ir);
+ }
+ } else if (constant->type->is_array()) {
+ for (unsigned int i = 0; i < constant->type->length; i++) {
+ steal_memory(constant->array_elements[i], ir);
+ }
+ }
+ }
+
+ ralloc_steal(new_ctx, ir);
+ }
+
+
+ void
+ reparent_ir(exec_list *list, void *mem_ctx)
+ {
+ foreach_in_list(ir_instruction, node, list) {
+ visit_tree(node, steal_memory, mem_ctx);
+ }
+ }
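+ /* Usage sketch: after building or cloning instructions under a temporary
+ * ralloc context, something like
+ *
+ *    reparent_ir(instructions, permanent_ctx);
+ *
+ * hands ownership of every node in the list (including nested constant
+ * values and subroutine types) to permanent_ctx. The names here are
+ * placeholders, not a fixed calling convention.
+ */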
+
+
+ static ir_rvalue *
+ try_min_one(ir_rvalue *ir)
+ {
+ ir_expression *expr = ir->as_expression();
+
+ if (!expr || expr->operation != ir_binop_min)
+ return NULL;
+
+ if (expr->operands[0]->is_one())
+ return expr->operands[1];
+
+ if (expr->operands[1]->is_one())
+ return expr->operands[0];
+
+ return NULL;
+ }
+
+ static ir_rvalue *
+ try_max_zero(ir_rvalue *ir)
+ {
+ ir_expression *expr = ir->as_expression();
+
+ if (!expr || expr->operation != ir_binop_max)
+ return NULL;
+
+ if (expr->operands[0]->is_zero())
+ return expr->operands[1];
+
+ if (expr->operands[1]->is_zero())
+ return expr->operands[0];
+
+ return NULL;
+ }
+
+ ir_rvalue *
+ ir_rvalue::as_rvalue_to_saturate()
+ {
+ ir_expression *expr = this->as_expression();
+
+ if (!expr)
+ return NULL;
+
+ ir_rvalue *max_zero = try_max_zero(expr);
+ if (max_zero) {
+ return try_min_one(max_zero);
+ } else {
+ ir_rvalue *min_one = try_min_one(expr);
+ if (min_one) {
+ return try_max_zero(min_one);
+ }
+ }
+
+ return NULL;
+ }
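+ /* For example, given an expression tree for min(max(x, 0.0), 1.0) (or the
+ * equivalent max-of-min form), this returns the inner r-value x so the
+ * caller can replace the whole tree with a saturate of x; any other shape
+ * yields NULL.
+ */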
+
+
+ unsigned
+ vertices_per_prim(GLenum prim)
+ {
+ switch (prim) {
+ case GL_POINTS:
+ return 1;
+ case GL_LINES:
+ return 2;
+ case GL_TRIANGLES:
+ return 3;
+ case GL_LINES_ADJACENCY:
+ return 4;
+ case GL_TRIANGLES_ADJACENCY:
+ return 6;
+ default:
+ assert(!"Bad primitive");
+ return 3;
+ }
+ }
+
+ /**
+ * Generate a string describing the mode of a variable
+ */
+ const char *
+ mode_string(const ir_variable *var)
+ {
+ switch (var->data.mode) {
+ case ir_var_auto:
+ return (var->data.read_only) ? "global constant" : "global variable";
+
+ case ir_var_uniform:
+ return "uniform";
+
+ case ir_var_shader_storage:
+ return "buffer";
+
+ case ir_var_shader_in:
+ return "shader input";
+
+ case ir_var_shader_out:
+ return "shader output";
+
+ case ir_var_function_in:
+ case ir_var_const_in:
+ return "function input";
+
+ case ir_var_function_out:
+ return "function output";
+
+ case ir_var_function_inout:
+ return "function inout";
+
+ case ir_var_system_value:
+ return "shader input";
+
+ case ir_var_temporary:
+ return "compiler temporary";
+
+ case ir_var_mode_count:
+ break;
+ }
+
+ assert(!"Should not get here.");
+ return "invalid variable";
+ }
--- /dev/null
- /**
- * \name Lowered floating point unpacking operations.
- *
- * \see lower_packing_builtins_visitor::split_unpack_half_2x16
- */
- /*@{*/
- ir_unop_unpack_half_2x16_split_x,
- ir_unop_unpack_half_2x16_split_y,
- /*@}*/
-
+ /* -*- c++ -*- */
+ /*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #pragma once
+ #ifndef IR_H
+ #define IR_H
+
+ #include <stdio.h>
+ #include <stdlib.h>
+
+ #include "util/ralloc.h"
+ #include "compiler/glsl_types.h"
+ #include "list.h"
+ #include "ir_visitor.h"
+ #include "ir_hierarchical_visitor.h"
+ #include "main/mtypes.h"
+
+ #ifdef __cplusplus
+
+ /**
+ * \defgroup IR Intermediate representation nodes
+ *
+ * @{
+ */
+
+ /**
+ * Class tags
+ *
+ * Each concrete class derived from \c ir_instruction has a value in this
+ * enumerant. The value for the type is stored in \c ir_instruction::ir_type
+ * by the constructor. While using type tags is not very C++, it is extremely
+ * convenient. For example, during debugging you can simply inspect
+ * \c ir_instruction::ir_type to find out the actual type of the object.
+ *
+ * In addition, it is possible to use a switch-statement based on
+ * \c ir_instruction::ir_type to select different behavior for different object
+ * types. For functions that have only slight differences for several object
+ * types, this allows writing very straightforward, readable code.
+ */
+ enum ir_node_type {
+ ir_type_dereference_array,
+ ir_type_dereference_record,
+ ir_type_dereference_variable,
+ ir_type_constant,
+ ir_type_expression,
+ ir_type_swizzle,
+ ir_type_texture,
+ ir_type_variable,
+ ir_type_assignment,
+ ir_type_call,
+ ir_type_function,
+ ir_type_function_signature,
+ ir_type_if,
+ ir_type_loop,
+ ir_type_loop_jump,
+ ir_type_return,
+ ir_type_discard,
+ ir_type_emit_vertex,
+ ir_type_end_primitive,
+ ir_type_barrier,
+ ir_type_max, /**< maximum ir_type enum number, for validation */
+ ir_type_unset = ir_type_max
+ };
+
+
+ /**
+ * Base class of all IR instructions
+ */
+ class ir_instruction : public exec_node {
+ public:
+ enum ir_node_type ir_type;
+
+ /**
+ * GCC 4.7+ and clang warn when deleting an ir_instruction unless
+ * there's a virtual destructor present. Because we almost
+ * universally use ralloc for our memory management of
+ * ir_instructions, the destructor doesn't need to do any work.
+ */
+ virtual ~ir_instruction()
+ {
+ }
+
+ /** ir_print_visitor helper for debugging. */
+ void print(void) const;
+ void fprint(FILE *f) const;
+
+ virtual void accept(ir_visitor *) = 0;
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *) = 0;
+ virtual ir_instruction *clone(void *mem_ctx,
+ struct hash_table *ht) const = 0;
+
+ bool is_rvalue() const
+ {
+ return ir_type == ir_type_dereference_array ||
+ ir_type == ir_type_dereference_record ||
+ ir_type == ir_type_dereference_variable ||
+ ir_type == ir_type_constant ||
+ ir_type == ir_type_expression ||
+ ir_type == ir_type_swizzle ||
+ ir_type == ir_type_texture;
+ }
+
+ bool is_dereference() const
+ {
+ return ir_type == ir_type_dereference_array ||
+ ir_type == ir_type_dereference_record ||
+ ir_type == ir_type_dereference_variable;
+ }
+
+ bool is_jump() const
+ {
+ return ir_type == ir_type_loop_jump ||
+ ir_type == ir_type_return ||
+ ir_type == ir_type_discard;
+ }
+
+ /**
+ * \name IR instruction downcast functions
+ *
+ * These functions either cast the object to a derived class or return
+ * \c NULL if the object's type does not match the specified derived class.
+ * Additional downcast functions will be added as needed.
+ */
+ /*@{*/
+ #define AS_BASE(TYPE) \
+ class ir_##TYPE *as_##TYPE() \
+ { \
+ assume(this != NULL); \
+ return is_##TYPE() ? (ir_##TYPE *) this : NULL; \
+ } \
+ const class ir_##TYPE *as_##TYPE() const \
+ { \
+ assume(this != NULL); \
+ return is_##TYPE() ? (ir_##TYPE *) this : NULL; \
+ }
+
+ AS_BASE(rvalue)
+ AS_BASE(dereference)
+ AS_BASE(jump)
+ #undef AS_BASE
+
+ #define AS_CHILD(TYPE) \
+ class ir_##TYPE * as_##TYPE() \
+ { \
+ assume(this != NULL); \
+ return ir_type == ir_type_##TYPE ? (ir_##TYPE *) this : NULL; \
+ } \
+ const class ir_##TYPE * as_##TYPE() const \
+ { \
+ assume(this != NULL); \
+ return ir_type == ir_type_##TYPE ? (const ir_##TYPE *) this : NULL; \
+ }
+ AS_CHILD(variable)
+ AS_CHILD(function)
+ AS_CHILD(dereference_array)
+ AS_CHILD(dereference_variable)
+ AS_CHILD(dereference_record)
+ AS_CHILD(expression)
+ AS_CHILD(loop)
+ AS_CHILD(assignment)
+ AS_CHILD(call)
+ AS_CHILD(return)
+ AS_CHILD(if)
+ AS_CHILD(swizzle)
+ AS_CHILD(texture)
+ AS_CHILD(constant)
+ AS_CHILD(discard)
+ #undef AS_CHILD
+ /*@}*/
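+ /* Usage sketch: these downcasts replace explicit ir_type checks, e.g.
+ *
+ *    ir_variable *var = inst->as_variable();
+ *    if (var != NULL) {
+ *       // inst really is an ir_variable
+ *    }
+ *
+ * where inst stands for any ir_instruction pointer.
+ */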
+
+ /**
+ * IR equality method: Return true if the referenced instruction would
+ * return the same value as this one.
+ *
+ * This is intended to be used for CSE and algebraic optimizations, on rvalues
+ * in particular. No support for other instruction types (assignments,
+ * jumps, calls, etc.) is planned.
+ */
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
+
+ protected:
+ ir_instruction(enum ir_node_type t)
+ : ir_type(t)
+ {
+ }
+
+ private:
+ ir_instruction()
+ {
+ assert(!"Should not get here.");
+ }
+ };
+
+
+ /**
+ * The base class for all "values"/expression trees.
+ */
+ class ir_rvalue : public ir_instruction {
+ public:
+ const struct glsl_type *type;
+
+ virtual ir_rvalue *clone(void *mem_ctx, struct hash_table *) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ ir_rvalue *as_rvalue_to_saturate();
+
+ virtual bool is_lvalue() const
+ {
+ return false;
+ }
+
+ /**
+ * Get the variable that is ultimately referenced by an r-value
+ */
+ virtual ir_variable *variable_referenced() const
+ {
+ return NULL;
+ }
+
+
+ /**
+ * If an r-value is a reference to a whole variable, get that variable
+ *
+ * \return
+ * Pointer to a variable that is completely dereferenced by the r-value. If
+ * the r-value is not a dereference or the dereference does not access the
+ * entire variable (i.e., it's just one array element, struct field), \c NULL
+ * is returned.
+ */
+ virtual ir_variable *whole_variable_referenced()
+ {
+ return NULL;
+ }
+
+ /**
+ * Determine if an r-value has the value zero
+ *
+ * The base implementation of this function always returns \c false. The
+ * \c ir_constant class over-rides this function to return \c true \b only
+ * for vector and scalar types that have all elements set to the value
+ * zero (or \c false for booleans).
+ *
+ * \sa ir_constant::has_value, ir_rvalue::is_one, ir_rvalue::is_negative_one
+ */
+ virtual bool is_zero() const;
+
+ /**
+ * Determine if an r-value has the value one
+ *
+ * The base implementation of this function always returns \c false. The
+ * \c ir_constant class over-rides this function to return \c true \b only
+ * for vector and scalar types that have all elements set to the value
+ * one (or \c true for booleans).
+ *
+ * \sa ir_constant::has_value, ir_rvalue::is_zero, ir_rvalue::is_negative_one
+ */
+ virtual bool is_one() const;
+
+ /**
+ * Determine if an r-value has the value negative one
+ *
+ * The base implementation of this function always returns \c false. The
+ * \c ir_constant class over-rides this function to return \c true \b only
+ * for vector and scalar types that have all elements set to the value
+ * negative one. For boolean types, the result is always \c false.
+ *
+ * \sa ir_constant::has_value, ir_rvalue::is_zero, ir_rvalue::is_one
+ */
+ virtual bool is_negative_one() const;
+
+ /**
+ * Determine if an r-value is an unsigned integer constant which can be
+ * stored in 16 bits.
+ *
+ * \sa ir_constant::is_uint16_constant.
+ */
+ virtual bool is_uint16_constant() const { return false; }
+
+ /**
+ * Return a generic value of error_type.
+ *
+ * Allocation will be performed with 'mem_ctx' as ralloc owner.
+ */
+ static ir_rvalue *error_value(void *mem_ctx);
+
+ protected:
+ ir_rvalue(enum ir_node_type t);
+ };
+
+
+ /**
+ * Variable storage classes
+ */
+ enum ir_variable_mode {
+ ir_var_auto = 0, /**< Function local variables and globals. */
+ ir_var_uniform, /**< Variable declared as a uniform. */
+ ir_var_shader_storage, /**< Variable declared as an ssbo. */
+ ir_var_shader_shared, /**< Variable declared as shared. */
+ ir_var_shader_in,
+ ir_var_shader_out,
+ ir_var_function_in,
+ ir_var_function_out,
+ ir_var_function_inout,
+ ir_var_const_in, /**< "in" param that must be a constant expression */
+ ir_var_system_value, /**< Ex: front-face, instance-id, etc. */
+ ir_var_temporary, /**< Temporary variable generated during compilation. */
+ ir_var_mode_count /**< Number of variable modes */
+ };
+
+ /**
+ * Enum keeping track of how a variable was declared. For error checking of
+ * the gl_PerVertex redeclaration rules.
+ */
+ enum ir_var_declaration_type {
+ /**
+ * Normal declaration (for most variables, this means an explicit
+ * declaration. Exception: temporaries are always implicitly declared, but
+ * they still use ir_var_declared_normally).
+ *
+ * Note: an ir_variable that represents a named interface block uses
+ * ir_var_declared_normally.
+ */
+ ir_var_declared_normally = 0,
+
+ /**
+ * Variable was explicitly declared (or re-declared) in an unnamed
+ * interface block.
+ */
+ ir_var_declared_in_block,
+
+ /**
+ * Variable is an implicitly declared built-in that has not been explicitly
+ * re-declared by the shader.
+ */
+ ir_var_declared_implicitly,
+
+ /**
+ * Variable is implicitly generated by the compiler and should not be
+ * visible via the API.
+ */
+ ir_var_hidden,
+ };
+
+ /**
+ * \brief Layout qualifiers for gl_FragDepth.
+ *
+ * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
+ * with a layout qualifier.
+ */
+ enum ir_depth_layout {
+ ir_depth_layout_none, /**< No depth layout is specified. */
+ ir_depth_layout_any,
+ ir_depth_layout_greater,
+ ir_depth_layout_less,
+ ir_depth_layout_unchanged
+ };
+
+ /**
+ * \brief Convert depth layout qualifier to string.
+ */
+ const char*
+ depth_layout_string(ir_depth_layout layout);
+
+ /**
+ * Description of built-in state associated with a uniform
+ *
+ * \sa ir_variable::state_slots
+ */
+ struct ir_state_slot {
+ int tokens[5];
+ int swizzle;
+ };
+
+
+ /**
+ * Get the string value for an interpolation qualifier
+ *
+ * \return The string that would be used in a shader to specify
+ * \c mode.
+ *
+ * This function is used to generate error messages of the form "shader
+ * uses %s interpolation qualifier", so in the case where there is no
+ * interpolation qualifier, it returns "no".
+ *
+ * This function should only be used on a shader input or output variable.
+ */
+ const char *interpolation_string(unsigned interpolation);
+
+
+ class ir_variable : public ir_instruction {
+ public:
+ ir_variable(const struct glsl_type *, const char *, ir_variable_mode);
+
+ virtual ir_variable *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+
+ /**
+ * Determine how this variable should be interpolated based on its
+ * interpolation qualifier (if present), whether it is gl_Color or
+ * gl_SecondaryColor, and whether flatshading is enabled in the current GL
+ * state.
+ *
+ * The return value will always be either INTERP_QUALIFIER_SMOOTH,
+ * INTERP_QUALIFIER_NOPERSPECTIVE, or INTERP_QUALIFIER_FLAT.
+ */
+ glsl_interp_qualifier determine_interpolation_mode(bool flat_shade);
+
+ /**
+ * Determine whether or not a variable is part of a uniform or
+ * shader storage block.
+ */
+ inline bool is_in_buffer_block() const
+ {
+ return (this->data.mode == ir_var_uniform ||
+ this->data.mode == ir_var_shader_storage) &&
+ this->interface_type != NULL;
+ }
+
+ /**
+ * Determine whether or not a variable is part of a shader storage block.
+ */
+ inline bool is_in_shader_storage_block() const
+ {
+ return this->data.mode == ir_var_shader_storage &&
+ this->interface_type != NULL;
+ }
+
+ /**
+ * Determine whether or not a variable is the declaration of an interface
+ * block
+ *
+ * For the first declaration below, there will be an \c ir_variable named
+ * "instance" whose type and whose instance_type will be the same
+ * \c glsl_type. For the second declaration, there will be an \c ir_variable
+ * named "f" whose type is float and whose instance_type is B2.
+ *
+ * "instance" is an interface instance variable, but "f" is not.
+ *
+ * uniform B1 {
+ * float f;
+ * } instance;
+ *
+ * uniform B2 {
+ * float f;
+ * };
+ */
+ inline bool is_interface_instance() const
+ {
+ return this->type->without_array() == this->interface_type;
+ }
+
+ /**
+ * Set this->interface_type on a newly created variable.
+ */
+ void init_interface_type(const struct glsl_type *type)
+ {
+ assert(this->interface_type == NULL);
+ this->interface_type = type;
+ if (this->is_interface_instance()) {
+ this->u.max_ifc_array_access =
+ rzalloc_array(this, unsigned, type->length);
+ }
+ }
+
+ /**
+ * Change this->interface_type on a variable that previously had a
+ * different, but compatible, interface_type. This is used during linking
+ * to set the size of arrays in interface blocks.
+ */
+ void change_interface_type(const struct glsl_type *type)
+ {
+ if (this->u.max_ifc_array_access != NULL) {
+ /* max_ifc_array_access has already been allocated, so make sure the
+ * new interface has the same number of fields as the old one.
+ */
+ assert(this->interface_type->length == type->length);
+ }
+ this->interface_type = type;
+ }
+
+ /**
+ * Change this->interface_type on a variable that previously had a
+ * different, and incompatible, interface_type. This is used during
+ * compilation to handle redeclaration of the built-in gl_PerVertex
+ * interface block.
+ */
+ void reinit_interface_type(const struct glsl_type *type)
+ {
+ if (this->u.max_ifc_array_access != NULL) {
+ #ifndef NDEBUG
+ /* Redeclaring gl_PerVertex is only allowed if none of the built-ins
+ * it defines have been accessed yet; so it's safe to throw away the
+ * old max_ifc_array_access pointer, since all of its values are
+ * zero.
+ */
+ for (unsigned i = 0; i < this->interface_type->length; i++)
+ assert(this->u.max_ifc_array_access[i] == 0);
+ #endif
+ ralloc_free(this->u.max_ifc_array_access);
+ this->u.max_ifc_array_access = NULL;
+ }
+ this->interface_type = NULL;
+ init_interface_type(type);
+ }
+
+ const glsl_type *get_interface_type() const
+ {
+ return this->interface_type;
+ }
+
+ /**
+ * Get the max_ifc_array_access pointer
+ *
+ * A "set" function is not needed because the array is dynmically allocated
+ * as necessary.
+ */
+ inline unsigned *get_max_ifc_array_access()
+ {
+ assert(this->data._num_state_slots == 0);
+ return this->u.max_ifc_array_access;
+ }
+
+ inline unsigned get_num_state_slots() const
+ {
+ assert(!this->is_interface_instance()
+ || this->data._num_state_slots == 0);
+ return this->data._num_state_slots;
+ }
+
+ inline void set_num_state_slots(unsigned n)
+ {
+ assert(!this->is_interface_instance()
+ || n == 0);
+ this->data._num_state_slots = n;
+ }
+
+ inline ir_state_slot *get_state_slots()
+ {
+ return this->is_interface_instance() ? NULL : this->u.state_slots;
+ }
+
+ inline const ir_state_slot *get_state_slots() const
+ {
+ return this->is_interface_instance() ? NULL : this->u.state_slots;
+ }
+
+ inline ir_state_slot *allocate_state_slots(unsigned n)
+ {
+ assert(!this->is_interface_instance());
+
+ this->u.state_slots = ralloc_array(this, ir_state_slot, n);
+ this->data._num_state_slots = 0;
+
+ if (this->u.state_slots != NULL)
+ this->data._num_state_slots = n;
+
+ return this->u.state_slots;
+ }
+
+ inline bool is_name_ralloced() const
+ {
+ return this->name != ir_variable::tmp_name;
+ }
+
+ /**
+ * Enable emitting extension warnings for this variable
+ */
+ void enable_extension_warning(const char *extension);
+
+ /**
+ * Get the extension warning string for this variable
+ *
+ * If warnings are not enabled, \c NULL is returned.
+ */
+ const char *get_extension_warning() const;
+
+ /**
+ * Declared type of the variable
+ */
+ const struct glsl_type *type;
+
+ /**
+ * Declared name of the variable
+ */
+ const char *name;
+
+ struct ir_variable_data {
+
+ /**
+ * Is the variable read-only?
+ *
+ * This is set for variables declared as \c const, shader inputs,
+ * and uniforms.
+ */
+ unsigned read_only:1;
+ unsigned centroid:1;
+ unsigned sample:1;
+ unsigned patch:1;
+ unsigned invariant:1;
+ unsigned precise:1;
+
+ /**
+ * Has this variable been used for reading or writing?
+ *
+ * Several GLSL semantic checks require knowledge of whether or not a
+ * variable has been used. For example, it is an error to redeclare a
+ * variable as invariant after it has been used.
+ *
+ * This is only maintained in the ast_to_hir.cpp path, not in
+ * Mesa's fixed function or ARB program paths.
+ */
+ unsigned used:1;
+
+ /**
+ * Has this variable been statically assigned?
+ *
+ * This answers whether the variable was assigned in any path of
+ * the shader during ast_to_hir. This doesn't answer whether it is
+ * still written after dead code removal, nor is it maintained in
+ * non-ast_to_hir.cpp (GLSL parsing) paths.
+ */
+ unsigned assigned:1;
+
+ /**
+ * When separate shader programs are enabled, only inputs/outputs between
+ * the stages of a multi-stage separate program can be safely removed
+ * from the shader interface. Other inputs/outputs must remain active.
+ */
+ unsigned always_active_io:1;
+
+ /**
+ * Enum indicating how the variable was declared. See
+ * ir_var_declaration_type.
+ *
+ * This is used to detect certain kinds of illegal variable redeclarations.
+ */
+ unsigned how_declared:2;
+
+ /**
+ * Storage class of the variable.
+ *
+ * \sa ir_variable_mode
+ */
+ unsigned mode:4;
+
+ /**
+ * Interpolation mode for shader inputs / outputs
+ *
+ * \sa ir_variable_interpolation
+ */
+ unsigned interpolation:2;
+
+ /**
+ * \name ARB_fragment_coord_conventions
+ * @{
+ */
+ unsigned origin_upper_left:1;
+ unsigned pixel_center_integer:1;
+ /*@}*/
+
+ /**
+ * Was the location explicitly set in the shader?
+ *
+ * If the location is explicitly set in the shader, it \b cannot be changed
+ * by the linker or by the API (e.g., calls to \c glBindAttribLocation have
+ * no effect).
+ */
+ unsigned explicit_location:1;
+ unsigned explicit_index:1;
+
+ /**
+ * Was an initial binding explicitly set in the shader?
+ *
+ * If so, constant_value contains an integer ir_constant representing the
+ * initial binding point.
+ */
+ unsigned explicit_binding:1;
+
+ /**
+ * Does this variable have an initializer?
+ *
+ * This is used by the linker to cross-validate initializers of global
+ * variables.
+ */
+ unsigned has_initializer:1;
+
+ /**
+ * Is this variable a generic output or input that has not yet been matched
+ * up to a variable in another stage of the pipeline?
+ *
+ * This is used by the linker as scratch storage while assigning locations
+ * to generic inputs and outputs.
+ */
+ unsigned is_unmatched_generic_inout:1;
+
+ /**
+ * If non-zero, then this variable may be packed along with other variables
+ * into a single varying slot, so this offset should be applied when
+ * accessing components. For example, an offset of 1 means that the x
+ * component of this variable is actually stored in component y of the
+ * location specified by \c location.
+ */
+ unsigned location_frac:2;
+
+ /**
+ * Layout of the matrix. Uses glsl_matrix_layout values.
+ */
+ unsigned matrix_layout:2;
+
+ /**
+ * Non-zero if this variable was created by lowering a named interface
+ * block which was not an array.
+ *
+ * Note that this variable and \c from_named_ifc_block_array will never
+ * both be non-zero.
+ */
+ unsigned from_named_ifc_block_nonarray:1;
+
+ /**
+ * Non-zero if this variable was created by lowering a named interface
+ * block which was an array.
+ *
+ * Note that this variable and \c from_named_ifc_block_nonarray will never
+ * both be non-zero.
+ */
+ unsigned from_named_ifc_block_array:1;
+
+ /**
+ * Non-zero if the variable must be a shader input. This is useful for
+ * constraints on function parameters.
+ */
+ unsigned must_be_shader_input:1;
+
+ /**
+ * Output index for dual source blending.
+ *
+ * \note
+ * The GLSL spec only allows the values 0 or 1 for the index in \b dual
+ * source blending.
+ */
+ unsigned index:1;
+
+ /**
+ * Precision qualifier.
+ *
+ * In desktop GLSL we do not care about precision qualifiers at all; in
+ * fact, the spec says that precision qualifiers are ignored.
+ *
+ * To keep things simple, this field is always GLSL_PRECISION_NONE on
+ * desktop shaders. That way every variable has the same precision
+ * value, and the checks the compiler adds for this field will never
+ * break a desktop shader compile.
+ */
+ unsigned precision:2;
+
+ /**
+ * \brief Layout qualifier for gl_FragDepth.
+ *
+ * This is not equal to \c ir_depth_layout_none if and only if this
+ * variable is \c gl_FragDepth and a layout qualifier is specified.
+ */
+ ir_depth_layout depth_layout:3;
+
+ /**
+ * ARB_shader_image_load_store qualifiers.
+ */
+ unsigned image_read_only:1; /**< "readonly" qualifier. */
+ unsigned image_write_only:1; /**< "writeonly" qualifier. */
+ unsigned image_coherent:1;
+ unsigned image_volatile:1;
+ unsigned image_restrict:1;
+
+ /**
+ * ARB_shader_storage_buffer_object
+ */
+ unsigned from_ssbo_unsized_array:1; /**< unsized array buffer variable. */
+
+ /**
+ * Emit a warning if this variable is accessed.
+ */
+ private:
+ uint8_t warn_extension_index;
+
+ public:
+ /** Image internal format if specified explicitly, otherwise GL_NONE. */
+ uint16_t image_format;
+
+ private:
+ /**
+ * Number of state slots used
+ *
+ * \note
+ * This could be stored in as few as 7 bits, if necessary. If it is made
+ * smaller, add an assertion to \c ir_variable::allocate_state_slots to
+ * be safe.
+ */
+ uint16_t _num_state_slots;
+
+ public:
+ /**
+ * Initial binding point for a sampler, atomic, or UBO.
+ *
+ * For array types, this represents the binding point for the first element.
+ */
+ int16_t binding;
+
+ /**
+ * Storage location of the base of this variable
+ *
+ * The precise meaning of this field depends on the nature of the variable.
+ *
+ * - Vertex shader input: one of the values from \c gl_vert_attrib.
+ * - Vertex shader output: one of the values from \c gl_varying_slot.
+ * - Geometry shader input: one of the values from \c gl_varying_slot.
+ * - Geometry shader output: one of the values from \c gl_varying_slot.
+ * - Fragment shader input: one of the values from \c gl_varying_slot.
+ * - Fragment shader output: one of the values from \c gl_frag_result.
+ * - Uniforms: Per-stage uniform slot number for default uniform block.
+ * - Uniforms: Index within the uniform block definition for UBO members.
+ * - Non-UBO Uniforms: explicit location until linking then reused to
+ * store uniform slot number.
+ * - Other: This field is not currently used.
+ *
+ * If the variable is a uniform, shader input, or shader output, and the
+ * slot has not been assigned, the value will be -1.
+ */
+ int location;
+
+ /**
+ * Vertex stream output identifier.
+ */
+ unsigned stream;
+
+ /**
+ * Location an atomic counter is stored at.
+ */
+ unsigned offset;
+
+ /**
+ * Highest element accessed with a constant expression array index
+ *
+ * Not used for non-array variables.
+ */
+ unsigned max_array_access;
+
+ /**
+ * Allow (only) ir_variable direct access private members.
+ */
+ friend class ir_variable;
+ } data;
+
+ /**
+ * Value assigned in the initializer of a variable declared "const"
+ */
+ ir_constant *constant_value;
+
+ /**
+ * Constant expression assigned in the initializer of the variable
+ *
+ * \warning
+ * This field and \c ::constant_value are distinct. Even if the two fields
+ * refer to constants with the same value, they must point to separate
+ * objects.
+ */
+ ir_constant *constant_initializer;
+
+ private:
+ static const char *const warn_extension_table[];
+
+ union {
+ /**
+ * For variables which satisfy the is_interface_instance() predicate,
+ * this points to an array of integers such that if the ith member of
+ * the interface block is an array, max_ifc_array_access[i] is the
+ * maximum array element of that member that has been accessed. If the
+ * ith member of the interface block is not an array,
+ * max_ifc_array_access[i] is unused.
+ *
+ * For variables whose type is not an interface block, this pointer is
+ * NULL.
+ */
+ unsigned *max_ifc_array_access;
+
+ /**
+ * Built-in state that backs this uniform
+ *
+ * Once set at variable creation, \c state_slots must remain invariant.
+ *
+ * If the variable is not a uniform, \c _num_state_slots will be zero
+ * and \c state_slots will be \c NULL.
+ */
+ ir_state_slot *state_slots;
+ } u;
+
+ /**
+ * For variables that are in an interface block or are an instance of an
+ * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
+ *
+ * \sa ir_variable::location
+ */
+ const glsl_type *interface_type;
+
+ /**
+ * Name used for anonymous compiler temporaries
+ */
+ static const char tmp_name[];
+
+ public:
+ /**
+ * Should the constructor keep names for ir_var_temporary variables?
+ *
+ * When this global is false, names passed to the constructor for
+ * \c ir_var_temporary variables will be dropped. Instead, the variable will
+ * be named "compiler_temp". This name will be in static storage.
+ *
+ * \warning
+ * \b NEVER change the mode of an \c ir_var_temporary.
+ *
+ * \warning
+ * This variable is \b not thread-safe. It is global, \b not
+ * per-context. It begins life false. A context can, at some point, make
+ * it true. From that point on, it will be true forever. This should be
+ * okay since it will only be set true while debugging.
+ */
+ static bool temporaries_allocate_names;
+ };
+
+ /**
+ * A function that returns whether a built-in function is available in the
+ * current shading language (based on version, ES or desktop, and extensions).
+ */
+ typedef bool (*builtin_available_predicate)(const _mesa_glsl_parse_state *);
+
+ /*@{*/
+ /**
+ * The representation of a function instance; may be the full definition or
+ * simply a prototype.
+ */
+ class ir_function_signature : public ir_instruction {
+ /* An ir_function_signature will be part of the list of signatures in
+ * an ir_function.
+ */
+ public:
+ ir_function_signature(const glsl_type *return_type,
+ builtin_available_predicate builtin_avail = NULL);
+
+ virtual ir_function_signature *clone(void *mem_ctx,
+ struct hash_table *ht) const;
+ ir_function_signature *clone_prototype(void *mem_ctx,
+ struct hash_table *ht) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ /**
+ * Attempt to evaluate this function as a constant expression,
+ * given a list of the actual parameters and the variable context.
+ * Returns NULL for non-built-ins.
+ */
+ ir_constant *constant_expression_value(exec_list *actual_parameters, struct hash_table *variable_context);
+
+ /**
+ * Get the name of the function for which this is a signature
+ */
+ const char *function_name() const;
+
+ /**
+ * Get a handle to the function for which this is a signature
+ *
+ * There is no setter function, this function returns a \c const pointer,
+ * and \c ir_function_signature::_function is private for a reason. The
+ * only way to make a connection between a function and function signature
+ * is via \c ir_function::add_signature. This helps ensure that certain
+ * invariants (i.e., a function signature is in the list of signatures for
+ * its \c _function) are met.
+ *
+ * \sa ir_function::add_signature
+ */
+ inline const class ir_function *function() const
+ {
+ return this->_function;
+ }
+
+ /**
+ * Check whether the qualifiers match between this signature's parameters
+ * and the supplied parameter list. If not, returns the name of the first
+ * parameter with mismatched qualifiers (for use in error messages).
+ */
+ const char *qualifiers_match(exec_list *params);
+
+ /**
+ * Replace the current parameter list with the given one. This is useful
+ * if the current information came from a prototype, and either has invalid
+ * or missing parameter names.
+ */
+ void replace_parameters(exec_list *new_params);
+
+ /**
+ * Function return type.
+ *
+ * \note This discards the optional precision qualifier.
+ */
+ const struct glsl_type *return_type;
+
+ /**
+ * List of ir_variable of function parameters.
+ *
+ * This represents the storage. The parameters passed in a particular
+ * call will be in ir_call::actual_parameters.
+ */
+ struct exec_list parameters;
+
+ /** Whether or not this function has a body (which may be empty). */
+ unsigned is_defined:1;
+
+ /** Whether or not this function signature is a built-in. */
+ bool is_builtin() const;
+
+ /**
+ * Whether or not this function is an intrinsic to be implemented
+ * by the driver.
+ */
+ bool is_intrinsic;
+
+ /** Whether or not a built-in is available for this shader. */
+ bool is_builtin_available(const _mesa_glsl_parse_state *state) const;
+
+ /** Body of instructions in the function. */
+ struct exec_list body;
+
+ private:
+ /**
+ * A function pointer to a predicate that answers whether a built-in
+ * function is available in the current shader. NULL if not a built-in.
+ */
+ builtin_available_predicate builtin_avail;
+
+ /** Function of which this signature is one overload. */
+ class ir_function *_function;
+
+ /** Function signature of which this one is a prototype clone */
+ const ir_function_signature *origin;
+
+ friend class ir_function;
+
+ /**
+ * Helper function to run a list of instructions for constant
+ * expression evaluation.
+ *
+ * The hash table represents the values of the visible variables.
+ * There are no scoping issues because the table is indexed on
+ * ir_variable pointers, not variable names.
+ *
+ * Returns false if the expression is not constant, true otherwise,
+ * and the value in *result if result is non-NULL.
+ */
+ bool constant_expression_evaluate_expression_list(const struct exec_list &body,
+ struct hash_table *variable_context,
+ ir_constant **result);
+ };
+
+
+ /**
+ * Header for tracking multiple overloaded functions with the same name.
+ * Contains a list of ir_function_signatures representing each of the
+ * actual functions.
+ */
+ class ir_function : public ir_instruction {
+ public:
+ ir_function(const char *name);
+
+ virtual ir_function *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ void add_signature(ir_function_signature *sig)
+ {
+ sig->_function = this;
+ this->signatures.push_tail(sig);
+ }
+
+ /**
+ * Find a signature that matches a set of actual parameters, taking implicit
+ * conversions into account. Also flags whether the match was exact.
+ */
+ ir_function_signature *matching_signature(_mesa_glsl_parse_state *state,
+ const exec_list *actual_param,
+ bool allow_builtins,
+ bool *match_is_exact);
+
+ /**
+ * Find a signature that matches a set of actual parameters, taking implicit
+ * conversions into account.
+ */
+ ir_function_signature *matching_signature(_mesa_glsl_parse_state *state,
+ const exec_list *actual_param,
+ bool allow_builtins);
+
+ /**
+ * Find a signature that exactly matches a set of actual parameters without
+ * any implicit type conversions.
+ */
+ ir_function_signature *exact_matching_signature(_mesa_glsl_parse_state *state,
+ const exec_list *actual_ps);
+
+ /**
+ * Name of the function.
+ */
+ const char *name;
+
+ /** Whether or not this function has a signature that isn't a built-in. */
+ bool has_user_signature();
+
+ /**
+ * List of ir_function_signature for each overloaded function with this name.
+ */
+ struct exec_list signatures;
+
+ /**
+ * Is this function a subroutine type declaration?
+ * e.g. subroutine void type1(float arg1);
+ */
+ bool is_subroutine;
+
+ /**
+ * Is this function associated with a subroutine type?
+ * e.g. subroutine (type1, type2) function_name { function_body };
+ * would have num_subroutine_types 2,
+ * and pointers to the type1 and type2 types.
+ */
+ int num_subroutine_types;
+ const struct glsl_type **subroutine_types;
+
+ int subroutine_index;
+ };
+
+ inline const char *ir_function_signature::function_name() const
+ {
+ return this->_function->name;
+ }
+ /*@}*/
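+
+ /* A minimal usage sketch (assuming a ralloc context \c mem_ctx, a parse
+  * state \c state and a populated parameter list \c params, all hypothetical
+  * names): overloads live as signatures under a single ir_function, and
+  * matching_signature() resolves a call against them, applying implicit
+  * conversions where needed.
+  *
+  * \code
+  *    ir_function *f = new(mem_ctx) ir_function("my_func");
+  *    ir_function_signature *sig =
+  *       new(mem_ctx) ir_function_signature(glsl_type::float_type);
+  *    f->add_signature(sig);
+  *    ir_function_signature *match =
+  *       f->matching_signature(state, &params, false);
+  * \endcode
+  */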
+
+
+ /**
+ * IR instruction representing high-level if-statements
+ */
+ class ir_if : public ir_instruction {
+ public:
+ ir_if(ir_rvalue *condition)
+ : ir_instruction(ir_type_if), condition(condition)
+ {
+ }
+
+ virtual ir_if *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ ir_rvalue *condition;
+ /** List of ir_instruction for the body of the then branch */
+ exec_list then_instructions;
+ /** List of ir_instruction for the body of the else branch */
+ exec_list else_instructions;
+ };
+
+
+ /**
+ * IR instruction representing a high-level loop structure.
+ */
+ class ir_loop : public ir_instruction {
+ public:
+ ir_loop();
+
+ virtual ir_loop *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ /** List of ir_instruction that make up the body of the loop. */
+ exec_list body_instructions;
+ };
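+
+ /* A minimal construction sketch (assuming a ralloc context \c mem_ctx and an
+  * already-built boolean r-value \c cond): control flow is expressed by
+  * nesting instruction lists inside ir_if and ir_loop nodes, e.g. a loop
+  * that breaks when the condition holds.
+  *
+  * \code
+  *    ir_if *branch = new(mem_ctx) ir_if(cond);
+  *    branch->then_instructions.push_tail(
+  *       new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break));
+  *    ir_loop *loop = new(mem_ctx) ir_loop();
+  *    loop->body_instructions.push_tail(branch);
+  * \endcode
+  */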
+
+
+ class ir_assignment : public ir_instruction {
+ public:
+ ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs, ir_rvalue *condition = NULL);
+
+ /**
+ * Construct an assignment with an explicit write mask
+ *
+ * \note
+ * Since a write mask is supplied, the LHS must already be a bare
+ * \c ir_dereference. There cannot be any swizzles in the LHS.
+ */
+ ir_assignment(ir_dereference *lhs, ir_rvalue *rhs, ir_rvalue *condition,
+ unsigned write_mask);
+
+ virtual ir_assignment *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ /**
+ * Get a whole variable written by an assignment
+ *
+ * If the LHS of the assignment writes a whole variable, the variable is
+ * returned. Otherwise \c NULL is returned. Examples of whole-variable
+ * assignment are:
+ *
+ * - Assigning to a scalar
+ * - Assigning to all components of a vector
+ * - Whole array (or matrix) assignment
+ * - Whole structure assignment
+ */
+ ir_variable *whole_variable_written();
+
+ /**
+ * Set the LHS of an assignment
+ */
+ void set_lhs(ir_rvalue *lhs);
+
+ /**
+ * Left-hand side of the assignment.
+ *
+ * This should be treated as read only. If you need to set the LHS of an
+ * assignment, use \c ir_assignment::set_lhs.
+ */
+ ir_dereference *lhs;
+
+ /**
+ * Value being assigned
+ */
+ ir_rvalue *rhs;
+
+ /**
+ * Optional condition for the assignment.
+ */
+ ir_rvalue *condition;
+
+
+ /**
+ * Component mask written
+ *
+ * For non-vector types in the LHS, this field will be zero. For vector
+ * types, a bit will be set for each component that is written. Note that
+ * for \c vec2 and \c vec3 types only the lower bits will ever be set.
+ *
+ * A partially-set write mask means that each enabled channel gets
+ * the value from a consecutive channel of the rhs. For example,
+ * to write just .xyw of gl_FragColor with color:
+ *
+ * (assign (constant bool (1)) (xyw)
+ * (var_ref gl_FragColor)
+ * (swiz xyw (var_ref color)))
+ */
+ unsigned write_mask:4;
+ };
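+
+ /* A minimal construction sketch (assuming a ralloc context \c mem_ctx and an
+  * existing vec4 variable \c dst): the write-mask constructor takes a bare
+  * dereference as the LHS and one mask bit per component to be written, with
+  * the RHS supplying one value per enabled bit.
+  *
+  * \code
+  *    ir_dereference_variable *lhs = new(mem_ctx) ir_dereference_variable(dst);
+  *    ir_rvalue *rhs = new(mem_ctx) ir_constant(1.0f, 3);
+  *    // Write only .xyz of dst (mask bits 0, 1 and 2).
+  *    ir_assignment *assign =
+  *       new(mem_ctx) ir_assignment(lhs, rhs, NULL, 0x7);
+  * \endcode
+  */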
+
+ /* Update ir_expression::get_num_operands() and operator_strs when
+ * updating this list.
+ */
+ enum ir_expression_operation {
+ ir_unop_bit_not,
+ ir_unop_logic_not,
+ ir_unop_neg,
+ ir_unop_abs,
+ ir_unop_sign,
+ ir_unop_rcp,
+ ir_unop_rsq,
+ ir_unop_sqrt,
+ ir_unop_exp, /**< Log base e on gentype */
+ ir_unop_log, /**< Natural log on gentype */
+ ir_unop_exp2,
+ ir_unop_log2,
+ ir_unop_f2i, /**< Float-to-integer conversion. */
+ ir_unop_f2u, /**< Float-to-unsigned conversion. */
+ ir_unop_i2f, /**< Integer-to-float conversion. */
+ ir_unop_f2b, /**< Float-to-boolean conversion */
+ ir_unop_b2f, /**< Boolean-to-float conversion */
+ ir_unop_i2b, /**< int-to-boolean conversion */
+ ir_unop_b2i, /**< Boolean-to-int conversion */
+ ir_unop_u2f, /**< Unsigned-to-float conversion. */
+ ir_unop_i2u, /**< Integer-to-unsigned conversion. */
+ ir_unop_u2i, /**< Unsigned-to-integer conversion. */
+ ir_unop_d2f, /**< Double-to-float conversion. */
+ ir_unop_f2d, /**< Float-to-double conversion. */
+ ir_unop_d2i, /**< Double-to-integer conversion. */
+ ir_unop_i2d, /**< Integer-to-double conversion. */
+ ir_unop_d2u, /**< Double-to-unsigned conversion. */
+ ir_unop_u2d, /**< Unsigned-to-double conversion. */
+ ir_unop_d2b, /**< Double-to-boolean conversion. */
+ ir_unop_bitcast_i2f, /**< Bit-identical int-to-float "conversion" */
+ ir_unop_bitcast_f2i, /**< Bit-identical float-to-int "conversion" */
+ ir_unop_bitcast_u2f, /**< Bit-identical uint-to-float "conversion" */
+ ir_unop_bitcast_f2u, /**< Bit-identical float-to-uint "conversion" */
+
+ /**
+ * \name Unary floating-point rounding operations.
+ */
+ /*@{*/
+ ir_unop_trunc,
+ ir_unop_ceil,
+ ir_unop_floor,
+ ir_unop_fract,
+ ir_unop_round_even,
+ /*@}*/
+
+ /**
+ * \name Trigonometric operations.
+ */
+ /*@{*/
+ ir_unop_sin,
+ ir_unop_cos,
+ /*@}*/
+
+ /**
+ * \name Partial derivatives.
+ */
+ /*@{*/
+ ir_unop_dFdx,
+ ir_unop_dFdx_coarse,
+ ir_unop_dFdx_fine,
+ ir_unop_dFdy,
+ ir_unop_dFdy_coarse,
+ ir_unop_dFdy_fine,
+ /*@}*/
+
+ /**
+ * \name Floating point pack and unpack operations.
+ */
+ /*@{*/
+ ir_unop_pack_snorm_2x16,
+ ir_unop_pack_snorm_4x8,
+ ir_unop_pack_unorm_2x16,
+ ir_unop_pack_unorm_4x8,
+ ir_unop_pack_half_2x16,
+ ir_unop_unpack_snorm_2x16,
+ ir_unop_unpack_snorm_4x8,
+ ir_unop_unpack_unorm_2x16,
+ ir_unop_unpack_unorm_4x8,
+ ir_unop_unpack_half_2x16,
+ /*@}*/
+
- /**
- * \name Lowered floating point packing operations.
- *
- * \see lower_packing_builtins_visitor::split_pack_half_2x16
- */
- /*@{*/
- ir_binop_pack_half_2x16_split,
- /*@}*/
-
+ /**
+ * \name Bit operations, part of ARB_gpu_shader5.
+ */
+ /*@{*/
+ ir_unop_bitfield_reverse,
+ ir_unop_bit_count,
+ ir_unop_find_msb,
+ ir_unop_find_lsb,
+ /*@}*/
+
+ ir_unop_saturate,
+
+ /**
+ * \name Double packing, part of ARB_gpu_shader_fp64.
+ */
+ /*@{*/
+ ir_unop_pack_double_2x32,
+ ir_unop_unpack_double_2x32,
+ /*@}*/
+
+ ir_unop_frexp_sig,
+ ir_unop_frexp_exp,
+
+ ir_unop_noise,
+
+ ir_unop_subroutine_to_int,
+ /**
+ * Interpolate fs input at centroid
+ *
+ * operand0 is the fs input.
+ */
+ ir_unop_interpolate_at_centroid,
+
+ /**
+ * Ask the driver for the total size of a buffer block.
+ *
+ * operand0 is the ir_constant buffer block index in the linked shader.
+ */
+ ir_unop_get_buffer_size,
+
+ /**
+ * Calculate the length of an unsized array inside a buffer block.
+ * This opcode is replaced by a lowering pass inside the linker.
+ *
+ * operand0 is the unsized array's ir_value for the calculation
+ * of its length.
+ */
+ ir_unop_ssbo_unsized_array_length,
+
+ /**
+ * A sentinel marking the last of the unary operations.
+ */
+ ir_last_unop = ir_unop_ssbo_unsized_array_length,
+
+ ir_binop_add,
+ ir_binop_sub,
+ ir_binop_mul, /**< Floating-point or low 32-bit integer multiply. */
+ ir_binop_imul_high, /**< Calculates the high 32-bits of a 64-bit multiply. */
+ ir_binop_div,
+
+ /**
+ * Returns the carry resulting from the addition of the two arguments.
+ */
+ /*@{*/
+ ir_binop_carry,
+ /*@}*/
+
+ /**
+ * Returns the borrow resulting from the subtraction of the second argument
+ * from the first argument.
+ */
+ /*@{*/
+ ir_binop_borrow,
+ /*@}*/
+
+ /**
+ * Takes one of two combinations of arguments:
+ *
+ * - mod(vecN, vecN)
+ * - mod(vecN, float)
+ *
+ * Does not take integer types.
+ */
+ ir_binop_mod,
+
+ /**
+ * \name Binary comparison operators which return a boolean vector.
+ * The type of both operands must be equal.
+ */
+ /*@{*/
+ ir_binop_less,
+ ir_binop_greater,
+ ir_binop_lequal,
+ ir_binop_gequal,
+ ir_binop_equal,
+ ir_binop_nequal,
+ /**
+ * Returns single boolean for whether all components of operands[0]
+ * equal the components of operands[1].
+ */
+ ir_binop_all_equal,
+ /**
+ * Returns single boolean for whether any component of operands[0]
+ * is not equal to the corresponding component of operands[1].
+ */
+ ir_binop_any_nequal,
+ /*@}*/
+
+ /**
+ * \name Bit-wise binary operations.
+ */
+ /*@{*/
+ ir_binop_lshift,
+ ir_binop_rshift,
+ ir_binop_bit_and,
+ ir_binop_bit_xor,
+ ir_binop_bit_or,
+ /*@}*/
+
+ ir_binop_logic_and,
+ ir_binop_logic_xor,
+ ir_binop_logic_or,
+
+ ir_binop_dot,
+ ir_binop_min,
+ ir_binop_max,
+
+ ir_binop_pow,
+
+ /**
+ * Load a value the size of a given GLSL type from a uniform block.
+ *
+ * operand0 is the ir_constant uniform block index in the linked shader.
+ * operand1 is a byte offset within the uniform block.
+ */
+ ir_binop_ubo_load,
+
+ /**
+ * \name Multiplies a number by two to a power, part of ARB_gpu_shader5.
+ */
+ /*@{*/
+ ir_binop_ldexp,
+ /*@}*/
+
+ /**
+ * Extract a scalar from a vector
+ *
+ * operand0 is the vector
+ * operand1 is the index of the field to read from operand0
+ */
+ ir_binop_vector_extract,
+
+ /**
+ * Interpolate fs input at offset
+ *
+ * operand0 is the fs input
+ * operand1 is the offset from the pixel center
+ */
+ ir_binop_interpolate_at_offset,
+
+ /**
+ * Interpolate fs input at sample position
+ *
+ * operand0 is the fs input
+ * operand1 is the sample ID
+ */
+ ir_binop_interpolate_at_sample,
+
+ /**
+ * A sentinel marking the last of the binary operations.
+ */
+ ir_last_binop = ir_binop_interpolate_at_sample,
+
+ /**
+ * \name Fused floating-point multiply-add, part of ARB_gpu_shader5.
+ */
+ /*@{*/
+ ir_triop_fma,
+ /*@}*/
+
+ ir_triop_lrp,
+
+ /**
+ * \name Conditional Select
+ *
+ * A vector conditional select instruction (like ?:, but operating per-
+ * component on vectors).
+ *
+ * \see lower_instructions_visitor::ldexp_to_arith
+ */
+ /*@{*/
+ ir_triop_csel,
+ /*@}*/
+
+ ir_triop_bitfield_extract,
+
+ /**
+ * Generate a value with one field of a vector changed
+ *
+ * operand0 is the vector
+ * operand1 is the value to write into the vector result
+ * operand2 is the index in operand0 to be modified
+ */
+ ir_triop_vector_insert,
+
+ /**
+ * A sentinel marking the last of the ternary operations.
+ */
+ ir_last_triop = ir_triop_vector_insert,
+
+ ir_quadop_bitfield_insert,
+
+ ir_quadop_vector,
+
+ /**
+ * A sentinel marking the last of the quaternary operations.
+ */
+ ir_last_quadop = ir_quadop_vector,
+
+ /**
+ * A sentinel marking the last of all operations.
+ */
+ ir_last_opcode = ir_quadop_vector
+ };
+
+ class ir_expression : public ir_rvalue {
+ public:
+ ir_expression(int op, const struct glsl_type *type,
+ ir_rvalue *op0, ir_rvalue *op1 = NULL,
+ ir_rvalue *op2 = NULL, ir_rvalue *op3 = NULL);
+
+ /**
+ * Constructor for unary operation expressions
+ */
+ ir_expression(int op, ir_rvalue *);
+
+ /**
+ * Constructor for binary operation expressions
+ */
+ ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1);
+
+ /**
+ * Constructor for ternary operation expressions
+ */
+ ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1, ir_rvalue *op2);
+
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
+
+ virtual ir_expression *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ /**
+ * Attempt to constant-fold the expression
+ *
+ * The "variable_context" hash table links ir_variable * to ir_constant *
+ * that represent the variables' values. \c NULL represents an empty
+ * context.
+ *
+ * If the expression cannot be constant folded, this method will return
+ * \c NULL.
+ */
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ /**
+ * Determine the number of operands used by an expression
+ */
+ static unsigned int get_num_operands(ir_expression_operation);
+
+ /**
+ * Determine the number of operands used by an expression
+ */
+ unsigned int get_num_operands() const
+ {
+ return (this->operation == ir_quadop_vector)
+ ? this->type->vector_elements : get_num_operands(operation);
+ }
+
+ /**
+ * Return whether the expression operates on vectors horizontally.
+ */
+ bool is_horizontal() const
+ {
+ return operation == ir_binop_all_equal ||
+ operation == ir_binop_any_nequal ||
+ operation == ir_binop_dot ||
+ operation == ir_binop_vector_extract ||
+ operation == ir_triop_vector_insert ||
+ operation == ir_quadop_vector;
+ }
+
+ /**
+ * Return a string representing this expression's operator.
+ */
+ const char *operator_string();
+
+ /**
+ * Return a string representing this expression's operator.
+ */
+ static const char *operator_string(ir_expression_operation);
+
+
+ /**
+ * Do a reverse-lookup to translate the given string into an operator.
+ */
+ static ir_expression_operation get_operator(const char *);
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ virtual ir_variable *variable_referenced() const;
+
+ ir_expression_operation operation;
+ ir_rvalue *operands[4];
+ };
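+
+ /* A minimal construction sketch (assuming a ralloc context \c mem_ctx and two
+  * float r-values \c a and \c b): the one- and two-operand constructors infer
+  * the result type from their operands, so the explicit type is only needed
+  * for the general constructor.
+  *
+  * \code
+  *    ir_expression *sum = new(mem_ctx) ir_expression(ir_binop_add, a, b);
+  *    ir_expression *neg = new(mem_ctx) ir_expression(ir_unop_neg, sum);
+  *    assert(neg->get_num_operands() == 1);
+  * \endcode
+  */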
+
+
+ /**
+ * HIR instruction representing a high-level function call, containing a list
+ * of parameters and returning a value in the supplied temporary.
+ */
+ class ir_call : public ir_instruction {
+ public:
+ ir_call(ir_function_signature *callee,
+ ir_dereference_variable *return_deref,
+ exec_list *actual_parameters)
+ : ir_instruction(ir_type_call), return_deref(return_deref), callee(callee), sub_var(NULL), array_idx(NULL)
+ {
+ assert(callee->return_type != NULL);
+ actual_parameters->move_nodes_to(& this->actual_parameters);
+ this->use_builtin = callee->is_builtin();
+ }
+
+ ir_call(ir_function_signature *callee,
+ ir_dereference_variable *return_deref,
+ exec_list *actual_parameters,
+ ir_variable *var, ir_rvalue *array_idx)
+ : ir_instruction(ir_type_call), return_deref(return_deref), callee(callee), sub_var(var), array_idx(array_idx)
+ {
+ assert(callee->return_type != NULL);
+ actual_parameters->move_nodes_to(& this->actual_parameters);
+ this->use_builtin = callee->is_builtin();
+ }
+
+ virtual ir_call *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ /**
+ * Get the name of the function being called.
+ */
+ const char *callee_name() const
+ {
+ return callee->function_name();
+ }
+
+ /**
+ * Generates an inline version of the function before @ir,
+ * storing the return value in return_deref.
+ */
+ void generate_inline(ir_instruction *ir);
+
+ /**
+ * Storage for the function's return value.
+ * This must be NULL if the return type is void.
+ */
+ ir_dereference_variable *return_deref;
+
+ /**
+ * The specific function signature being called.
+ */
+ ir_function_signature *callee;
+
+ /* List of ir_rvalue of paramaters passed in this call. */
+ exec_list actual_parameters;
+
+ /** Should this call only bind to a built-in function? */
+ bool use_builtin;
+
+ /*
+ * ARB_shader_subroutine support -
+ * the subroutine uniform variable and array index
+ * rvalue to be used in the lowering pass later.
+ */
+ ir_variable *sub_var;
+ ir_rvalue *array_idx;
+ };
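+
+ /* A minimal construction sketch (assuming a ralloc context \c mem_ctx, a
+  * non-void signature \c sig, a result variable \c result_var, an argument
+  * list \c args and an instruction list \c instructions, all hypothetical
+  * names): the actual parameters are moved out of the caller's list into
+  * ir_call::actual_parameters.
+  *
+  * \code
+  *    ir_dereference_variable *ret =
+  *       new(mem_ctx) ir_dereference_variable(result_var);
+  *    ir_call *call = new(mem_ctx) ir_call(sig, ret, &args);
+  *    instructions->push_tail(call);
+  * \endcode
+  */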
+
+
+ /**
+ * \name Jump-like IR instructions.
+ *
+ * These include \c break, \c continue, \c return, and \c discard.
+ */
+ /*@{*/
+ class ir_jump : public ir_instruction {
+ protected:
+ ir_jump(enum ir_node_type t)
+ : ir_instruction(t)
+ {
+ }
+ };
+
+ class ir_return : public ir_jump {
+ public:
+ ir_return()
+ : ir_jump(ir_type_return), value(NULL)
+ {
+ }
+
+ ir_return(ir_rvalue *value)
+ : ir_jump(ir_type_return), value(value)
+ {
+ }
+
+ virtual ir_return *clone(void *mem_ctx, struct hash_table *) const;
+
+ ir_rvalue *get_value() const
+ {
+ return value;
+ }
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ ir_rvalue *value;
+ };
+
+
+ /**
+ * Jump instructions used inside loops
+ *
+ * These include \c break and \c continue. The \c break within a loop is
+ * different from the \c break within a switch-statement.
+ *
+ * \sa ir_switch_jump
+ */
+ class ir_loop_jump : public ir_jump {
+ public:
+ enum jump_mode {
+ jump_break,
+ jump_continue
+ };
+
+ ir_loop_jump(jump_mode mode)
+ : ir_jump(ir_type_loop_jump)
+ {
+ this->mode = mode;
+ }
+
+ virtual ir_loop_jump *clone(void *mem_ctx, struct hash_table *) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ bool is_break() const
+ {
+ return mode == jump_break;
+ }
+
+ bool is_continue() const
+ {
+ return mode == jump_continue;
+ }
+
+ /** Mode selector for the jump instruction. */
+ enum jump_mode mode;
+ };
+
+ /**
+ * IR instruction representing discard statements.
+ */
+ class ir_discard : public ir_jump {
+ public:
+ ir_discard()
+ : ir_jump(ir_type_discard)
+ {
+ this->condition = NULL;
+ }
+
+ ir_discard(ir_rvalue *cond)
+ : ir_jump(ir_type_discard)
+ {
+ this->condition = cond;
+ }
+
+ virtual ir_discard *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ ir_rvalue *condition;
+ };
+ /*@}*/
+
+
+ /**
+ * Texture sampling opcodes used in ir_texture
+ */
+ enum ir_texture_opcode {
+ ir_tex, /**< Regular texture look-up */
+ ir_txb, /**< Texture look-up with LOD bias */
+ ir_txl, /**< Texture look-up with explicit LOD */
+ ir_txd, /**< Texture look-up with partial derivatives */
+ ir_txf, /**< Texel fetch with explicit LOD */
+ ir_txf_ms, /**< Multisample texture fetch */
+ ir_txs, /**< Texture size */
+ ir_lod, /**< Texture lod query */
+ ir_tg4, /**< Texture gather */
+ ir_query_levels, /**< Texture levels query */
+ ir_texture_samples, /**< Texture samples query */
+ ir_samples_identical, /**< Query whether all samples are definitely identical. */
+ };
+
+
+ /**
+ * IR instruction to sample a texture
+ *
+ * The specific form of the IR instruction depends on the \c mode value
+ * selected from \c ir_texture_opcodes. In the printed IR, these will
+ * appear as:
+ *
+ * Texel offset (0 or an expression)
+ * | Projection divisor
+ * | | Shadow comparitor
+ * | | |
+ * v v v
+ * (tex <type> <sampler> <coordinate> 0 1 ( ))
+ * (txb <type> <sampler> <coordinate> 0 1 ( ) <bias>)
+ * (txl <type> <sampler> <coordinate> 0 1 ( ) <lod>)
+ * (txd <type> <sampler> <coordinate> 0 1 ( ) (dPdx dPdy))
+ * (txf <type> <sampler> <coordinate> 0 <lod>)
+ * (txf_ms
+ * <type> <sampler> <coordinate> <sample_index>)
+ * (txs <type> <sampler> <lod>)
+ * (lod <type> <sampler> <coordinate>)
+ * (tg4 <type> <sampler> <coordinate> <offset> <component>)
+ * (query_levels <type> <sampler>)
+ * (samples_identical <sampler> <coordinate>)
+ */
+ class ir_texture : public ir_rvalue {
+ public:
+ ir_texture(enum ir_texture_opcode op)
+ : ir_rvalue(ir_type_texture),
+ op(op), sampler(NULL), coordinate(NULL), projector(NULL),
+ shadow_comparitor(NULL), offset(NULL)
+ {
+ memset(&lod_info, 0, sizeof(lod_info));
+ }
+
+ virtual ir_texture *clone(void *mem_ctx, struct hash_table *) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
+
+ /**
+ * Return a string representing the ir_texture_opcode.
+ */
+ const char *opcode_string();
+
+ /** Set the sampler and type. */
+ void set_sampler(ir_dereference *sampler, const glsl_type *type);
+
+ /**
+ * Do a reverse-lookup to translate a string into an ir_texture_opcode.
+ */
+ static ir_texture_opcode get_opcode(const char *);
+
+ enum ir_texture_opcode op;
+
+ /** Sampler to use for the texture access. */
+ ir_dereference *sampler;
+
+ /** Texture coordinate to sample */
+ ir_rvalue *coordinate;
+
+ /**
+ * Value used for projective divide.
+ *
+ * If there is no projective divide (the common case), this will be
+ * \c NULL. Optimization passes should check whether this points to a
+ * constant of 1.0 and, if so, replace it with \c NULL.
+ */
+ ir_rvalue *projector;
+
+ /**
+ * Coordinate used for comparison on shadow look-ups.
+ *
+ * If there is no shadow comparison, this will be \c NULL. For the
+ * \c ir_txf opcode, this *must* be \c NULL.
+ */
+ ir_rvalue *shadow_comparitor;
+
+ /** Texel offset. */
+ ir_rvalue *offset;
+
+ union {
+ ir_rvalue *lod; /**< Floating point LOD */
+ ir_rvalue *bias; /**< Floating point LOD bias */
+ ir_rvalue *sample_index; /**< MSAA sample index */
+ ir_rvalue *component; /**< Gather component selector */
+ struct {
+ ir_rvalue *dPdx; /**< Partial derivative of coordinate wrt X */
+ ir_rvalue *dPdy; /**< Partial derivative of coordinate wrt Y */
+ } grad;
+ } lod_info;
+ };
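+
+ /* A minimal construction sketch (assuming a ralloc context \c mem_ctx, a
+  * sampler dereference \c sampler_deref, and coordinate / LOD r-values
+  * \c coord and \c lod): which lod_info member is meaningful depends on the
+  * opcode; for ir_txl it is the explicit LOD. set_sampler() also records the
+  * type of the sampling result, vec4 here.
+  *
+  * \code
+  *    ir_texture *tex = new(mem_ctx) ir_texture(ir_txl);
+  *    tex->set_sampler(sampler_deref, glsl_type::vec4_type);
+  *    tex->coordinate = coord;
+  *    tex->lod_info.lod = lod;
+  * \endcode
+  */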
+
+
+ struct ir_swizzle_mask {
+ unsigned x:2;
+ unsigned y:2;
+ unsigned z:2;
+ unsigned w:2;
+
+ /**
+ * Number of components in the swizzle.
+ */
+ unsigned num_components:3;
+
+ /**
+ * Does the swizzle contain duplicate components?
+ *
+ * L-value swizzles cannot contain duplicate components.
+ */
+ unsigned has_duplicates:1;
+ };
+
+
+ class ir_swizzle : public ir_rvalue {
+ public:
+ ir_swizzle(ir_rvalue *, unsigned x, unsigned y, unsigned z, unsigned w,
+ unsigned count);
+
+ ir_swizzle(ir_rvalue *val, const unsigned *components, unsigned count);
+
+ ir_swizzle(ir_rvalue *val, ir_swizzle_mask mask);
+
+ virtual ir_swizzle *clone(void *mem_ctx, struct hash_table *) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ /**
+ * Construct an ir_swizzle from the textual representation. Can fail.
+ */
+ static ir_swizzle *create(ir_rvalue *, const char *, unsigned vector_length);
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
+
+ bool is_lvalue() const
+ {
+ return val->is_lvalue() && !mask.has_duplicates;
+ }
+
+ /**
+ * Get the variable that is ultimately referenced by an r-value
+ */
+ virtual ir_variable *variable_referenced() const;
+
+ ir_rvalue *val;
+ ir_swizzle_mask mask;
+
+ private:
+ /**
+ * Initialize the mask component of a swizzle
+ *
+ * This is used by the \c ir_swizzle constructors.
+ */
+ void init_mask(const unsigned *components, unsigned count);
+ };
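+
+ /* A minimal usage sketch (assuming a vec4 r-value \c val): create() parses a
+  * textual swizzle and returns \c NULL when the string or the vector length
+  * is invalid, so the result must be checked.
+  *
+  * \code
+  *    ir_swizzle *swiz = ir_swizzle::create(val, "xyz", 4);
+  *    if (swiz == NULL) {
+  *       // "xyz" was not a valid swizzle of a 4-component vector
+  *    }
+  * \endcode
+  */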
+
+
+ class ir_dereference : public ir_rvalue {
+ public:
+ virtual ir_dereference *clone(void *mem_ctx, struct hash_table *) const = 0;
+
+ bool is_lvalue() const;
+
+ /**
+ * Get the variable that is ultimately referenced by an r-value
+ */
+ virtual ir_variable *variable_referenced() const = 0;
+
+ protected:
+ ir_dereference(enum ir_node_type t)
+ : ir_rvalue(t)
+ {
+ }
+ };
+
+
+ class ir_dereference_variable : public ir_dereference {
+ public:
+ ir_dereference_variable(ir_variable *var);
+
+ virtual ir_dereference_variable *clone(void *mem_ctx,
+ struct hash_table *) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
+
+ /**
+ * Get the variable that is ultimately referenced by an r-value
+ */
+ virtual ir_variable *variable_referenced() const
+ {
+ return this->var;
+ }
+
+ virtual ir_variable *whole_variable_referenced()
+ {
+ /* ir_dereference_variable objects always dereference the entire
+ * variable. However, if this dereference is dereferenced by anything
+ * else, the complete dereference chain is not a whole-variable
+ * dereference. This method should only be called on the topmost
+ * ir_rvalue in a dereference chain.
+ */
+ return this->var;
+ }
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ /**
+ * Object being dereferenced.
+ */
+ ir_variable *var;
+ };
+
+
+ class ir_dereference_array : public ir_dereference {
+ public:
+ ir_dereference_array(ir_rvalue *value, ir_rvalue *array_index);
+
+ ir_dereference_array(ir_variable *var, ir_rvalue *array_index);
+
+ virtual ir_dereference_array *clone(void *mem_ctx,
+ struct hash_table *) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
+
+ /**
+ * Get the variable that is ultimately referenced by an r-value
+ */
+ virtual ir_variable *variable_referenced() const
+ {
+ return this->array->variable_referenced();
+ }
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ ir_rvalue *array;
+ ir_rvalue *array_index;
+
+ private:
+ void set_array(ir_rvalue *value);
+ };
+
+
+ class ir_dereference_record : public ir_dereference {
+ public:
+ ir_dereference_record(ir_rvalue *value, const char *field);
+
+ ir_dereference_record(ir_variable *var, const char *field);
+
+ virtual ir_dereference_record *clone(void *mem_ctx,
+ struct hash_table *) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ /**
+ * Get the variable that is ultimately referenced by an r-value
+ */
+ virtual ir_variable *variable_referenced() const
+ {
+ return this->record->variable_referenced();
+ }
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ ir_rvalue *record;
+ const char *field;
+ };
+
+
+ /**
+ * Data stored in an ir_constant
+ */
+ union ir_constant_data {
+ unsigned u[16];
+ int i[16];
+ float f[16];
+ bool b[16];
+ double d[16];
+ };
+
+
+ class ir_constant : public ir_rvalue {
+ public:
+ ir_constant(const struct glsl_type *type, const ir_constant_data *data);
+ ir_constant(bool b, unsigned vector_elements=1);
+ ir_constant(unsigned int u, unsigned vector_elements=1);
+ ir_constant(int i, unsigned vector_elements=1);
+ ir_constant(float f, unsigned vector_elements=1);
+ ir_constant(double d, unsigned vector_elements=1);
+
+ /**
+ * Construct an ir_constant from a list of ir_constant values
+ */
+ ir_constant(const struct glsl_type *type, exec_list *values);
+
+ /**
+ * Construct an ir_constant from a scalar component of another ir_constant
+ *
+ * The new \c ir_constant inherits the type of the component from the
+ * source constant.
+ *
+ * \note
+ * In the case of a matrix constant, the new constant is a scalar, \b not
+ * a vector.
+ */
+ ir_constant(const ir_constant *c, unsigned i);
+
+ /**
+ * Return a new ir_constant of the specified type containing all zeros.
+ */
+ static ir_constant *zero(void *mem_ctx, const glsl_type *type);
+
+ virtual ir_constant *clone(void *mem_ctx, struct hash_table *) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
+
+ /**
+ * Get a particular component of a constant as a specific type
+ *
+ * This is useful, for example, to get a value from an integer constant
+ * as a float or bool. This appears frequently when constructors are
+ * called with all constant parameters.
+ */
+ /*@{*/
+ bool get_bool_component(unsigned i) const;
+ float get_float_component(unsigned i) const;
+ double get_double_component(unsigned i) const;
+ int get_int_component(unsigned i) const;
+ unsigned get_uint_component(unsigned i) const;
+ /*@}*/
+
+ ir_constant *get_array_element(unsigned i) const;
+
+ ir_constant *get_record_field(const char *name);
+
+ /**
+ * Copy the values from another constant at a given offset.
+ *
+ * The offset is ignored for array or struct copies; it only applies when
+ * copying scalars or vectors into vectors or matrices.
+ *
+ * With identical types on both sides and a zero offset, this behaves
+ * like clone() without creating a new object.
+ */
+
+ void copy_offset(ir_constant *src, int offset);
+
+ /**
+ * Copy the values from another constant at a given offset,
+ * following an assign-like mask.
+ *
+ * The mask is ignored for scalars.
+ *
+ * Note that this function only handles what assign can handle,
+ * i.e. at most a vector as source and a column of a matrix as
+ * destination.
+ */
+
+ void copy_masked_offset(ir_constant *src, int offset, unsigned int mask);
+
+ /**
+ * Determine whether a constant has the same value as another constant
+ *
+ * \sa ir_constant::is_zero, ir_constant::is_one,
+ * ir_constant::is_negative_one
+ */
+ bool has_value(const ir_constant *) const;
+
+ /**
+ * Return true if this ir_constant represents the given value.
+ *
+ * For vectors, this checks that each component is the given value.
+ */
+ virtual bool is_value(float f, int i) const;
+ virtual bool is_zero() const;
+ virtual bool is_one() const;
+ virtual bool is_negative_one() const;
+
+ /**
+ * Return true for constants that could be stored as 16-bit unsigned values.
+ *
+ * Note that this will return true even for signed integer ir_constants, as
+ * long as the value is non-negative and fits in 16 bits.
+ */
+ virtual bool is_uint16_constant() const;
+
+ /**
+ * Value of the constant.
+ *
+ * The field used to back the values supplied by the constant is determined
+ * by the type associated with the \c ir_instruction. Constants may be
+ * scalars, vectors, or matrices.
+ */
+ union ir_constant_data value;
+
+ /* Array elements */
+ ir_constant **array_elements;
+
+ /* Structure fields */
+ exec_list components;
+
+ private:
+ /**
+ * Parameterless constructor only used by the clone method
+ */
+ ir_constant(void);
+ };
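+
+ /* A minimal usage sketch (assuming a ralloc context \c mem_ctx): scalar
+  * constants can be built directly, an all-zero constant of any type can be
+  * requested with zero(), and components can be read back as other types.
+  *
+  * \code
+  *    ir_constant *two = new(mem_ctx) ir_constant(2.0f);
+  *    ir_constant *zero_vec = ir_constant::zero(mem_ctx, glsl_type::vec4_type);
+  *    assert(two->get_int_component(0) == 2);
+  *    assert(zero_vec->is_zero());
+  * \endcode
+  */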
+
+ /**
+ * IR instruction to emit a vertex in a geometry shader.
+ */
+ class ir_emit_vertex : public ir_instruction {
+ public:
+ ir_emit_vertex(ir_rvalue *stream)
+ : ir_instruction(ir_type_emit_vertex),
+ stream(stream)
+ {
+ assert(stream);
+ }
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_emit_vertex *clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ return new(mem_ctx) ir_emit_vertex(this->stream->clone(mem_ctx, ht));
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ int stream_id() const
+ {
+ return stream->as_constant()->value.i[0];
+ }
+
+ ir_rvalue *stream;
+ };
+
+ /**
+ * IR instruction to complete the current primitive and start a new one in a
+ * geometry shader.
+ */
+ class ir_end_primitive : public ir_instruction {
+ public:
+ ir_end_primitive(ir_rvalue *stream)
+ : ir_instruction(ir_type_end_primitive),
+ stream(stream)
+ {
+ assert(stream);
+ }
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_end_primitive *clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ return new(mem_ctx) ir_end_primitive(this->stream->clone(mem_ctx, ht));
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ int stream_id() const
+ {
+ return stream->as_constant()->value.i[0];
+ }
+
+ ir_rvalue *stream;
+ };
+
+ /**
+ * IR instruction for tessellation control and compute shader barrier.
+ */
+ class ir_barrier : public ir_instruction {
+ public:
+ ir_barrier()
+ : ir_instruction(ir_type_barrier)
+ {
+ }
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_barrier *clone(void *mem_ctx, struct hash_table *) const
+ {
+ return new(mem_ctx) ir_barrier();
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+ };
+
+ /*@}*/
+
+ /**
+ * Apply a visitor to each IR node in a list
+ */
+ void
+ visit_exec_list(exec_list *list, ir_visitor *visitor);
+
+ /**
+ * Validate invariants on each IR node in a list
+ */
+ void validate_ir_tree(exec_list *instructions);
+
+ struct _mesa_glsl_parse_state;
+ struct gl_shader_program;
+
+ /**
+ * Detect whether an unlinked shader contains static recursion
+ *
+ * If the list of instructions is determined to contain static recursion,
+ * \c _mesa_glsl_error will be called to emit error messages for each function
+ * that is in the recursion cycle.
+ */
+ void
+ detect_recursion_unlinked(struct _mesa_glsl_parse_state *state,
+ exec_list *instructions);
+
+ /**
+ * Detect whether a linked shader contains static recursion
+ *
+ * If the list of instructions is determined to contain static recursion,
+ * \c link_error_printf will be called to emit error messages for each function
+ * that is in the recursion cycle. In addition,
+ * \c gl_shader_program::LinkStatus will be set to false.
+ */
+ void
+ detect_recursion_linked(struct gl_shader_program *prog,
+ exec_list *instructions);
+
+ /**
+ * Make a clone of each IR instruction in a list
+ *
+ * \param in List of IR instructions that are to be cloned
+ * \param out List to hold the cloned instructions
+ */
+ void
+ clone_ir_list(void *mem_ctx, exec_list *out, const exec_list *in);
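+
+ /* A minimal usage sketch (assuming a ralloc context \c mem_ctx and a source
+  * instruction list \c shader_ir, both hypothetical names): the output list
+  * is filled with clones allocated under \c mem_ctx.
+  *
+  * \code
+  *    exec_list copy;
+  *    clone_ir_list(mem_ctx, &copy, &shader_ir);
+  * \endcode
+  */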
+
+ extern void
+ _mesa_glsl_initialize_variables(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state);
+
+ extern void
+ _mesa_glsl_initialize_derived_variables(gl_shader *shader);
+
+ extern void
+ _mesa_glsl_initialize_functions(_mesa_glsl_parse_state *state);
+
+ extern void
+ _mesa_glsl_initialize_builtin_functions();
+
+ extern ir_function_signature *
+ _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state,
+ const char *name, exec_list *actual_parameters);
+
+ extern ir_function *
+ _mesa_glsl_find_builtin_function_by_name(const char *name);
+
+ extern gl_shader *
+ _mesa_glsl_get_builtin_function_shader(void);
+
+ extern ir_function_signature *
+ _mesa_get_main_function_signature(gl_shader *sh);
+
+ extern void
+ _mesa_glsl_release_functions(void);
+
+ extern void
+ _mesa_glsl_release_builtin_functions(void);
+
+ extern void
+ reparent_ir(exec_list *list, void *mem_ctx);
+
+ struct glsl_symbol_table;
+
+ extern void
+ import_prototypes(const exec_list *source, exec_list *dest,
+ struct glsl_symbol_table *symbols, void *mem_ctx);
+
+ extern bool
+ ir_has_call(ir_instruction *ir);
+
+ extern void
+ do_set_program_inouts(exec_list *instructions, struct gl_program *prog,
+ gl_shader_stage shader_stage);
+
+ extern char *
+ prototype_string(const glsl_type *return_type, const char *name,
+ exec_list *parameters);
+
+ const char *
+ mode_string(const ir_variable *var);
+
+ /**
+ * Built-in / reserved GL variables names start with "gl_"
+ */
+ static inline bool
+ is_gl_identifier(const char *s)
+ {
+ return s && s[0] == 'g' && s[1] == 'l' && s[2] == '_';
+ }
+
+ extern "C" {
+ #endif /* __cplusplus */
+
+ extern void _mesa_print_ir(FILE *f, struct exec_list *instructions,
+ struct _mesa_glsl_parse_state *state);
+
+ extern void
+ fprint_ir(FILE *f, const void *instruction);
+
+ #ifdef __cplusplus
+ } /* extern "C" */
+ #endif
+
+ unsigned
+ vertices_per_prim(GLenum prim);
+
+ #endif /* IR_H */
--- /dev/null
+ /*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #include <string.h>
+ #include "main/compiler.h"
+ #include "ir.h"
+ #include "compiler/glsl_types.h"
+ #include "program/hash_table.h"
+
+ ir_rvalue *
+ ir_rvalue::clone(void *mem_ctx, struct hash_table *) const
+ {
+ /* The only possible instantiation is the generic error value. */
+ return error_value(mem_ctx);
+ }
+
+ /**
+ * Duplicate an IR variable
+ */
+ ir_variable *
+ ir_variable::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_variable *var = new(mem_ctx) ir_variable(this->type, this->name,
+ (ir_variable_mode) this->data.mode);
+
+ var->data.max_array_access = this->data.max_array_access;
+ if (this->is_interface_instance()) {
+ var->u.max_ifc_array_access =
+ rzalloc_array(var, unsigned, this->interface_type->length);
+ memcpy(var->u.max_ifc_array_access, this->u.max_ifc_array_access,
+ this->interface_type->length * sizeof(unsigned));
+ }
+
+ memcpy(&var->data, &this->data, sizeof(var->data));
+
+ if (this->get_state_slots()) {
+ ir_state_slot *s = var->allocate_state_slots(this->get_num_state_slots());
+ memcpy(s, this->get_state_slots(),
+ sizeof(s[0]) * var->get_num_state_slots());
+ }
+
+ if (this->constant_value)
+ var->constant_value = this->constant_value->clone(mem_ctx, ht);
+
+ if (this->constant_initializer)
+ var->constant_initializer =
+ this->constant_initializer->clone(mem_ctx, ht);
+
+ var->interface_type = this->interface_type;
+
+ if (ht) {
+ hash_table_insert(ht, var, (void *)const_cast<ir_variable *>(this));
+ }
+
+ return var;
+ }
+
+ ir_swizzle *
+ ir_swizzle::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ return new(mem_ctx) ir_swizzle(this->val->clone(mem_ctx, ht), this->mask);
+ }
+
+ ir_return *
+ ir_return::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_rvalue *new_value = NULL;
+
+ if (this->value)
+ new_value = this->value->clone(mem_ctx, ht);
+
+ return new(mem_ctx) ir_return(new_value);
+ }
+
+ ir_discard *
+ ir_discard::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_rvalue *new_condition = NULL;
+
+ if (this->condition != NULL)
+ new_condition = this->condition->clone(mem_ctx, ht);
+
+ return new(mem_ctx) ir_discard(new_condition);
+ }
+
+ ir_loop_jump *
+ ir_loop_jump::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ (void)ht;
+
+ return new(mem_ctx) ir_loop_jump(this->mode);
+ }
+
+ ir_if *
+ ir_if::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_if *new_if = new(mem_ctx) ir_if(this->condition->clone(mem_ctx, ht));
+
+ foreach_in_list(ir_instruction, ir, &this->then_instructions) {
+ new_if->then_instructions.push_tail(ir->clone(mem_ctx, ht));
+ }
+
+ foreach_in_list(ir_instruction, ir, &this->else_instructions) {
+ new_if->else_instructions.push_tail(ir->clone(mem_ctx, ht));
+ }
+
+ return new_if;
+ }
+
+ ir_loop *
+ ir_loop::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_loop *new_loop = new(mem_ctx) ir_loop();
+
+ foreach_in_list(ir_instruction, ir, &this->body_instructions) {
+ new_loop->body_instructions.push_tail(ir->clone(mem_ctx, ht));
+ }
+
+ return new_loop;
+ }
+
+ ir_call *
+ ir_call::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_dereference_variable *new_return_ref = NULL;
+ if (this->return_deref != NULL)
+ new_return_ref = this->return_deref->clone(mem_ctx, ht);
+
+ exec_list new_parameters;
+
+ foreach_in_list(ir_instruction, ir, &this->actual_parameters) {
+ new_parameters.push_tail(ir->clone(mem_ctx, ht));
+ }
+
+ return new(mem_ctx) ir_call(this->callee, new_return_ref, &new_parameters);
+ }
+
+ ir_expression *
+ ir_expression::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_rvalue *op[ARRAY_SIZE(this->operands)] = { NULL, };
+ unsigned int i;
+
+ for (i = 0; i < get_num_operands(); i++) {
+ op[i] = this->operands[i]->clone(mem_ctx, ht);
+ }
+
+ return new(mem_ctx) ir_expression(this->operation, this->type,
+ op[0], op[1], op[2], op[3]);
+ }
+
+ ir_dereference_variable *
+ ir_dereference_variable::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_variable *new_var;
+
+ if (ht) {
+ new_var = (ir_variable *)hash_table_find(ht, this->var);
+ if (!new_var)
+ new_var = this->var;
+ } else {
+ new_var = this->var;
+ }
+
+ return new(mem_ctx) ir_dereference_variable(new_var);
+ }
+
+ ir_dereference_array *
+ ir_dereference_array::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ return new(mem_ctx) ir_dereference_array(this->array->clone(mem_ctx, ht),
+ this->array_index->clone(mem_ctx,
+ ht));
+ }
+
+ ir_dereference_record *
+ ir_dereference_record::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ return new(mem_ctx) ir_dereference_record(this->record->clone(mem_ctx, ht),
+ this->field);
+ }
+
+ ir_texture *
+ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_texture *new_tex = new(mem_ctx) ir_texture(this->op);
+ new_tex->type = this->type;
+
+ new_tex->sampler = this->sampler->clone(mem_ctx, ht);
+ if (this->coordinate)
+ new_tex->coordinate = this->coordinate->clone(mem_ctx, ht);
+ if (this->projector)
+ new_tex->projector = this->projector->clone(mem_ctx, ht);
+ if (this->shadow_comparitor) {
+ new_tex->shadow_comparitor = this->shadow_comparitor->clone(mem_ctx, ht);
+ }
+
+ if (this->offset != NULL)
+ new_tex->offset = this->offset->clone(mem_ctx, ht);
+
+ switch (this->op) {
+ case ir_tex:
+ case ir_lod:
+ case ir_query_levels:
+ case ir_texture_samples:
+ case ir_samples_identical:
+ break;
+ case ir_txb:
+ new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht);
+ break;
+ case ir_txl:
+ case ir_txf:
+ case ir_txs:
+ new_tex->lod_info.lod = this->lod_info.lod->clone(mem_ctx, ht);
+ break;
+ case ir_txf_ms:
+ new_tex->lod_info.sample_index = this->lod_info.sample_index->clone(mem_ctx, ht);
+ break;
+ case ir_txd:
+ new_tex->lod_info.grad.dPdx = this->lod_info.grad.dPdx->clone(mem_ctx, ht);
+ new_tex->lod_info.grad.dPdy = this->lod_info.grad.dPdy->clone(mem_ctx, ht);
+ break;
+ case ir_tg4:
+ new_tex->lod_info.component = this->lod_info.component->clone(mem_ctx, ht);
+ break;
+ }
+
+ return new_tex;
+ }
+
+ ir_assignment *
+ ir_assignment::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_rvalue *new_condition = NULL;
+
+ if (this->condition)
+ new_condition = this->condition->clone(mem_ctx, ht);
+
+ ir_assignment *cloned =
+ new(mem_ctx) ir_assignment(this->lhs->clone(mem_ctx, ht),
+ this->rhs->clone(mem_ctx, ht),
+ new_condition);
+ cloned->write_mask = this->write_mask;
+ return cloned;
+ }
+
+ ir_function *
+ ir_function::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_function *copy = new(mem_ctx) ir_function(this->name);
+
+ copy->is_subroutine = this->is_subroutine;
+ copy->subroutine_index = this->subroutine_index;
+ copy->num_subroutine_types = this->num_subroutine_types;
+ copy->subroutine_types = ralloc_array(mem_ctx, const struct glsl_type *, copy->num_subroutine_types);
+ for (int i = 0; i < copy->num_subroutine_types; i++)
+ copy->subroutine_types[i] = this->subroutine_types[i];
+
+ foreach_in_list(const ir_function_signature, sig, &this->signatures) {
+ ir_function_signature *sig_copy = sig->clone(mem_ctx, ht);
+ copy->add_signature(sig_copy);
+
+ if (ht != NULL)
+ hash_table_insert(ht, sig_copy,
+ (void *)const_cast<ir_function_signature *>(sig));
+ }
+
+ return copy;
+ }
+
+ ir_function_signature *
+ ir_function_signature::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_function_signature *copy = this->clone_prototype(mem_ctx, ht);
+
+ copy->is_defined = this->is_defined;
+
+ /* Clone the instruction list.
+ */
+ foreach_in_list(const ir_instruction, inst, &this->body) {
+ ir_instruction *const inst_copy = inst->clone(mem_ctx, ht);
+ copy->body.push_tail(inst_copy);
+ }
+
+ return copy;
+ }
+
+ ir_function_signature *
+ ir_function_signature::clone_prototype(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_function_signature *copy =
+ new(mem_ctx) ir_function_signature(this->return_type);
+
+ copy->is_defined = false;
+ copy->builtin_avail = this->builtin_avail;
+ copy->origin = this;
+
+ /* Clone the parameter list, but NOT the body.
+ */
+ foreach_in_list(const ir_variable, param, &this->parameters) {
+ assert(const_cast<ir_variable *>(param)->as_variable() != NULL);
+
+ ir_variable *const param_copy = param->clone(mem_ctx, ht);
+ copy->parameters.push_tail(param_copy);
+ }
+
+ return copy;
+ }
+
+ ir_constant *
+ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ (void)ht;
+
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_BOOL:
+ return new(mem_ctx) ir_constant(this->type, &this->value);
+
+ case GLSL_TYPE_STRUCT: {
+ ir_constant *c = new(mem_ctx) ir_constant;
+
+ c->type = this->type;
+ for (exec_node *node = this->components.head
+ ; !node->is_tail_sentinel()
+ ; node = node->next) {
+ ir_constant *const orig = (ir_constant *) node;
+
+ c->components.push_tail(orig->clone(mem_ctx, NULL));
+ }
+
+ return c;
+ }
+
+ case GLSL_TYPE_ARRAY: {
+ ir_constant *c = new(mem_ctx) ir_constant;
+
+ c->type = this->type;
+ c->array_elements = ralloc_array(c, ir_constant *, this->type->length);
+ for (unsigned i = 0; i < this->type->length; i++) {
+ c->array_elements[i] = this->array_elements[i]->clone(mem_ctx, NULL);
+ }
+ return c;
+ }
+
++ case GLSL_TYPE_FUNCTION:
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_ERROR:
+ case GLSL_TYPE_SUBROUTINE:
+ case GLSL_TYPE_INTERFACE:
+ assert(!"Should not get here.");
+ break;
+ }
+
+ return NULL;
+ }
+
+
+ class fixup_ir_call_visitor : public ir_hierarchical_visitor {
+ public:
+ fixup_ir_call_visitor(struct hash_table *ht)
+ {
+ this->ht = ht;
+ }
+
+ virtual ir_visitor_status visit_enter(ir_call *ir)
+ {
+ /* Try to find the function signature referenced by the ir_call in the
+ * table. If it is found, replace it with the value from the table.
+ */
+ ir_function_signature *sig =
+ (ir_function_signature *) hash_table_find(this->ht, ir->callee);
+ if (sig != NULL)
+ ir->callee = sig;
+
+ /* Since this may be used before function call parameters are flattened,
+ * the children also need to be processed.
+ */
+ return visit_continue;
+ }
+
+ private:
+ struct hash_table *ht;
+ };
+
+
+ static void
+ fixup_function_calls(struct hash_table *ht, exec_list *instructions)
+ {
+ fixup_ir_call_visitor v(ht);
+ v.run(instructions);
+ }
+
+
+ void
+ clone_ir_list(void *mem_ctx, exec_list *out, const exec_list *in)
+ {
+ struct hash_table *ht =
+ hash_table_ctor(0, hash_table_pointer_hash, hash_table_pointer_compare);
+
+ foreach_in_list(const ir_instruction, original, in) {
+ ir_instruction *copy = original->clone(mem_ctx, ht);
+
+ out->push_tail(copy);
+ }
+
+ /* Make a pass over the cloned tree to fix up ir_call nodes to point to the
+ * cloned ir_function_signature nodes. This cannot be done automatically
+ * during cloning because the ir_call might be a forward reference (i.e.,
+ * the function signature that it references may not have been cloned yet).
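+ *
+ * For example, if the original list defines foo() after one of its callers,
+ * the cloned ir_call is created while still pointing at the original foo()
+ * signature; this pass replaces it with the cloned signature recorded in
+ * the hash table.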
+ */
+ fixup_function_calls(ht, out);
+
+ hash_table_dtor(ht);
+ }
--- /dev/null
- LOWER_PACK_HALF_2x16_TO_SPLIT = 0x0040,
- LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080,
+ /*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+ /**
+ * \file ir_optimization.h
+ *
+ * Prototypes for optimization passes to be called by the compiler and drivers.
+ */
+
+ /* Operations for lower_instructions() */
+ #define SUB_TO_ADD_NEG 0x01
+ #define DIV_TO_MUL_RCP 0x02
+ #define EXP_TO_EXP2 0x04
+ #define POW_TO_EXP2 0x08
+ #define LOG_TO_LOG2 0x10
+ #define MOD_TO_FLOOR 0x20
+ #define INT_DIV_TO_MUL_RCP 0x40
+ #define LDEXP_TO_ARITH 0x80
+ #define CARRY_TO_ARITH 0x100
+ #define BORROW_TO_ARITH 0x200
+ #define SAT_TO_CLAMP 0x400
+ #define DOPS_TO_DFRAC 0x800
+ #define DFREXP_DLDEXP_TO_ARITH 0x1000
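+
+ /* Illustrative use (hypothetical driver code): several lowerings can be
+ * requested at once by OR-ing flags into the mask passed to
+ * lower_instructions(), e.g.
+ *
+ * lower_instructions(ir, SUB_TO_ADD_NEG | DIV_TO_MUL_RCP | LOG_TO_LOG2);
+ */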
+
+ /**
+ * \see class lower_packing_builtins_visitor
+ */
+ enum lower_packing_builtins_op {
+ LOWER_PACK_UNPACK_NONE = 0x0000,
+
+ LOWER_PACK_SNORM_2x16 = 0x0001,
+ LOWER_UNPACK_SNORM_2x16 = 0x0002,
+
+ LOWER_PACK_UNORM_2x16 = 0x0004,
+ LOWER_UNPACK_UNORM_2x16 = 0x0008,
+
+ LOWER_PACK_HALF_2x16 = 0x0010,
+ LOWER_UNPACK_HALF_2x16 = 0x0020,
+
- LOWER_PACK_SNORM_4x8 = 0x0100,
- LOWER_UNPACK_SNORM_4x8 = 0x0200,
++ LOWER_PACK_SNORM_4x8 = 0x0040,
++ LOWER_UNPACK_SNORM_4x8 = 0x0080,
+
- LOWER_PACK_UNORM_4x8 = 0x0400,
- LOWER_UNPACK_UNORM_4x8 = 0x0800,
-
- LOWER_PACK_USE_BFI = 0x1000,
- LOWER_PACK_USE_BFE = 0x2000,
++ LOWER_PACK_UNORM_4x8 = 0x0100,
++ LOWER_UNPACK_UNORM_4x8 = 0x0200,
+
++ LOWER_PACK_USE_BFI = 0x0400,
++ LOWER_PACK_USE_BFE = 0x0800,
+ };
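+
+ /* Illustrative use (hypothetical driver code): a driver lacking native 4x8
+ * pack/unpack support might request
+ *
+ * lower_packing_builtins(ir, LOWER_PACK_SNORM_4x8 | LOWER_UNPACK_SNORM_4x8 |
+ * LOWER_PACK_UNORM_4x8 | LOWER_UNPACK_UNORM_4x8);
+ *
+ * optionally OR-ing in LOWER_PACK_USE_BFI or LOWER_PACK_USE_BFE when the
+ * hardware provides bitfield instructions.
+ */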
+
+ bool do_common_optimization(exec_list *ir, bool linked,
+ bool uniform_locations_assigned,
+ const struct gl_shader_compiler_options *options,
+ bool native_integers);
+
+ bool do_rebalance_tree(exec_list *instructions);
+ bool do_algebraic(exec_list *instructions, bool native_integers,
+ const struct gl_shader_compiler_options *options);
+ bool opt_conditional_discard(exec_list *instructions);
+ bool do_constant_folding(exec_list *instructions);
+ bool do_constant_variable(exec_list *instructions);
+ bool do_constant_variable_unlinked(exec_list *instructions);
+ bool do_copy_propagation(exec_list *instructions);
+ bool do_copy_propagation_elements(exec_list *instructions);
+ bool do_constant_propagation(exec_list *instructions);
+ void do_dead_builtin_varyings(struct gl_context *ctx,
+ gl_shader *producer, gl_shader *consumer,
+ unsigned num_tfeedback_decls,
+ class tfeedback_decl *tfeedback_decls);
+ bool do_dead_code(exec_list *instructions, bool uniform_locations_assigned);
+ bool do_dead_code_local(exec_list *instructions);
+ bool do_dead_code_unlinked(exec_list *instructions);
+ bool do_dead_functions(exec_list *instructions);
+ bool opt_flip_matrices(exec_list *instructions);
+ bool do_function_inlining(exec_list *instructions);
+ bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true,
+ bool lower_sub_return = true,
+ bool lower_main_return = false,
+ bool lower_continue = false,
+ bool lower_break = false);
+ bool do_lower_texture_projection(exec_list *instructions);
+ bool do_if_simplification(exec_list *instructions);
+ bool opt_flatten_nested_if_blocks(exec_list *instructions);
+ bool do_discard_simplification(exec_list *instructions);
+ bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0);
+ bool do_mat_op_to_vec(exec_list *instructions);
+ bool do_minmax_prune(exec_list *instructions);
+ bool do_noop_swizzle(exec_list *instructions);
+ bool do_structure_splitting(exec_list *instructions);
+ bool do_swizzle_swizzle(exec_list *instructions);
+ bool do_vectorize(exec_list *instructions);
+ bool do_tree_grafting(exec_list *instructions);
+ bool do_vec_index_to_cond_assign(exec_list *instructions);
+ bool do_vec_index_to_swizzle(exec_list *instructions);
+ bool lower_discard(exec_list *instructions);
+ void lower_discard_flow(exec_list *instructions);
+ bool lower_instructions(exec_list *instructions, unsigned what_to_lower);
+ bool lower_noise(exec_list *instructions);
+ bool lower_variable_index_to_cond_assign(gl_shader_stage stage,
+ exec_list *instructions, bool lower_input, bool lower_output,
+ bool lower_temp, bool lower_uniform);
+ bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
+ bool lower_const_arrays_to_uniforms(exec_list *instructions);
+ bool lower_clip_distance(gl_shader *shader);
+ void lower_output_reads(unsigned stage, exec_list *instructions);
+ bool lower_packing_builtins(exec_list *instructions, int op_mask);
+ void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size);
+ void lower_ubo_reference(struct gl_shader *shader);
+ void lower_packed_varyings(void *mem_ctx,
+ unsigned locations_used, ir_variable_mode mode,
+ unsigned gs_input_vertices, gl_shader *shader);
+ bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index);
+ bool lower_vector_derefs(gl_shader *shader);
+ void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader);
+ bool optimize_redundant_jumps(exec_list *instructions);
+ bool optimize_split_arrays(exec_list *instructions, bool linked);
+ bool lower_offset_arrays(exec_list *instructions);
+ void optimize_dead_builtin_variables(exec_list *instructions,
+ enum ir_variable_mode other);
+ bool lower_tess_level(gl_shader *shader);
+
+ bool lower_vertex_id(gl_shader *shader);
+
+ bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state);
+
+ ir_rvalue *
+ compare_index_block(exec_list *instructions, ir_variable *index,
+ unsigned base, unsigned components, void *mem_ctx);
--- /dev/null
- case ir_unop_unpack_half_2x16_split_x:
- case ir_unop_unpack_half_2x16_split_y:
- assert(ir->type == glsl_type::float_type);
- assert(ir->operands[0]->type == glsl_type::uint_type);
- break;
-
+ /*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ /**
+ * \file ir_validate.cpp
+ *
+ * Attempts to verify that various invariants of the IR tree are true.
+ *
+ * In particular, at the moment it makes sure that no single
+ * ir_instruction node except for ir_variable appears multiple times
+ * in the ir tree. ir_variable does appear multiple times: Once as a
+ * declaration in an exec_list, and multiple times as the endpoint of
+ * a dereference chain.
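+ *
+ * The public entry point is validate_ir_tree(), defined at the end of this
+ * file; it only performs work in DEBUG builds.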
+ */
+
+ #include "ir.h"
+ #include "ir_hierarchical_visitor.h"
+ #include "util/hash_table.h"
+ #include "util/set.h"
+ #include "compiler/glsl_types.h"
+
+ namespace {
+
+ class ir_validate : public ir_hierarchical_visitor {
+ public:
+ ir_validate()
+ {
+ this->ir_set = _mesa_set_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ this->current_function = NULL;
+
+ this->callback_enter = ir_validate::validate_ir;
+ this->data_enter = ir_set;
+ }
+
+ ~ir_validate()
+ {
+ _mesa_set_destroy(this->ir_set, NULL);
+ }
+
+ virtual ir_visitor_status visit(ir_variable *v);
+ virtual ir_visitor_status visit(ir_dereference_variable *ir);
+
+ virtual ir_visitor_status visit_enter(ir_discard *ir);
+ virtual ir_visitor_status visit_enter(ir_if *ir);
+
+ virtual ir_visitor_status visit_enter(ir_function *ir);
+ virtual ir_visitor_status visit_leave(ir_function *ir);
+ virtual ir_visitor_status visit_enter(ir_function_signature *ir);
+
+ virtual ir_visitor_status visit_leave(ir_expression *ir);
+ virtual ir_visitor_status visit_leave(ir_swizzle *ir);
+
+ virtual ir_visitor_status visit_enter(class ir_dereference_array *);
+
+ virtual ir_visitor_status visit_enter(ir_assignment *ir);
+ virtual ir_visitor_status visit_enter(ir_call *ir);
+
+ static void validate_ir(ir_instruction *ir, void *data);
+
+ ir_function *current_function;
+
+ struct set *ir_set;
+ };
+
+ } /* anonymous namespace */
+
+ ir_visitor_status
+ ir_validate::visit(ir_dereference_variable *ir)
+ {
+ if ((ir->var == NULL) || (ir->var->as_variable() == NULL)) {
+ printf("ir_dereference_variable @ %p does not specify a variable %p\n",
+ (void *) ir, (void *) ir->var);
+ abort();
+ }
+
+ if (_mesa_set_search(ir_set, ir->var) == NULL) {
+ printf("ir_dereference_variable @ %p specifies undeclared variable "
+ "`%s' @ %p\n",
+ (void *) ir, ir->var->name, (void *) ir->var);
+ abort();
+ }
+
+ this->validate_ir(ir, this->data_enter);
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_enter(class ir_dereference_array *ir)
+ {
+ if (!ir->array->type->is_array() && !ir->array->type->is_matrix() &&
+ !ir->array->type->is_vector()) {
+ printf("ir_dereference_array @ %p does not specify an array, a vector "
+ "or a matrix\n",
+ (void *) ir);
+ ir->print();
+ printf("\n");
+ abort();
+ }
+
+ if (!ir->array_index->type->is_scalar()) {
+ printf("ir_dereference_array @ %p does not have scalar index: %s\n",
+ (void *) ir, ir->array_index->type->name);
+ abort();
+ }
+
+ if (!ir->array_index->type->is_integer()) {
+ printf("ir_dereference_array @ %p does not have integer index: %s\n",
+ (void *) ir, ir->array_index->type->name);
+ abort();
+ }
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_enter(ir_discard *ir)
+ {
+ if (ir->condition && ir->condition->type != glsl_type::bool_type) {
+ printf("ir_discard condition %s type instead of bool.\n",
+ ir->condition->type->name);
+ ir->print();
+ printf("\n");
+ abort();
+ }
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_enter(ir_if *ir)
+ {
+ if (ir->condition->type != glsl_type::bool_type) {
+ printf("ir_if condition %s type instead of bool.\n",
+ ir->condition->type->name);
+ ir->print();
+ printf("\n");
+ abort();
+ }
+
+ return visit_continue;
+ }
+
+
+ ir_visitor_status
+ ir_validate::visit_enter(ir_function *ir)
+ {
+ /* Function definitions cannot be nested.
+ */
+ if (this->current_function != NULL) {
+ printf("Function definition nested inside another function "
+ "definition:\n");
+ printf("%s %p inside %s %p\n",
+ ir->name, (void *) ir,
+ this->current_function->name, (void *) this->current_function);
+ abort();
+ }
+
+ /* Store the current function hierarchy being traversed. This is used
+ * by the function signature visitor to ensure that the signatures are
+ * linked with the correct functions.
+ */
+ this->current_function = ir;
+
+ this->validate_ir(ir, this->data_enter);
+
+ /* Verify that all of the things stored in the list of signatures are,
+ * in fact, function signatures.
+ */
+ foreach_in_list(ir_instruction, sig, &ir->signatures) {
+ if (sig->ir_type != ir_type_function_signature) {
+ printf("Non-signature in signature list of function `%s'\n",
+ ir->name);
+ abort();
+ }
+ }
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_leave(ir_function *ir)
+ {
+ assert(ralloc_parent(ir->name) == ir);
+
+ this->current_function = NULL;
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_enter(ir_function_signature *ir)
+ {
+ if (this->current_function != ir->function()) {
+ printf("Function signature nested inside wrong function "
+ "definition:\n");
+ printf("%p inside %s %p instead of %s %p\n",
+ (void *) ir,
+ this->current_function->name, (void *) this->current_function,
+ ir->function_name(), (void *) ir->function());
+ abort();
+ }
+
+ if (ir->return_type == NULL) {
+ printf("Function signature %p for function %s has NULL return type.\n",
+ (void *) ir, ir->function_name());
+ abort();
+ }
+
+ this->validate_ir(ir, this->data_enter);
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_leave(ir_expression *ir)
+ {
+ switch (ir->operation) {
+ case ir_unop_bit_not:
+ assert(ir->operands[0]->type == ir->type);
+ break;
+ case ir_unop_logic_not:
+ assert(ir->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+ break;
+
+ case ir_unop_neg:
+ case ir_unop_abs:
+ case ir_unop_sign:
+ case ir_unop_rcp:
+ case ir_unop_rsq:
+ case ir_unop_sqrt:
+ assert(ir->type == ir->operands[0]->type);
+ break;
+
+ case ir_unop_exp:
+ case ir_unop_log:
+ case ir_unop_exp2:
+ case ir_unop_log2:
+ case ir_unop_saturate:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->type == ir->operands[0]->type);
+ break;
+
+ case ir_unop_f2i:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+ case ir_unop_f2u:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->type->base_type == GLSL_TYPE_UINT);
+ break;
+ case ir_unop_i2f:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+ break;
+ case ir_unop_f2b:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->type->base_type == GLSL_TYPE_BOOL);
+ break;
+ case ir_unop_b2f:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+ break;
+ case ir_unop_i2b:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+ assert(ir->type->base_type == GLSL_TYPE_BOOL);
+ break;
+ case ir_unop_b2i:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+ case ir_unop_u2f:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+ break;
+ case ir_unop_i2u:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+ assert(ir->type->base_type == GLSL_TYPE_UINT);
+ break;
+ case ir_unop_u2i:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+ case ir_unop_bitcast_i2f:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+ break;
+ case ir_unop_bitcast_f2i:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+ case ir_unop_bitcast_u2f:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+ break;
+ case ir_unop_bitcast_f2u:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->type->base_type == GLSL_TYPE_UINT);
+ break;
+
+ case ir_unop_trunc:
+ case ir_unop_round_even:
+ case ir_unop_ceil:
+ case ir_unop_floor:
+ case ir_unop_fract:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+ ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->operands[0]->type == ir->type);
+ break;
+ case ir_unop_sin:
+ case ir_unop_cos:
+ case ir_unop_dFdx:
+ case ir_unop_dFdx_coarse:
+ case ir_unop_dFdx_fine:
+ case ir_unop_dFdy:
+ case ir_unop_dFdy_coarse:
+ case ir_unop_dFdy_fine:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->operands[0]->type == ir->type);
+ break;
+
+ case ir_unop_pack_snorm_2x16:
+ case ir_unop_pack_unorm_2x16:
+ case ir_unop_pack_half_2x16:
+ assert(ir->type == glsl_type::uint_type);
+ assert(ir->operands[0]->type == glsl_type::vec2_type);
+ break;
+
+ case ir_unop_pack_snorm_4x8:
+ case ir_unop_pack_unorm_4x8:
+ assert(ir->type == glsl_type::uint_type);
+ assert(ir->operands[0]->type == glsl_type::vec4_type);
+ break;
+
+ case ir_unop_pack_double_2x32:
+ assert(ir->type == glsl_type::double_type);
+ assert(ir->operands[0]->type == glsl_type::uvec2_type);
+ break;
+
+ case ir_unop_unpack_snorm_2x16:
+ case ir_unop_unpack_unorm_2x16:
+ case ir_unop_unpack_half_2x16:
+ assert(ir->type == glsl_type::vec2_type);
+ assert(ir->operands[0]->type == glsl_type::uint_type);
+ break;
+
+ case ir_unop_unpack_snorm_4x8:
+ case ir_unop_unpack_unorm_4x8:
+ assert(ir->type == glsl_type::vec4_type);
+ assert(ir->operands[0]->type == glsl_type::uint_type);
+ break;
+
- case ir_binop_pack_half_2x16_split:
- assert(ir->type == glsl_type::uint_type);
- assert(ir->operands[0]->type == glsl_type::float_type);
- assert(ir->operands[1]->type == glsl_type::float_type);
- break;
-
+ case ir_unop_unpack_double_2x32:
+ assert(ir->type == glsl_type::uvec2_type);
+ assert(ir->operands[0]->type == glsl_type::double_type);
+ break;
+
+ case ir_unop_bitfield_reverse:
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->type->is_integer());
+ break;
+
+ case ir_unop_bit_count:
+ case ir_unop_find_msb:
+ case ir_unop_find_lsb:
+ assert(ir->operands[0]->type->vector_elements == ir->type->vector_elements);
+ assert(ir->operands[0]->type->is_integer());
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+
+ case ir_unop_noise:
+ /* XXX what can we assert here? */
+ break;
+
+ case ir_unop_interpolate_at_centroid:
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->operands[0]->type->is_float());
+ break;
+
+ case ir_unop_get_buffer_size:
+ assert(ir->type == glsl_type::int_type);
+ assert(ir->operands[0]->type == glsl_type::uint_type);
+ break;
+
+ case ir_unop_ssbo_unsized_array_length:
+ assert(ir->type == glsl_type::int_type);
+ assert(ir->operands[0]->type->is_array());
+ assert(ir->operands[0]->type->is_unsized_array());
+ break;
+
+ case ir_unop_d2f:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+ break;
+ case ir_unop_f2d:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+ break;
+ case ir_unop_d2i:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+ case ir_unop_i2d:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+ assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+ break;
+ case ir_unop_d2u:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->type->base_type == GLSL_TYPE_UINT);
+ break;
+ case ir_unop_u2d:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+ assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+ break;
+ case ir_unop_d2b:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->type->base_type == GLSL_TYPE_BOOL);
+ break;
+
+ case ir_unop_frexp_sig:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+ ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+ break;
+ case ir_unop_frexp_exp:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+ ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+ case ir_unop_subroutine_to_int:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_SUBROUTINE);
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+ case ir_binop_add:
+ case ir_binop_sub:
+ case ir_binop_mul:
+ case ir_binop_div:
+ case ir_binop_mod:
+ case ir_binop_min:
+ case ir_binop_max:
+ case ir_binop_pow:
+ assert(ir->operands[0]->type->base_type ==
+ ir->operands[1]->type->base_type);
+
+ if (ir->operands[0]->type->is_scalar())
+ assert(ir->operands[1]->type == ir->type);
+ else if (ir->operands[1]->type->is_scalar())
+ assert(ir->operands[0]->type == ir->type);
+ else if (ir->operands[0]->type->is_vector() &&
+ ir->operands[1]->type->is_vector()) {
+ assert(ir->operands[0]->type == ir->operands[1]->type);
+ assert(ir->operands[0]->type == ir->type);
+ }
+ break;
+
+ case ir_binop_imul_high:
+ assert(ir->type == ir->operands[0]->type);
+ assert(ir->type == ir->operands[1]->type);
+ assert(ir->type->is_integer());
+ break;
+
+ case ir_binop_carry:
+ case ir_binop_borrow:
+ assert(ir->type == ir->operands[0]->type);
+ assert(ir->type == ir->operands[1]->type);
+ assert(ir->type->base_type == GLSL_TYPE_UINT);
+ break;
+
+ case ir_binop_less:
+ case ir_binop_greater:
+ case ir_binop_lequal:
+ case ir_binop_gequal:
+ case ir_binop_equal:
+ case ir_binop_nequal:
+ /* The semantics of the IR operators differ from the GLSL <, >, <=, >=,
+ * ==, and != operators. The IR operators perform a component-wise
+ * comparison on scalar or vector types and return a boolean scalar or
+ * vector type of the same size.
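+ * For example, ir_binop_less applied to two vec3 operands yields a bvec3.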
+ */
+ assert(ir->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->operands[0]->type == ir->operands[1]->type);
+ assert(ir->operands[0]->type->is_vector()
+ || ir->operands[0]->type->is_scalar());
+ assert(ir->operands[0]->type->vector_elements
+ == ir->type->vector_elements);
+ break;
+
+ case ir_binop_all_equal:
+ case ir_binop_any_nequal:
+ /* GLSL == and != operate on scalars, vectors, matrices and arrays, and
+ * return a scalar boolean. The IR matches that.
+ */
+ assert(ir->type == glsl_type::bool_type);
+ assert(ir->operands[0]->type == ir->operands[1]->type);
+ break;
+
+ case ir_binop_lshift:
+ case ir_binop_rshift:
+ assert(ir->operands[0]->type->is_integer() &&
+ ir->operands[1]->type->is_integer());
+ if (ir->operands[0]->type->is_scalar()) {
+ assert(ir->operands[1]->type->is_scalar());
+ }
+ if (ir->operands[0]->type->is_vector() &&
+ ir->operands[1]->type->is_vector()) {
+ assert(ir->operands[0]->type->components() ==
+ ir->operands[1]->type->components());
+ }
+ assert(ir->type == ir->operands[0]->type);
+ break;
+
+ case ir_binop_bit_and:
+ case ir_binop_bit_xor:
+ case ir_binop_bit_or:
+ assert(ir->operands[0]->type->base_type ==
+ ir->operands[1]->type->base_type);
+ assert(ir->type->is_integer());
+ if (ir->operands[0]->type->is_vector() &&
+ ir->operands[1]->type->is_vector()) {
+ assert(ir->operands[0]->type->vector_elements ==
+ ir->operands[1]->type->vector_elements);
+ }
+ break;
+
+ case ir_binop_logic_and:
+ case ir_binop_logic_xor:
+ case ir_binop_logic_or:
+ assert(ir->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->operands[1]->type->base_type == GLSL_TYPE_BOOL);
+ break;
+
+ case ir_binop_dot:
+ assert(ir->type == glsl_type::float_type ||
+ ir->type == glsl_type::double_type);
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+ ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->operands[0]->type->is_vector());
+ assert(ir->operands[0]->type == ir->operands[1]->type);
+ break;
+
+ case ir_binop_ubo_load:
+ assert(ir->operands[0]->type == glsl_type::uint_type);
+
+ assert(ir->operands[1]->type == glsl_type::uint_type);
+ break;
+
+ case ir_binop_ldexp:
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->operands[0]->type->is_float() ||
+ ir->operands[0]->type->is_double());
+ assert(ir->operands[1]->type->base_type == GLSL_TYPE_INT);
+ assert(ir->operands[0]->type->components() ==
+ ir->operands[1]->type->components());
+ break;
+
+ case ir_binop_vector_extract:
+ assert(ir->operands[0]->type->is_vector());
+ assert(ir->operands[1]->type->is_scalar()
+ && ir->operands[1]->type->is_integer());
+ break;
+
+ case ir_binop_interpolate_at_offset:
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->operands[0]->type->is_float());
+ assert(ir->operands[1]->type->components() == 2);
+ assert(ir->operands[1]->type->is_float());
+ break;
+
+ case ir_binop_interpolate_at_sample:
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->operands[0]->type->is_float());
+ assert(ir->operands[1]->type == glsl_type::int_type);
+ break;
+
+ case ir_triop_fma:
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT ||
+ ir->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->type == ir->operands[0]->type);
+ assert(ir->type == ir->operands[1]->type);
+ assert(ir->type == ir->operands[2]->type);
+ break;
+
+ case ir_triop_lrp:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+ ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->operands[0]->type == ir->operands[1]->type);
+ assert(ir->operands[2]->type == ir->operands[0]->type ||
+ ir->operands[2]->type == glsl_type::float_type ||
+ ir->operands[2]->type == glsl_type::double_type);
+ break;
+
+ case ir_triop_csel:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->type->vector_elements == ir->operands[0]->type->vector_elements);
+ assert(ir->type == ir->operands[1]->type);
+ assert(ir->type == ir->operands[2]->type);
+ break;
+
+ case ir_triop_bitfield_extract:
+ assert(ir->type->is_integer());
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->operands[1]->type == ir->type);
+ assert(ir->operands[2]->type == ir->type);
+ break;
+
+ case ir_triop_vector_insert:
+ assert(ir->operands[0]->type->is_vector());
+ assert(ir->operands[1]->type->is_scalar());
+ assert(ir->operands[0]->type->base_type == ir->operands[1]->type->base_type);
+ assert(ir->operands[2]->type->is_scalar()
+ && ir->operands[2]->type->is_integer());
+ assert(ir->type == ir->operands[0]->type);
+ break;
+
+ case ir_quadop_bitfield_insert:
+ assert(ir->type->is_integer());
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->operands[1]->type == ir->type);
+ assert(ir->operands[2]->type == ir->type);
+ assert(ir->operands[3]->type == ir->type);
+ break;
+
+ case ir_quadop_vector:
+ /* The vector operator collects some number of scalars and generates a
+ * vector from them.
+ *
+ * - All of the operands must be scalar.
+ * - Number of operands must match the size of the resulting vector.
+ * - Base type of the operands must match the base type of the result.
+ */
+ assert(ir->type->is_vector());
+ switch (ir->type->vector_elements) {
+ case 2:
+ assert(ir->operands[0]->type->is_scalar());
+ assert(ir->operands[0]->type->base_type == ir->type->base_type);
+ assert(ir->operands[1]->type->is_scalar());
+ assert(ir->operands[1]->type->base_type == ir->type->base_type);
+ assert(ir->operands[2] == NULL);
+ assert(ir->operands[3] == NULL);
+ break;
+ case 3:
+ assert(ir->operands[0]->type->is_scalar());
+ assert(ir->operands[0]->type->base_type == ir->type->base_type);
+ assert(ir->operands[1]->type->is_scalar());
+ assert(ir->operands[1]->type->base_type == ir->type->base_type);
+ assert(ir->operands[2]->type->is_scalar());
+ assert(ir->operands[2]->type->base_type == ir->type->base_type);
+ assert(ir->operands[3] == NULL);
+ break;
+ case 4:
+ assert(ir->operands[0]->type->is_scalar());
+ assert(ir->operands[0]->type->base_type == ir->type->base_type);
+ assert(ir->operands[1]->type->is_scalar());
+ assert(ir->operands[1]->type->base_type == ir->type->base_type);
+ assert(ir->operands[2]->type->is_scalar());
+ assert(ir->operands[2]->type->base_type == ir->type->base_type);
+ assert(ir->operands[3]->type->is_scalar());
+ assert(ir->operands[3]->type->base_type == ir->type->base_type);
+ break;
+ default:
+ /* The is_vector assertion above should prevent execution from ever
+ * getting here.
+ */
+ assert(!"Should not get here.");
+ break;
+ }
+ }
+ }
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_leave(ir_swizzle *ir)
+ {
+ unsigned int chans[4] = {ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w};
+
+ for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
+ if (chans[i] >= ir->val->type->vector_elements) {
+ printf("ir_swizzle @ %p specifies a channel not present "
+ "in the value.\n", (void *) ir);
+ ir->print();
+ abort();
+ }
+ }
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit(ir_variable *ir)
+ {
+ /* An ir_variable is the one thing that can (and will) appear multiple times
+ * in an IR tree. It is added to the hashtable so that it can be used
+ * in the ir_dereference_variable handler to ensure that a variable is
+ * declared before it is dereferenced.
+ */
+ if (ir->name && ir->is_name_ralloced())
+ assert(ralloc_parent(ir->name) == ir);
+
+ _mesa_set_add(ir_set, ir);
+
+ /* If a variable is an array, verify that the maximum array index is in
+ * bounds. There was once an error in AST-to-HIR conversion that set this
+ * to be out of bounds.
+ */
+ if (ir->type->array_size() > 0) {
+ if (ir->data.max_array_access >= ir->type->length) {
+ printf("ir_variable has maximum access out of bounds (%d vs %d)\n",
+ ir->data.max_array_access, ir->type->length - 1);
+ ir->print();
+ abort();
+ }
+ }
+
+ /* If a variable is an interface block (or an array of interface blocks),
+ * verify that the maximum array index for each interface member is in
+ * bounds.
+ */
+ if (ir->is_interface_instance()) {
+ const glsl_struct_field *fields =
+ ir->get_interface_type()->fields.structure;
+ for (unsigned i = 0; i < ir->get_interface_type()->length; i++) {
+ if (fields[i].type->array_size() > 0) {
+ const unsigned *const max_ifc_array_access =
+ ir->get_max_ifc_array_access();
+
+ assert(max_ifc_array_access != NULL);
+
+ if (max_ifc_array_access[i] >= fields[i].type->length) {
+ printf("ir_variable has maximum access out of bounds for "
+ "field %s (%d vs %d)\n", fields[i].name,
+ max_ifc_array_access[i], fields[i].type->length);
+ ir->print();
+ abort();
+ }
+ }
+ }
+ }
+
+ if (ir->constant_initializer != NULL && !ir->data.has_initializer) {
+ printf("ir_variable didn't have an initializer, but has a constant "
+ "initializer value.\n");
+ ir->print();
+ abort();
+ }
+
+ if (ir->data.mode == ir_var_uniform
+ && is_gl_identifier(ir->name)
+ && ir->get_state_slots() == NULL) {
+ printf("built-in uniform has no state\n");
+ ir->print();
+ abort();
+ }
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_enter(ir_assignment *ir)
+ {
+ const ir_dereference *const lhs = ir->lhs;
+ if (lhs->type->is_scalar() || lhs->type->is_vector()) {
+ if (ir->write_mask == 0) {
+ printf("Assignment LHS is %s, but write mask is 0:\n",
+ lhs->type->is_scalar() ? "scalar" : "vector");
+ ir->print();
+ abort();
+ }
+
+ int lhs_components = 0;
+ for (int i = 0; i < 4; i++) {
+ if (ir->write_mask & (1 << i))
+ lhs_components++;
+ }
+
+ if (lhs_components != ir->rhs->type->vector_elements) {
+ printf("Assignment count of LHS write mask channels enabled not\n"
+ "matching RHS vector size (%d LHS, %d RHS).\n",
+ lhs_components, ir->rhs->type->vector_elements);
+ ir->print();
+ abort();
+ }
+ }
+
+ this->validate_ir(ir, this->data_enter);
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_enter(ir_call *ir)
+ {
+ ir_function_signature *const callee = ir->callee;
+
+ if (callee->ir_type != ir_type_function_signature) {
+ printf("IR called by ir_call is not ir_function_signature!\n");
+ abort();
+ }
+
+ if (ir->return_deref) {
+ if (ir->return_deref->type != callee->return_type) {
+ printf("callee type %s does not match return storage type %s\n",
+ callee->return_type->name, ir->return_deref->type->name);
+ abort();
+ }
+ } else if (callee->return_type != glsl_type::void_type) {
+ printf("ir_call has non-void callee but no return storage\n");
+ abort();
+ }
+
+ const exec_node *formal_param_node = callee->parameters.head;
+ const exec_node *actual_param_node = ir->actual_parameters.head;
+ while (true) {
+ if (formal_param_node->is_tail_sentinel()
+ != actual_param_node->is_tail_sentinel()) {
+ printf("ir_call has the wrong number of parameters:\n");
+ goto dump_ir;
+ }
+ if (formal_param_node->is_tail_sentinel()) {
+ break;
+ }
+ const ir_variable *formal_param
+ = (const ir_variable *) formal_param_node;
+ const ir_rvalue *actual_param
+ = (const ir_rvalue *) actual_param_node;
+ if (formal_param->type != actual_param->type) {
+ printf("ir_call parameter type mismatch:\n");
+ goto dump_ir;
+ }
+ if (formal_param->data.mode == ir_var_function_out
+ || formal_param->data.mode == ir_var_function_inout) {
+ if (!actual_param->is_lvalue()) {
+ printf("ir_call out/inout parameters must be lvalues:\n");
+ goto dump_ir;
+ }
+ }
+ formal_param_node = formal_param_node->next;
+ actual_param_node = actual_param_node->next;
+ }
+
+ return visit_continue;
+
+ dump_ir:
+ ir->print();
+ printf("callee:\n");
+ callee->print();
+ abort();
+ return visit_stop;
+ }
+
+ void
+ ir_validate::validate_ir(ir_instruction *ir, void *data)
+ {
+ struct set *ir_set = (struct set *) data;
+
+ if (_mesa_set_search(ir_set, ir)) {
+ printf("Instruction node present twice in ir tree:\n");
+ ir->print();
+ printf("\n");
+ abort();
+ }
+ _mesa_set_add(ir_set, ir);
+ }
+
+ void
+ check_node_type(ir_instruction *ir, void *data)
+ {
+ (void) data;
+
+ if (ir->ir_type >= ir_type_max) {
+ printf("Instruction node with unset type\n");
+ ir->print(); printf("\n");
+ }
+ ir_rvalue *value = ir->as_rvalue();
+ if (value != NULL)
+ assert(value->type != glsl_type::error_type);
+ }
+
+ void
+ validate_ir_tree(exec_list *instructions)
+ {
+ /* We shouldn't have any reason to validate IR in a release build,
+ * and it's half composed of assert()s anyway which wouldn't do
+ * anything.
+ */
+ #ifdef DEBUG
+ ir_validate v;
+
+ v.run(instructions);
+
+ foreach_in_list(ir_instruction, ir, instructions) {
+ visit_tree(ir, check_node_type, NULL);
+ }
+ #endif
+ }
--- /dev/null
+ /*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #include "main/core.h"
+ #include "ir.h"
+ #include "linker.h"
+ #include "ir_uniform.h"
+
+ /* These functions are put in a "private" namespace instead of being marked
+ * static so that the unit tests can access them. See
+ * http://code.google.com/p/googletest/wiki/AdvancedGuide#Testing_Private_Code
+ */
+ namespace linker {
+
+ gl_uniform_storage *
+ get_storage(gl_uniform_storage *storage, unsigned num_storage,
+ const char *name)
+ {
+ for (unsigned int i = 0; i < num_storage; i++) {
+ if (strcmp(name, storage[i].name) == 0)
+ return &storage[i];
+ }
+
+ return NULL;
+ }
+
+ static unsigned
+ get_uniform_block_index(const gl_shader_program *shProg,
+ const char *uniformBlockName)
+ {
+ for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
+ if (!strcmp(shProg->BufferInterfaceBlocks[i].Name, uniformBlockName))
+ return i;
+ }
+
+ return GL_INVALID_INDEX;
+ }
+
+ void
+ copy_constant_to_storage(union gl_constant_value *storage,
+ const ir_constant *val,
+ const enum glsl_base_type base_type,
+ const unsigned int elements,
+ unsigned int boolean_true)
+ {
+ for (unsigned int i = 0; i < elements; i++) {
+ switch (base_type) {
+ case GLSL_TYPE_UINT:
+ storage[i].u = val->value.u[i];
+ break;
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_SAMPLER:
+ storage[i].i = val->value.i[i];
+ break;
+ case GLSL_TYPE_FLOAT:
+ storage[i].f = val->value.f[i];
+ break;
+ case GLSL_TYPE_DOUBLE:
+ /* XXX need to check on big-endian */
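+ /* Each double occupies two consecutive 32-bit slots; on a little-endian
+ * host the low word is stored first.
+ */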
+ storage[i * 2].u = *(uint32_t *)&val->value.d[i];
+ storage[i * 2 + 1].u = *(((uint32_t *)&val->value.d[i]) + 1);
+ break;
+ case GLSL_TYPE_BOOL:
+ storage[i].b = val->value.b[i] ? boolean_true : 0;
+ break;
+ case GLSL_TYPE_ARRAY:
+ case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_INTERFACE:
++ case GLSL_TYPE_FUNCTION:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_SUBROUTINE:
+ case GLSL_TYPE_ERROR:
+ /* All other types should have already been filtered by other
+ * paths in the caller.
+ */
+ assert(!"Should not get here.");
+ break;
+ }
+ }
+ }
+
+ /**
+ * Initialize an opaque uniform from the value of an explicit binding
+ * qualifier specified in the shader. Atomic counters are different because
+ * they have no storage and should be handled elsewhere.
+ */
+ void
+ set_opaque_binding(void *mem_ctx, gl_shader_program *prog,
+ const glsl_type *type, const char *name, int *binding)
+ {
+
+ if (type->is_array() && type->fields.array->is_array()) {
+ const glsl_type *const element_type = type->fields.array;
+
+ for (unsigned int i = 0; i < type->length; i++) {
+ const char *element_name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i);
+
+ set_opaque_binding(mem_ctx, prog, element_type,
+ element_name, binding);
+ }
+ } else {
+ struct gl_uniform_storage *const storage =
+ get_storage(prog->UniformStorage, prog->NumUniformStorage, name);
+
+ if (storage == NULL) {
+ assert(storage != NULL);
+ return;
+ }
+
+ const unsigned elements = MAX2(storage->array_elements, 1);
+
+ /* Section 4.4.4 (Opaque-Uniform Layout Qualifiers) of the GLSL 4.20 spec
+ * says:
+ *
+ * "If the binding identifier is used with an array, the first element
+ * of the array takes the specified unit and each subsequent element
+ * takes the next consecutive unit."
+ */
+ for (unsigned int i = 0; i < elements; i++) {
+ storage->storage[i].i = (*binding)++;
+ }
+
+ for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
+ gl_shader *shader = prog->_LinkedShaders[sh];
+
+ if (shader) {
+ if (storage->type->base_type == GLSL_TYPE_SAMPLER &&
+ storage->opaque[sh].active) {
+ for (unsigned i = 0; i < elements; i++) {
+ const unsigned index = storage->opaque[sh].index + i;
+ shader->SamplerUnits[index] = storage->storage[i].i;
+ }
+
+ } else if (storage->type->base_type == GLSL_TYPE_IMAGE &&
+ storage->opaque[sh].active) {
+ for (unsigned i = 0; i < elements; i++) {
+ const unsigned index = storage->opaque[sh].index + i;
+ shader->ImageUnits[index] = storage->storage[i].i;
+ }
+ }
+ }
+ }
+
+ storage->initialized = true;
+ }
+ }
+
+ void
+ set_block_binding(gl_shader_program *prog, const char *block_name, int binding)
+ {
+ const unsigned block_index = get_uniform_block_index(prog, block_name);
+
+ if (block_index == GL_INVALID_INDEX) {
+ assert(block_index != GL_INVALID_INDEX);
+ return;
+ }
+
+ /* This is a field of a UBO. val is the binding index. */
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ int stage_index = prog->InterfaceBlockStageIndex[i][block_index];
+
+ if (stage_index != -1) {
+ struct gl_shader *sh = prog->_LinkedShaders[i];
+ sh->BufferInterfaceBlocks[stage_index].Binding = binding;
+ }
+ }
+ }
+
+ void
+ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog,
+ const char *name, const glsl_type *type,
+ ir_constant *val, unsigned int boolean_true)
+ {
+ const glsl_type *t_without_array = type->without_array();
+ if (type->is_record()) {
+ ir_constant *field_constant;
+
+ field_constant = (ir_constant *)val->components.get_head();
+
+ for (unsigned int i = 0; i < type->length; i++) {
+ const glsl_type *field_type = type->fields.structure[i].type;
+ const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
+ type->fields.structure[i].name);
+ set_uniform_initializer(mem_ctx, prog, field_name,
+ field_type, field_constant, boolean_true);
+ field_constant = (ir_constant *)field_constant->next;
+ }
+ return;
+ } else if (t_without_array->is_record() ||
+ (type->is_array() && type->fields.array->is_array())) {
+ const glsl_type *const element_type = type->fields.array;
+
+ for (unsigned int i = 0; i < type->length; i++) {
+ const char *element_name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i);
+
+ set_uniform_initializer(mem_ctx, prog, element_name,
+ element_type, val->array_elements[i],
+ boolean_true);
+ }
+ return;
+ }
+
+ struct gl_uniform_storage *const storage =
+ get_storage(prog->UniformStorage,
+ prog->NumUniformStorage,
+ name);
+ if (storage == NULL) {
+ assert(storage != NULL);
+ return;
+ }
+
+ if (val->type->is_array()) {
+ const enum glsl_base_type base_type =
+ val->array_elements[0]->type->base_type;
+ const unsigned int elements = val->array_elements[0]->type->components();
+ unsigned int idx = 0;
+ unsigned dmul = (base_type == GLSL_TYPE_DOUBLE) ? 2 : 1;
+
+ assert(val->type->length >= storage->array_elements);
+ for (unsigned int i = 0; i < storage->array_elements; i++) {
+ copy_constant_to_storage(& storage->storage[idx],
+ val->array_elements[i],
+ base_type,
+ elements,
+ boolean_true);
+
+ idx += elements * dmul;
+ }
+ } else {
+ copy_constant_to_storage(storage->storage,
+ val,
+ val->type->base_type,
+ val->type->components(),
+ boolean_true);
+
+ if (storage->type->is_sampler()) {
+ for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
+ gl_shader *shader = prog->_LinkedShaders[sh];
+
+ if (shader && storage->opaque[sh].active) {
+ unsigned index = storage->opaque[sh].index;
+
+ shader->SamplerUnits[index] = storage->storage[0].i;
+ }
+ }
+ }
+ }
+
+ storage->initialized = true;
+ }
+ }
+
+ void
+ link_set_uniform_initializers(struct gl_shader_program *prog,
+ unsigned int boolean_true)
+ {
+ void *mem_ctx = NULL;
+
+ for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) {
+ struct gl_shader *shader = prog->_LinkedShaders[i];
+
+ if (shader == NULL)
+ continue;
+
+ foreach_in_list(ir_instruction, node, shader->ir) {
+ ir_variable *const var = node->as_variable();
+
+ if (!var || (var->data.mode != ir_var_uniform &&
+ var->data.mode != ir_var_shader_storage))
+ continue;
+
+ if (!mem_ctx)
+ mem_ctx = ralloc_context(NULL);
+
+ if (var->data.explicit_binding) {
+ const glsl_type *const type = var->type;
+
+ if (type->without_array()->is_sampler() ||
+ type->without_array()->is_image()) {
+ int binding = var->data.binding;
+ linker::set_opaque_binding(mem_ctx, prog, var->type,
+ var->name, &binding);
+ } else if (var->is_in_buffer_block()) {
+ const glsl_type *const iface_type = var->get_interface_type();
+
+ /* If the variable is an array and it is an interface instance,
+ * we need to set the binding for each array element. Just
+ * checking that the variable is an array is not sufficient.
+ * The variable could be an array element of a uniform block
+ * that lacks an instance name. For example:
+ *
+ * uniform U {
+ * float f[4];
+ * };
+ *
+ * In this case "f" would pass is_in_buffer_block (above) and
+ * type->is_array(), but it will fail is_interface_instance().
+ */
+ if (var->is_interface_instance() && var->type->is_array()) {
+ for (unsigned i = 0; i < var->type->length; i++) {
+ const char *name =
+ ralloc_asprintf(mem_ctx, "%s[%u]", iface_type->name, i);
+
+ /* Section 4.4.3 (Uniform Block Layout Qualifiers) of the
+ * GLSL 4.20 spec says:
+ *
+ * "If the binding identifier is used with a uniform
+ * block instanced as an array then the first element
+ * of the array takes the specified block binding and
+ * each subsequent element takes the next consecutive
+ * uniform block binding point."
+ */
+ linker::set_block_binding(prog, name,
+ var->data.binding + i);
+ }
+ } else {
+ linker::set_block_binding(prog, iface_type->name,
+ var->data.binding);
+ }
+ } else if (type->contains_atomic()) {
+ /* we don't actually need to do anything. */
+ } else {
+ assert(!"Explicit binding not on a sampler, UBO or atomic.");
+ }
+ } else if (var->constant_initializer) {
+ linker::set_uniform_initializer(mem_ctx, prog, var->name,
+ var->type, var->constant_initializer,
+ boolean_true);
+ }
+ }
+ }
+
+ ralloc_free(mem_ctx);
+ }
--- /dev/null
- /* Mutually exclusive options. */
- assert(!((op_mask & LOWER_PACK_HALF_2x16) &&
- (op_mask & LOWER_PACK_HALF_2x16_TO_SPLIT)));
-
- assert(!((op_mask & LOWER_UNPACK_HALF_2x16) &&
- (op_mask & LOWER_UNPACK_HALF_2x16_TO_SPLIT)));
-
+ /*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #include "ir.h"
+ #include "ir_builder.h"
+ #include "ir_optimization.h"
+ #include "ir_rvalue_visitor.h"
+
+ namespace {
+
+ using namespace ir_builder;
+
+ /**
+ * A visitor that lowers built-in floating-point pack/unpack expressions
+ * such as packSnorm2x16.
+ */
+ class lower_packing_builtins_visitor : public ir_rvalue_visitor {
+ public:
+ /**
+ * \param op_mask is a bitmask of `enum lower_packing_builtins_op`
+ */
+ explicit lower_packing_builtins_visitor(int op_mask)
+ : op_mask(op_mask),
+ progress(false)
+ {
- case LOWER_PACK_HALF_2x16_TO_SPLIT:
- *rvalue = split_pack_half_2x16(op0);
- break;
+ factory.instructions = &factory_instructions;
+ }
+
+ virtual ~lower_packing_builtins_visitor()
+ {
+ assert(factory_instructions.is_empty());
+ }
+
+ bool get_progress() { return progress; }
+
+ void handle_rvalue(ir_rvalue **rvalue)
+ {
+ if (!*rvalue)
+ return;
+
+ ir_expression *expr = (*rvalue)->as_expression();
+ if (!expr)
+ return;
+
+ enum lower_packing_builtins_op lowering_op =
+ choose_lowering_op(expr->operation);
+
+ if (lowering_op == LOWER_PACK_UNPACK_NONE)
+ return;
+
+ setup_factory(ralloc_parent(expr));
+
+ ir_rvalue *op0 = expr->operands[0];
+ ralloc_steal(factory.mem_ctx, op0);
+
+ switch (lowering_op) {
+ case LOWER_PACK_SNORM_2x16:
+ *rvalue = lower_pack_snorm_2x16(op0);
+ break;
+ case LOWER_PACK_SNORM_4x8:
+ *rvalue = lower_pack_snorm_4x8(op0);
+ break;
+ case LOWER_PACK_UNORM_2x16:
+ *rvalue = lower_pack_unorm_2x16(op0);
+ break;
+ case LOWER_PACK_UNORM_4x8:
+ *rvalue = lower_pack_unorm_4x8(op0);
+ break;
+ case LOWER_PACK_HALF_2x16:
+ *rvalue = lower_pack_half_2x16(op0);
+ break;
- case LOWER_UNPACK_HALF_2x16_TO_SPLIT:
- *rvalue = split_unpack_half_2x16(op0);
- break;
+ case LOWER_UNPACK_SNORM_2x16:
+ *rvalue = lower_unpack_snorm_2x16(op0);
+ break;
+ case LOWER_UNPACK_SNORM_4x8:
+ *rvalue = lower_unpack_snorm_4x8(op0);
+ break;
+ case LOWER_UNPACK_UNORM_2x16:
+ *rvalue = lower_unpack_unorm_2x16(op0);
+ break;
+ case LOWER_UNPACK_UNORM_4x8:
+ *rvalue = lower_unpack_unorm_4x8(op0);
+ break;
+ case LOWER_UNPACK_HALF_2x16:
+ *rvalue = lower_unpack_half_2x16(op0);
+ break;
- result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT);
+ case LOWER_PACK_UNPACK_NONE:
+ case LOWER_PACK_USE_BFI:
+ case LOWER_PACK_USE_BFE:
+ assert(!"not reached");
+ break;
+ }
+
+ teardown_factory();
+ progress = true;
+ }
+
+ private:
+ const int op_mask;
+ bool progress;
+ ir_factory factory;
+ exec_list factory_instructions;
+
+ /**
+ * Determine the needed lowering operation by filtering \a expr_op
+ * through \ref op_mask.
+ */
+ enum lower_packing_builtins_op
+ choose_lowering_op(ir_expression_operation expr_op)
+ {
+ /* C++ regards int and enum as fundamentally different types.
+ * So, we can't simply return from each case; we must cast the return
+ * value.
+ */
+ int result;
+
+ switch (expr_op) {
+ case ir_unop_pack_snorm_2x16:
+ result = op_mask & LOWER_PACK_SNORM_2x16;
+ break;
+ case ir_unop_pack_snorm_4x8:
+ result = op_mask & LOWER_PACK_SNORM_4x8;
+ break;
+ case ir_unop_pack_unorm_2x16:
+ result = op_mask & LOWER_PACK_UNORM_2x16;
+ break;
+ case ir_unop_pack_unorm_4x8:
+ result = op_mask & LOWER_PACK_UNORM_4x8;
+ break;
+ case ir_unop_pack_half_2x16:
- result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT);
++ result = op_mask & LOWER_PACK_HALF_2x16;
+ break;
+ case ir_unop_unpack_snorm_2x16:
+ result = op_mask & LOWER_UNPACK_SNORM_2x16;
+ break;
+ case ir_unop_unpack_snorm_4x8:
+ result = op_mask & LOWER_UNPACK_SNORM_4x8;
+ break;
+ case ir_unop_unpack_unorm_2x16:
+ result = op_mask & LOWER_UNPACK_UNORM_2x16;
+ break;
+ case ir_unop_unpack_unorm_4x8:
+ result = op_mask & LOWER_UNPACK_UNORM_4x8;
+ break;
+ case ir_unop_unpack_half_2x16:
- result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT);
++ result = op_mask & LOWER_UNPACK_HALF_2x16;
+ break;
+ default:
+ result = LOWER_PACK_UNPACK_NONE;
+ break;
+ }
+
+ return static_cast<enum lower_packing_builtins_op>(result);
+ }
+
+ void
+ setup_factory(void *mem_ctx)
+ {
+ assert(factory.mem_ctx == NULL);
+ assert(factory.instructions->is_empty());
+
+ factory.mem_ctx = mem_ctx;
+ }
+
+ void
+ teardown_factory()
+ {
+ base_ir->insert_before(factory.instructions);
+ assert(factory.instructions->is_empty());
+ factory.mem_ctx = NULL;
+ }
+
+ template <typename T>
+ ir_constant*
+ constant(T x)
+ {
+ return factory.constant(x);
+ }
+
+ /**
+ * \brief Pack two uint16's into a single uint32.
+ *
+ * Interpret the given uvec2 as a uint16 pair. Pack the pair into a uint32
+ * where the least significant bits specify the first element of the pair.
+ * Return the uint32.
+ */
+ ir_rvalue*
+ pack_uvec2_to_uint(ir_rvalue *uvec2_rval)
+ {
+ assert(uvec2_rval->type == glsl_type::uvec2_type);
+
+ /* uvec2 u = UVEC2_RVAL; */
+ ir_variable *u = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_pack_uvec2_to_uint");
+ factory.emit(assign(u, uvec2_rval));
+
+ if (op_mask & LOWER_PACK_USE_BFI) {
+ return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)),
+ swizzle_y(u),
+ constant(16u),
+ constant(16u));
+ }
+
+ /* return (u.y << 16) | (u.x & 0xffff); */
+ return bit_or(lshift(swizzle_y(u), constant(16u)),
+ bit_and(swizzle_x(u), constant(0xffffu)));
+ }
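+
+ /* Illustrative example: pack_uvec2_to_uint(uvec2(0x1234u, 0xABCDu)) yields
+ * (0xABCDu << 16u) | 0x1234u == 0xABCD1234u; the bitfield_insert variant
+ * above computes the same value.
+ */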
+
+ /**
+ * \brief Pack four uint8's into a single uint32.
+ *
+ * Interpret the given uvec4 as a uint8 4-tuple. Pack the 4-tuple into a
+ * uint32 where the least significant bits specify the first element of the
+ * 4-tuple. Return the uint32.
+ */
+ ir_rvalue*
+ pack_uvec4_to_uint(ir_rvalue *uvec4_rval)
+ {
+ assert(uvec4_rval->type == glsl_type::uvec4_type);
+
+ ir_variable *u = factory.make_temp(glsl_type::uvec4_type,
+ "tmp_pack_uvec4_to_uint");
+
+ if (op_mask & LOWER_PACK_USE_BFI) {
+ /* uvec4 u = UVEC4_RVAL; */
+ factory.emit(assign(u, uvec4_rval));
+
+ return bitfield_insert(bitfield_insert(
+ bitfield_insert(
+ bit_and(swizzle_x(u), constant(0xffu)),
+ swizzle_y(u), constant(8u), constant(8u)),
+ swizzle_z(u), constant(16u), constant(8u)),
+ swizzle_w(u), constant(24u), constant(8u));
+ }
+
+ /* uvec4 u = UVEC4_RVAL & 0xff */
+ factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu))));
+
+ /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */
+ return bit_or(bit_or(lshift(swizzle_w(u), constant(24u)),
+ lshift(swizzle_z(u), constant(16u))),
+ bit_or(lshift(swizzle_y(u), constant(8u)),
+ swizzle_x(u)));
+ }
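+
+ /* Illustrative example: pack_uvec4_to_uint(uvec4(0x12u, 0x34u, 0x56u,
+ * 0x78u)) yields 0x78563412u, with the first component in the least
+ * significant byte.
+ */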
+
+ /**
+ * \brief Unpack a uint32 into two uint16's.
+ *
+ * Interpret the given uint32 as a uint16 pair where the uint32's least
+ * significant bits specify the pair's first element. Return the uint16
+ * pair as a uvec2.
+ */
+ ir_rvalue*
+ unpack_uint_to_uvec2(ir_rvalue *uint_rval)
+ {
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ /* uint u = UINT_RVAL; */
+ ir_variable *u = factory.make_temp(glsl_type::uint_type,
+ "tmp_unpack_uint_to_uvec2_u");
+ factory.emit(assign(u, uint_rval));
+
+ /* uvec2 u2; */
+ ir_variable *u2 = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_unpack_uint_to_uvec2_u2");
+
+ /* u2.x = u & 0xffffu; */
+ factory.emit(assign(u2, bit_and(u, constant(0xffffu)), WRITEMASK_X));
+
+ /* u2.y = u >> 16u; */
+ factory.emit(assign(u2, rshift(u, constant(16u)), WRITEMASK_Y));
+
+ return deref(u2).val;
+ }
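+
+ /* Illustrative example: unpack_uint_to_uvec2(0xABCD1234u) yields
+ * uvec2(0x1234u, 0xABCDu), the inverse of pack_uvec2_to_uint above.
+ */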
+
+ /**
+ * \brief Unpack a uint32 into two int16's.
+ *
+ * Specifically each 16-bit value is sign-extended to the full width of an
+ * int32 on return.
+ */
+ ir_rvalue *
+ unpack_uint_to_ivec2(ir_rvalue *uint_rval)
+ {
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ if (!(op_mask & LOWER_PACK_USE_BFE)) {
+ return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)),
+ constant(16u)),
+ constant(16u));
+ }
+
+ ir_variable *i = factory.make_temp(glsl_type::int_type,
+ "tmp_unpack_uint_to_ivec2_i");
+ factory.emit(assign(i, u2i(uint_rval)));
+
+ /* ivec2 i2; */
+ ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type,
+ "tmp_unpack_uint_to_ivec2_i2");
+
+ factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)),
+ WRITEMASK_X));
+ factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)),
+ WRITEMASK_Y));
+
+ return deref(i2).val;
+ }
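+
+ /* Illustrative example: unpack_uint_to_ivec2(0x0000ffffu) yields
+ * ivec2(-1, 0); the 16-bit value 0xffff is sign-extended to the int32 -1
+ * by either the shift pair or the bitfield_extract path above.
+ */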
+
+ /**
+ * \brief Unpack a uint32 into four uint8's.
+ *
+ * Interpret the given uint32 as a uint8 4-tuple where the uint32's least
+ * significant bits specify the 4-tuple's first element. Return the uint8
+ * 4-tuple as a uvec4.
+ */
+ ir_rvalue*
+ unpack_uint_to_uvec4(ir_rvalue *uint_rval)
+ {
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ /* uint u = UINT_RVAL; */
+ ir_variable *u = factory.make_temp(glsl_type::uint_type,
+ "tmp_unpack_uint_to_uvec4_u");
+ factory.emit(assign(u, uint_rval));
+
+ /* uvec4 u4; */
+ ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type,
+ "tmp_unpack_uint_to_uvec4_u4");
+
+ /* u4.x = u & 0xffu; */
+ factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X));
+
+ if (op_mask & LOWER_PACK_USE_BFE) {
+ /* u4.y = bitfield_extract(u, 8, 8); */
+ factory.emit(assign(u4, bitfield_extract(u, constant(8u), constant(8u)),
+ WRITEMASK_Y));
+
+ /* u4.z = bitfield_extract(u, 16, 8); */
+ factory.emit(assign(u4, bitfield_extract(u, constant(16u), constant(8u)),
+ WRITEMASK_Z));
+ } else {
+ /* u4.y = (u >> 8u) & 0xffu; */
+ factory.emit(assign(u4, bit_and(rshift(u, constant(8u)),
+ constant(0xffu)), WRITEMASK_Y));
+
+ /* u4.z = (u >> 16u) & 0xffu; */
+ factory.emit(assign(u4, bit_and(rshift(u, constant(16u)),
+ constant(0xffu)), WRITEMASK_Z));
+ }
+
+ /* u4.w = (u >> 24u) */
+ factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W));
+
+ return deref(u4).val;
+ }
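+
+ /* Illustrative example: unpack_uint_to_uvec4(0x78563412u) yields
+ * uvec4(0x12u, 0x34u, 0x56u, 0x78u), the inverse of pack_uvec4_to_uint.
+ */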
+
+ /**
+ * \brief Unpack a uint32 into four int8's.
+ *
+ * Specifically each 8-bit value is sign-extended to the full width of an
+ * int32 on return.
+ */
+ ir_rvalue *
+ unpack_uint_to_ivec4(ir_rvalue *uint_rval)
+ {
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ if (!(op_mask & LOWER_PACK_USE_BFE)) {
+ return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)),
+ constant(24u)),
+ constant(24u));
+ }
+
+ ir_variable *i = factory.make_temp(glsl_type::int_type,
+ "tmp_unpack_uint_to_ivec4_i");
+ factory.emit(assign(i, u2i(uint_rval)));
+
+ /* ivec4 i4; */
+ ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type,
+ "tmp_unpack_uint_to_ivec4_i4");
+
+ factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)),
+ WRITEMASK_X));
+ factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)),
+ WRITEMASK_Y));
+ factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)),
+ WRITEMASK_Z));
+ factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)),
+ WRITEMASK_W));
+
+ return deref(i4).val;
+ }
+
+ /**
+ * \brief Lower a packSnorm2x16 expression.
+ *
+ * \param vec2_rval is packSnorm2x16's input
+ * \return packSnorm2x16's output as a uint rvalue
+ */
+ ir_rvalue*
+ lower_pack_snorm_2x16(ir_rvalue *vec2_rval)
+ {
+ /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * highp uint packSnorm2x16(vec2 v)
+ * --------------------------------
+ * First, converts each component of the normalized floating-point value
+ * v into 16-bit integer values. Then, the results are packed into the
+ * returned 32-bit unsigned integer.
+ *
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
+ *
+ * The first component of the vector will be written to the least
+ * significant bits of the output; the last component will be written to
+ * the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return pack_uvec2_to_uint(
+ * uvec2(ivec2(
+ * round(clamp(VEC2_RVALUE, -1.0f, 1.0f) * 32767.0f))));
+ *
+ * It is necessary to first convert the vec2 to ivec2 rather than directly
+ * converting vec2 to uvec2 because the latter conversion is undefined.
+ * From page 56 (62 of pdf) of the GLSL ES 3.00 spec: "It is undefined to
+ * convert a negative floating point value to an uint".
+ */
+ assert(vec2_rval->type == glsl_type::vec2_type);
+
+ ir_rvalue *result = pack_uvec2_to_uint(
+ i2u(f2i(round_even(mul(clamp(vec2_rval,
+ constant(-1.0f),
+ constant(1.0f)),
+ constant(32767.0f))))));
+
+ assert(result->type == glsl_type::uint_type);
+ return result;
+ }
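+
+ /* Illustrative example: lowering packSnorm2x16(vec2(-1.0, 0.5)) produces
+ * round(-32767.0) == -32767 == 0x8001 (as a uint16) and round(16383.5) ==
+ * 16384 == 0x4000, so the packed result is 0x40008001u.
+ */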
+
+ /**
+ * \brief Lower a packSnorm4x8 expression.
+ *
+ * \param vec4_rval is packSnorm4x8's input
+ * \return packSnorm4x8's output as a uint rvalue
+ */
+ ir_rvalue*
+ lower_pack_snorm_4x8(ir_rvalue *vec4_rval)
+ {
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+ *
+ * highp uint packSnorm4x8(vec4 v)
+ * -------------------------------
+ * First, converts each component of the normalized floating-point value
+ * v into 8-bit integer values. Then, the results are packed into the
+ * returned 32-bit unsigned integer.
+ *
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
+ *
+ * The first component of the vector will be written to the least
+ * significant bits of the output; the last component will be written to
+ * the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return pack_uvec4_to_uint(
+ * uvec4(ivec4(
+ * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f))));
+ *
+ * It is necessary to first convert the vec4 to ivec4 rather than directly
+ * converting vec4 to uvec4 because the latter conversion is undefined.
+ * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to
+ * convert a negative floating point value to an uint".
+ */
+ assert(vec4_rval->type == glsl_type::vec4_type);
+
+ ir_rvalue *result = pack_uvec4_to_uint(
+ i2u(f2i(round_even(mul(clamp(vec4_rval,
+ constant(-1.0f),
+ constant(1.0f)),
+ constant(127.0f))))));
+
+ assert(result->type == glsl_type::uint_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower an unpackSnorm2x16 expression.
+ *
+ * \param uint_rval is unpackSnorm2x16's input
+ * \return unpackSnorm2x16's output as a vec2 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_snorm_2x16(ir_rvalue *uint_rval)
+ {
+ /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * highp vec2 unpackSnorm2x16 (highp uint p)
+ * -----------------------------------------
+ * First, unpacks a single 32-bit unsigned integer p into a pair of
+ * 16-bit unsigned integers. Then, each component is converted to
+ * a normalized floating-point value to generate the returned
+ * two-component vector.
+ *
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackSnorm2x16: clamp(f / 32767.0, -1,+1)
+ *
+ * The first component of the returned vector will be extracted from the
+ * least significant bits of the input; the last component will be
+ * extracted from the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return clamp(
+ * ((ivec2(unpack_uint_to_uvec2(UINT_RVALUE)) << 16) >> 16) / 32767.0f,
+ * -1.0f, 1.0f);
+ *
+ * The above IR may appear unnecessarily complex, but the intermediate
+ * conversion to ivec2 and the bit shifts are necessary to correctly unpack
+ * negative floats.
+ *
+ * To see why, consider packing and then unpacking vec2(-1.0, 0.0).
+ * packSnorm2x16 encodes -1.0 as the int16 0xffff. During unpacking, we
+ * place that int16 into an int32, which results in the *positive* integer
+ * 0x0000ffff. The int16's sign bit becomes, in the int32, the rather
+ * unimportant bit 16. We must now extend the int16's sign bit into bits
+ * 17-32, which is accomplished by left-shifting then right-shifting.
+ */
+
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ ir_rvalue *result =
+ clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)),
+ constant(32767.0f)),
+ constant(-1.0f),
+ constant(1.0f));
+
+ assert(result->type == glsl_type::vec2_type);
+ return result;
+ }
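+
+ /* Illustrative example: lowering unpackSnorm2x16(0x40008001u) sign-extends
+ * the two halves to (-32767, 16384), divides by 32767.0, and clamps,
+ * giving approximately vec2(-1.0, 0.5).
+ */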
+
+ /**
+ * \brief Lower an unpackSnorm4x8 expression.
+ *
+ * \param uint_rval is unpackSnorm4x8's input
+ * \return unpackSnorm4x8's output as a vec4 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_snorm_4x8(ir_rvalue *uint_rval)
+ {
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+ *
+ * highp vec4 unpackSnorm4x8 (highp uint p)
+ * ----------------------------------------
+ * First, unpacks a single 32-bit unsigned integer p into four
+ * 8-bit unsigned integers. Then, each component is converted to
+ * a normalized floating-point value to generate the returned
+ * four-component vector.
+ *
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackSnorm4x8: clamp(f / 127.0, -1, +1)
+ *
+ * The first component of the returned vector will be extracted from the
+ * least significant bits of the input; the last component will be
+ * extracted from the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return clamp(
+ * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f,
+ * -1.0f, 1.0f);
+ *
+ * The above IR may appear unnecessarily complex, but the intermediate
+ * conversion to ivec4 and the bit shifts are necessary to correctly unpack
+ * negative floats.
+ *
+ * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0,
+ * 0.0). packSnorm4x8 encodes -1.0 as the int8 0xff. During unpacking, we
+ * place that int8 into an int32, which results in the *positive* integer
+ * 0x000000ff. The int8's sign bit becomes, in the int32, the rather
+ * unimportant bit 8. We must now extend the int8's sign bit into bits
+ * 9-32, which is accomplished by left-shifting then right-shifting.
+ */
+
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ ir_rvalue *result =
+ clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)),
+ constant(127.0f)),
+ constant(-1.0f),
+ constant(1.0f));
+
+ assert(result->type == glsl_type::vec4_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower a packUnorm2x16 expression.
+ *
+ * \param vec2_rval is packUnorm2x16's input
+ * \return packUnorm2x16's output as a uint rvalue
+ */
+ ir_rvalue*
+ lower_pack_unorm_2x16(ir_rvalue *vec2_rval)
+ {
+ /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * highp uint packUnorm2x16 (vec2 v)
+ * ---------------------------------
+ * First, converts each component of the normalized floating-point value
+ * v into 16-bit integer values. Then, the results are packed into the
+ * returned 32-bit unsigned integer.
+ *
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
+ *
+ * The first component of the vector will be written to the least
+ * significant bits of the output; the last component will be written to
+ * the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return pack_uvec2_to_uint(uvec2(
+ * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 65535.0f)));
+ *
+ * Here it is safe to directly convert the vec2 to uvec2 because the vec2
+ * has been clamped to a non-negative range.
+ */
+
+ assert(vec2_rval->type == glsl_type::vec2_type);
+
+ ir_rvalue *result = pack_uvec2_to_uint(
+ f2u(round_even(mul(saturate(vec2_rval), constant(65535.0f)))));
+
+ assert(result->type == glsl_type::uint_type);
+ return result;
+ }
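+
+ /* Illustrative example: lowering packUnorm2x16(vec2(0.0, 1.0)) produces
+ * uvec2(0u, 65535u), which packs to 0xffff0000u.
+ */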
+
+ /**
+ * \brief Lower a packUnorm4x8 expression.
+ *
+ * \param vec4_rval is packUnorm4x8's input
+ * \return packUnorm4x8's output as a uint rvalue
+ */
+ ir_rvalue*
+ lower_pack_unorm_4x8(ir_rvalue *vec4_rval)
+ {
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+ *
+ * highp uint packUnorm4x8 (vec4 v)
+ * --------------------------------
+ * First, converts each component of the normalized floating-point value
+ * v into 8-bit integer values. Then, the results are packed into the
+ * returned 32-bit unsigned integer.
+ *
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
+ *
+ * The first component of the vector will be written to the least
+ * significant bits of the output; the last component will be written to
+ * the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return pack_uvec4_to_uint(uvec4(
+ * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 255.0f)));
+ *
+ * Here it is safe to directly convert the vec4 to uvec4 because the vec4
+ * has been clamped to a non-negative range.
+ */
+
+ assert(vec4_rval->type == glsl_type::vec4_type);
+
+ ir_rvalue *result = pack_uvec4_to_uint(
+ f2u(round_even(mul(saturate(vec4_rval), constant(255.0f)))));
+
+ assert(result->type == glsl_type::uint_type);
+ return result;
+ }
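+
+ /* Illustrative example: lowering packUnorm4x8(vec4(0.0, 1.0, 0.5, 0.2))
+ * produces the bytes 0x00, 0xff, 0x80, 0x33 and the packed result
+ * 0x3380ff00u.
+ */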
+
+ /**
+ * \brief Lower an unpackUnorm2x16 expression.
+ *
+ * \param uint_rval is unpackUnorm2x16's input
+ * \return unpackUnorm2x16's output as a vec2 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_unorm_2x16(ir_rvalue *uint_rval)
+ {
+ /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * highp vec2 unpackUnorm2x16 (highp uint p)
+ * -----------------------------------------
+ * First, unpacks a single 32-bit unsigned integer p into a pair of
+ * 16-bit unsigned integers. Then, each component is converted to
+ * a normalized floating-point value to generate the returned
+ * two-component vector.
+ *
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackUnorm2x16: f / 65535.0
+ *
+ * The first component of the returned vector will be extracted from the
+ * least significant bits of the input; the last component will be
+ * extracted from the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return vec2(unpack_uint_to_uvec2(UINT_RVALUE)) / 65535.0;
+ */
+
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ ir_rvalue *result = div(u2f(unpack_uint_to_uvec2(uint_rval)),
+ constant(65535.0f));
+
+ assert(result->type == glsl_type::vec2_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower an unpackUnorm4x8 expression.
+ *
+ * \param uint_rval is unpackUnorm4x8's input
+ * \return unpackUnorm4x8's output as a vec4 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_unorm_4x8(ir_rvalue *uint_rval)
+ {
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+ *
+ * highp vec4 unpackUnorm4x8 (highp uint p)
+ * ----------------------------------------
+ * First, unpacks a single 32-bit unsigned integer p into four
+ * 8-bit unsigned integers. Then, each component is converted to
+ * a normalized floating-point value to generate the returned
+ * four-component vector.
+ *
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackUnorm4x8: f / 255.0
+ *
+ * The first component of the returned vector will be extracted from the
+ * least significant bits of the input; the last component will be
+ * extracted from the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0;
+ */
+
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)),
+ constant(255.0f));
+
+ assert(result->type == glsl_type::vec4_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower the component-wise calculation of packHalf2x16.
+ *
+ * \param f_rval is one component of packHalf2x16's input
+ * \param e_rval is the unshifted exponent bits of f_rval
+ * \param m_rval is the unshifted mantissa bits of f_rval
+ *
+ * \return a uint rvalue that encodes a float16 in its lower 16 bits
+ */
+ ir_rvalue*
+ pack_half_1x16_nosign(ir_rvalue *f_rval,
+ ir_rvalue *e_rval,
+ ir_rvalue *m_rval)
+ {
+ assert(e_rval->type == glsl_type::uint_type);
+ assert(m_rval->type == glsl_type::uint_type);
+
+ /* uint u16; */
+ ir_variable *u16 = factory.make_temp(glsl_type::uint_type,
+ "tmp_pack_half_1x16_u16");
+
+ /* float f = FLOAT_RVAL; */
+ ir_variable *f = factory.make_temp(glsl_type::float_type,
+ "tmp_pack_half_1x16_f");
+ factory.emit(assign(f, f_rval));
+
+ /* uint e = E_RVAL; */
+ ir_variable *e = factory.make_temp(glsl_type::uint_type,
+ "tmp_pack_half_1x16_e");
+ factory.emit(assign(e, e_rval));
+
+ /* uint m = M_RVAL; */
+ ir_variable *m = factory.make_temp(glsl_type::uint_type,
+ "tmp_pack_half_1x16_m");
+ factory.emit(assign(m, m_rval));
+
+ /* Preliminaries
+ * -------------
+ *
+ * For a float16, the bit layout is:
+ *
+ * sign: 15
+ * exponent: 10:14
+ * mantissa: 0:9
+ *
+ * Let f16 be a float16 value. The sign, exponent, and mantissa
+ * determine its value thus:
+ *
+ * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1)
+ * if e16 = 0 and m16 != 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2)
+ * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3)
+ * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4)
+ * if e16 = 31 and m16 != 0, then NaN (5)
+ *
+ * where 0 <= m16 < 2^10.
+ *
+ * For a float32, the bit layout is:
+ *
+ * sign: 31
+ * exponent: 23:30
+ * mantissa: 0:22
+ *
+ * Let f32 be a float32 value. The sign, exponent, and mantissa
+ * determine its value thus:
+ *
+ * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10)
+ * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11)
+ * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12)
+ * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13)
+ * if e32 = 255 and m32 != 0, then NaN (14)
+ *
+ * where 0 <= m32 < 2^23.
+ *
+ * The minimum and maximum normal float16 values are
+ *
+ * min_norm16 = 2^(1 - 15) * (1 + 0 / 2^10) = 2^(-14) (20)
+ * max_norm16 = 2^(30 - 15) * (1 + 1023 / 2^10) (21)
+ *
+ * The step at max_norm16 is
+ *
+ * max_step16 = 2^5 (22)
+ *
+ * Observe that the float16 boundary values in equations 20-21 lie in the
+ * range of normal float32 values.
+ *
+ *
+ * Rounding Behavior
+ * -----------------
+ * Not all float32 values can be exactly represented as a float16. We
+ * round all such intermediate float32 values to the nearest float16; if
+ * the float32 is exactly between two float16 values, we round to the one
+ * with an even mantissa. This rounding behavior has several benefits:
+ *
+ * - It has no sign bias.
+ *
+ * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
+ * GPU ISA.
+ *
+ * - By reproducing the behavior of the GPU (at least on Intel hardware),
+ * compile-time evaluation of constant packHalf2x16 GLSL expressions will
+ * result in the same value as if the expression were executed on the
+ * GPU.
+ *
+ * Calculation
+ * -----------
+ * Our task is to compute s16, e16, m16 given f32. Since this function
+ * ignores the sign bit, assume that s32 = s16 = 0. There are several
+ * cases to consider.
+ */
+
+ factory.emit(
+
+ /* Case 1) f32 is NaN
+ *
+ * The resultant f16 will also be NaN.
+ */
+
+ /* if (e32 == 255 && m32 != 0) { */
+ if_tree(logic_and(equal(e, constant(0xffu << 23u)),
+ logic_not(equal(m, constant(0u)))),
+
+ assign(u16, constant(0x7fffu)),
+
+ /* Case 2) f32 lies in the range [0, min_norm16).
+ *
+ * The resultant float16 will be either zero, subnormal, or normal.
+ *
+ * Solving
+ *
+ * f32 = min_norm16 (30)
+ *
+ * gives
+ *
+ * e32 = 113 and m32 = 0 (31)
+ *
+ * Therefore this case occurs if and only if
+ *
+ * e32 < 113 (32)
+ */
+
+ /* } else if (e32 < 113) { */
+ if_tree(less(e, constant(113u << 23u)),
+
+ /* u16 = uint(round_to_even(abs(f32) * float(1u << 24u))); */
+ assign(u16, f2u(round_even(mul(expr(ir_unop_abs, f),
+ constant((float) (1 << 24)))))),
+
+ /* Case 3) f32 lies in the range
+ * [min_norm16, max_norm16 + max_step16).
+ *
+ * The resultant float16 will be either normal or infinite.
+ *
+ * Solving
+ *
+ * f32 = max_norm16 + max_step16 (40)
+ * = 2^15 * (1 + 1023 / 2^10) + 2^5 (41)
+ * = 2^16 (42)
+ * gives
+ *
+ * e32 = 143 and m32 = 0 (43)
+ *
+ * We already solved the boundary condition f32 = min_norm16 above
+ * in equation 31. Therefore this case occurs if and only if
+ *
+ * 113 <= e32 and e32 < 143
+ */
+
+ /* } else if (e32 < 143) { */
+ if_tree(less(e, constant(143u << 23u)),
+
+ /* The addition below handles the case where the mantissa rounds
+ * up to 1024 and bumps the exponent.
+ *
+ * u16 = ((e - (112u << 23u)) >> 13u)
+ * + round_to_even(float(m) / float(1u << 13u));
+ */
+ assign(u16, add(rshift(sub(e, constant(112u << 23u)),
+ constant(13u)),
+ f2u(round_even(
+ div(u2f(m), constant((float) (1 << 13))))))),
+
+ /* Case 4) f32 lies in the range [max_norm16 + max_step16, inf].
+ *
+ * The resultant float16 will be infinite.
+ *
+ * The cases above caught all float32 values in the range
+ * [0, max_norm16 + max_step16), so this is the fall-through case.
+ */
+
+ /* } else { */
+
+ assign(u16, constant(31u << 10u))))));
+
+ /* } */
+
+ return deref(u16).val;
+ }
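+
+ /* Illustrative example: for f32 == 1.0f (bits 0x3f800000u, so e ==
+ * 127u << 23u and m == 0u), case 3 applies and u16 becomes
+ * ((127u - 112u) << 10u) == 0x3c00u, the float16 encoding of 1.0.
+ */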
+
+ /**
+ * \brief Lower a packHalf2x16 expression.
+ *
+ * \param vec2_rval is packHalf2x16's input
+ * \return packHalf2x16's output as a uint rvalue
+ */
+ ir_rvalue*
+ lower_pack_half_2x16(ir_rvalue *vec2_rval)
+ {
+ /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * highp uint packHalf2x16 (mediump vec2 v)
+ * ----------------------------------------
+ * Returns an unsigned integer obtained by converting the components of
+ * a two-component floating-point vector to the 16-bit floating-point
+ * representation found in the OpenGL ES Specification, and then packing
+ * these two 16-bit integers into a 32-bit unsigned integer.
+ *
+ * The first vector component specifies the 16 least-significant bits
+ * of the result; the second component specifies the 16 most-significant
+ * bits.
+ */
+
+ assert(vec2_rval->type == glsl_type::vec2_type);
+
+ /* vec2 f = VEC2_RVAL; */
+ ir_variable *f = factory.make_temp(glsl_type::vec2_type,
+ "tmp_pack_half_2x16_f");
+ factory.emit(assign(f, vec2_rval));
+
+ /* uvec2 f32 = bitcast_f2u(f); */
+ ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_pack_half_2x16_f32");
+ factory.emit(assign(f32, expr(ir_unop_bitcast_f2u, f)));
+
+ /* uvec2 f16; */
+ ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_pack_half_2x16_f16");
+
+ /* Get f32's unshifted exponent bits.
+ *
+ * uvec2 e = f32 & 0x7f800000u;
+ */
+ ir_variable *e = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_pack_half_2x16_e");
+ factory.emit(assign(e, bit_and(f32, constant(0x7f800000u))));
+
+ /* Get f32's unshifted mantissa bits.
+ *
+ * uvec2 m = f32 & 0x007fffffu;
+ */
+ ir_variable *m = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_pack_half_2x16_m");
+ factory.emit(assign(m, bit_and(f32, constant(0x007fffffu))));
+
+ /* Set f16's exponent and mantissa bits.
+ *
+ * f16.x = pack_half_1x16_nosign(e.x, m.x);
+ * f16.y = pack_half_1x16_nosign(e.y, m.y);
+ */
+ factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_x(f),
+ swizzle_x(e),
+ swizzle_x(m)),
+ WRITEMASK_X));
+ factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_y(f),
+ swizzle_y(e),
+ swizzle_y(m)),
+ WRITEMASK_Y));
+
+ /* Set f16's sign bits.
+ *
+ * f16 |= (f32 & (1u << 31u)) >> 16u;
+ */
+ factory.emit(
+ assign(f16, bit_or(f16,
+ rshift(bit_and(f32, constant(1u << 31u)),
+ constant(16u)))));
+
+
+ /* return (f16.y << 16u) | f16.x; */
+ ir_rvalue *result = bit_or(lshift(swizzle_y(f16),
+ constant(16u)),
+ swizzle_x(f16));
+
+ assert(result->type == glsl_type::uint_type);
+ return result;
+ }
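+
+ /* Illustrative example: lowering packHalf2x16(vec2(1.0, -2.0)) yields
+ * f16 == uvec2(0x3c00u, 0xc000u) once the sign bit of the second component
+ * is merged in, so the packed result is 0xc0003c00u.
+ */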
+
-
- /**
- * \brief Split packHalf2x16's vec2 operand into two floats.
- *
- * \param vec2_rval is packHalf2x16's input
- * \return a uint rvalue
- *
- * Some code generators, such as the i965 fragment shader, require that all
- * vector expressions be lowered to a sequence of scalar expressions.
- * However, packHalf2x16 cannot be scalarized by the same mechanism as
- * a true vector operation because its input and output have a differing
- * number of vector components.
- *
- * This method scalarizes packHalf2x16 by transforming it from an unary
- * operation having vector input to a binary operation having scalar input.
- * That is, it transforms
- *
- * packHalf2x16(VEC2_RVAL);
- *
- * into
- *
- * vec2 v = VEC2_RVAL;
- * return packHalf2x16_split(v.x, v.y);
- */
- ir_rvalue*
- split_pack_half_2x16(ir_rvalue *vec2_rval)
- {
- assert(vec2_rval->type == glsl_type::vec2_type);
-
- ir_variable *v = factory.make_temp(glsl_type::vec2_type,
- "tmp_split_pack_half_2x16_v");
- factory.emit(assign(v, vec2_rval));
-
- return expr(ir_binop_pack_half_2x16_split, swizzle_x(v), swizzle_y(v));
- }
-
- /**
- * \brief Split unpackHalf2x16 into two operations.
- *
- * \param uint_rval is unpackHalf2x16's input
- * \return a vec2 rvalue
- *
- * Some code generators, such as the i965 fragment shader, require that all
- * vector expressions be lowered to a sequence of scalar expressions.
- * However, unpackHalf2x16 cannot be scalarized by the same method as
- * a true vector operation because the number of components of its input
- * and output differ.
- *
- * This method scalarizes unpackHalf2x16 by transforming it from a single
- * operation having vec2 output to a pair of operations each having float
- * output. That is, it transforms
- *
- * unpackHalf2x16(UINT_RVAL)
- *
- * into
- *
- * uint u = UINT_RVAL;
- * vec2 v;
- *
- * v.x = unpackHalf2x16_split_x(u);
- * v.y = unpackHalf2x16_split_y(u);
- *
- * return v;
- */
- ir_rvalue*
- split_unpack_half_2x16(ir_rvalue *uint_rval)
- {
- assert(uint_rval->type == glsl_type::uint_type);
-
- /* uint u = uint_rval; */
- ir_variable *u = factory.make_temp(glsl_type::uint_type,
- "tmp_split_unpack_half_2x16_u");
- factory.emit(assign(u, uint_rval));
-
- /* vec2 v; */
- ir_variable *v = factory.make_temp(glsl_type::vec2_type,
- "tmp_split_unpack_half_2x16_v");
-
- /* v.x = unpack_half_2x16_split_x(u); */
- factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_x, u),
- WRITEMASK_X));
-
- /* v.y = unpack_half_2x16_split_y(u); */
- factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_y, u),
- WRITEMASK_Y));
-
- return deref(v).val;
- }
+ /**
+ * \brief Lower the component-wise calculation of unpackHalf2x16.
+ *
+ * Given a uint that encodes a float16 in its lower 16 bits, this function
+ * returns a uint that encodes a float32 with the same value. The sign bit
+ * of the float16 is ignored.
+ *
+ * \param e_rval is the unshifted exponent bits of a float16
+ * \param m_rval is the unshifted mantissa bits of a float16
+ * \return a uint rvalue that encodes a float32
+ */
+ ir_rvalue*
+ unpack_half_1x16_nosign(ir_rvalue *e_rval, ir_rvalue *m_rval)
+ {
+ assert(e_rval->type == glsl_type::uint_type);
+ assert(m_rval->type == glsl_type::uint_type);
+
+ /* uint u32; */
+ ir_variable *u32 = factory.make_temp(glsl_type::uint_type,
+ "tmp_unpack_half_1x16_u32");
+
+ /* uint e = E_RVAL; */
+ ir_variable *e = factory.make_temp(glsl_type::uint_type,
+ "tmp_unpack_half_1x16_e");
+ factory.emit(assign(e, e_rval));
+
+ /* uint m = M_RVAL; */
+ ir_variable *m = factory.make_temp(glsl_type::uint_type,
+ "tmp_unpack_half_1x16_m");
+ factory.emit(assign(m, m_rval));
+
+ /* Preliminaries
+ * -------------
+ *
+ * For a float16, the bit layout is:
+ *
+ * sign: 15
+ * exponent: 10:14
+ * mantissa: 0:9
+ *
+ * Let f16 be a float16 value. The sign, exponent, and mantissa
+ * determine its value thus:
+ *
+ * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1)
+ * if e16 = 0 and m16 != 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2)
+ * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3)
+ * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4)
+ * if e16 = 31 and m16 != 0, then NaN (5)
+ *
+ * where 0 <= m16 < 2^10.
+ *
+ * For a float32, the bit layout is:
+ *
+ * sign: 31
+ * exponent: 23:30
+ * mantissa: 0:22
+ *
+ * Let f32 be a float32 value. The sign, exponent, and mantissa
+ * determine its value thus:
+ *
+ * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10)
+ * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11)
+ * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12)
+ * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13)
+ * if e32 = 255 and m32 != 0, then NaN (14)
+ *
+ * where 0 <= m32 < 2^23.
+ *
+ * Calculation
+ * -----------
+ * Our task is to compute s32, e32, m32 given f16. Since this function
+ * ignores the sign bit, assume that s32 = s16 = 0. There are several
+ * cases to consider.
+ */
+
+ factory.emit(
+
+ /* Case 1) f16 is zero or subnormal.
+ *
+ * The simplest method of calculating f32 in this case is
+ *
+ * f32 = f16 (20)
+ * = 2^(-14) * (m16 / 2^10) (21)
+ * = m16 / 2^24 (22)
+ */
+
+ /* if (e16 == 0) { */
+ if_tree(equal(e, constant(0u)),
+
+ /* u32 = bitcast_f2u(float(m) / float(1 << 24)); */
+ assign(u32, expr(ir_unop_bitcast_f2u,
+ div(u2f(m), constant((float)(1 << 24))))),
+
+ /* Case 2) f16 is normal.
+ *
+ * The equation
+ *
+ * f32 = f16 (30)
+ * 2^(e32 - 127) * (1 + m32 / 2^23) = (31)
+ * 2^(e16 - 15) * (1 + m16 / 2^10)
+ *
+ * can be decomposed into two
+ *
+ * 2^(e32 - 127) = 2^(e16 - 15) (32)
+ * 1 + m32 / 2^23 = 1 + m16 / 2^10 (33)
+ *
+ * which solve to
+ *
+ * e32 = e16 + 112 (34)
+ * m32 = m16 * 2^13 (35)
+ */
+
+ /* } else if (e16 < 31) { */
+ if_tree(less(e, constant(31u << 10u)),
+
+ /* u32 = ((e + (112 << 10)) | m) << 13;
+ */
+ assign(u32, lshift(bit_or(add(e, constant(112u << 10u)), m),
+ constant(13u))),
+
+
+ /* Case 3) f16 is infinite. */
+ if_tree(equal(m, constant(0u)),
+
+ assign(u32, constant(255u << 23u)),
+
+ /* Case 4) f16 is NaN. */
+ /* } else { */
+
+ assign(u32, constant(0x7fffffffu))))));
+
+ /* } */
+
+ return deref(u32).val;
+ }
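+
+ /* Illustrative example: for e == 0x3c00u and m == 0u (the float16 1.0),
+ * case 2 applies and u32 becomes ((0x3c00u + (112u << 10u)) | 0u) << 13u
+ * == 0x3f800000u, the float32 bit pattern of 1.0f.
+ */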
+
+ /**
+ * \brief Lower an unpackHalf2x16 expression.
+ *
+ * \param uint_rval is unpackHalf2x16's input
+ * \return unpackHalf2x16's output as a vec2 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_half_2x16(ir_rvalue *uint_rval)
+ {
+ /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * mediump vec2 unpackHalf2x16 (highp uint v)
+ * ------------------------------------------
+ * Returns a two-component floating-point vector with components
+ * obtained by unpacking a 32-bit unsigned integer into a pair of 16-bit
+ * values, interpreting those values as 16-bit floating-point numbers
+ * according to the OpenGL ES Specification, and converting them to
+ * 32-bit floating-point values.
+ *
+ * The first component of the vector is obtained from the
+ * 16 least-significant bits of v; the second component is obtained
+ * from the 16 most-significant bits of v.
+ */
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ /* uint u = RVALUE;
+ * uvec2 f16 = uvec2(u & 0xffffu, u >> 16u);
+ */
+ ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_unpack_half_2x16_f16");
+ factory.emit(assign(f16, unpack_uint_to_uvec2(uint_rval)));
+
+ /* uvec2 f32; */
+ ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_unpack_half_2x16_f32");
+
+ /* Get f16's unshifted exponent bits.
+ *
+ * uvec2 e = f16 & 0x7c00u;
+ */
+ ir_variable *e = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_unpack_half_2x16_e");
+ factory.emit(assign(e, bit_and(f16, constant(0x7c00u))));
+
+ /* Get f16's unshifted mantissa bits.
+ *
+ * uvec2 m = f16 & 0x03ffu;
+ */
+ ir_variable *m = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_unpack_half_2x16_m");
+ factory.emit(assign(m, bit_and(f16, constant(0x03ffu))));
+
+ /* Set f32's exponent and mantissa bits.
+ *
+ * f32.x = unpack_half_1x16_nosign(e.x, m.x);
+ * f32.y = unpack_half_1x16_nosign(e.y, m.y);
+ */
+ factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_x(e),
+ swizzle_x(m)),
+ WRITEMASK_X));
+ factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_y(e),
+ swizzle_y(m)),
+ WRITEMASK_Y));
+
+ /* Set f32's sign bit.
+ *
+ * f32 |= (f16 & 0x8000u) << 16u;
+ */
+ factory.emit(assign(f32, bit_or(f32,
+ lshift(bit_and(f16,
+ constant(0x8000u)),
+ constant(16u)))));
+
+ /* return bitcast_u2f(f32); */
+ ir_rvalue *result = expr(ir_unop_bitcast_u2f, f32);
+ assert(result->type == glsl_type::vec2_type);
+ return result;
+ }
+ };
+
+ } // namespace anonymous
+
+ /**
+ * \brief Lower the builtin packing functions.
+ *
+ * \param op_mask is a bitmask of `enum lower_packing_builtins_op`.
+ */
+ bool
+ lower_packing_builtins(exec_list *instructions, int op_mask)
+ {
+ lower_packing_builtins_visitor v(op_mask);
+ visit_list_elements(&v, instructions, true);
+ return v.get_progress();
+ }
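+
+ /* Example call (hypothetical): a driver that needs only the snorm
+ * builtins lowered might run
+ *
+ * lower_packing_builtins(ir, LOWER_PACK_SNORM_2x16 | LOWER_UNPACK_SNORM_2x16);
+ *
+ * where `ir` is the shader's instruction list.
+ */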
--- /dev/null
+ /*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ /* This file declares stripped-down versions of functions that
+ * normally exist outside of the glsl folder, so that they can be used
+ * when running the GLSL compiler standalone (for unit testing or
+ * compiling builtins).
+ */
+
+ #include "standalone_scaffolding.h"
+
+ #include <assert.h>
+ #include <stdio.h>
+ #include <string.h>
+ #include "util/ralloc.h"
+ #include "util/strtod.h"
+
++extern "C" void
++_mesa_error_no_memory(const char *caller)
++{
++ fprintf(stderr, "Mesa error: out of memory in %s", caller);
++}
++
+ void
+ _mesa_warning(struct gl_context *ctx, const char *fmt, ...)
+ {
+ va_list vargs;
+ (void) ctx;
+
+ va_start(vargs, fmt);
+
+ /* This output is not thread-safe, but that's good enough for the
+ * standalone compiler.
+ */
+ fprintf(stderr, "Mesa warning: ");
+ vfprintf(stderr, fmt, vargs);
+ fprintf(stderr, "\n");
+
+ va_end(vargs);
+ }
+
+ void
+ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
+ struct gl_shader *sh)
+ {
+ (void) ctx;
+ *ptr = sh;
+ }
+
+ void
+ _mesa_shader_debug(struct gl_context *, GLenum, GLuint *,
+ const char *)
+ {
+ }
+
+ struct gl_shader *
+ _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type)
+ {
+ struct gl_shader *shader;
+
+ (void) ctx;
+
+ assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER);
+ shader = rzalloc(NULL, struct gl_shader);
+ if (shader) {
+ shader->Type = type;
+ shader->Stage = _mesa_shader_enum_to_shader_stage(type);
+ shader->Name = name;
+ shader->RefCount = 1;
+ }
+ return shader;
+ }
+
+ void
+ _mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh)
+ {
+ free((void *)sh->Source);
+ free(sh->Label);
+ ralloc_free(sh);
+ }
+
+ void
+ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
+ {
+ unsigned i;
+
+ shProg->NumUniformStorage = 0;
+ shProg->UniformStorage = NULL;
+ shProg->NumUniformRemapTable = 0;
+ shProg->UniformRemapTable = NULL;
+ shProg->UniformHash = NULL;
+
+ ralloc_free(shProg->InfoLog);
+ shProg->InfoLog = ralloc_strdup(shProg, "");
+
+ ralloc_free(shProg->BufferInterfaceBlocks);
+ shProg->BufferInterfaceBlocks = NULL;
+ shProg->NumBufferInterfaceBlocks = 0;
+
+ ralloc_free(shProg->UniformBlocks);
+ shProg->UniformBlocks = NULL;
+ shProg->NumUniformBlocks = 0;
+
+ ralloc_free(shProg->ShaderStorageBlocks);
+ shProg->ShaderStorageBlocks = NULL;
+ shProg->NumShaderStorageBlocks = 0;
+
+ for (i = 0; i < MESA_SHADER_STAGES; i++) {
+ ralloc_free(shProg->InterfaceBlockStageIndex[i]);
+ shProg->InterfaceBlockStageIndex[i] = NULL;
+ }
+
+ ralloc_free(shProg->UboInterfaceBlockIndex);
+ shProg->UboInterfaceBlockIndex = NULL;
+ ralloc_free(shProg->SsboInterfaceBlockIndex);
+ shProg->SsboInterfaceBlockIndex = NULL;
+
+ ralloc_free(shProg->AtomicBuffers);
+ shProg->AtomicBuffers = NULL;
+ shProg->NumAtomicBuffers = 0;
+ }
+
+ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
+ {
+ memset(ctx, 0, sizeof(*ctx));
+
+ ctx->API = api;
+
+ ctx->Extensions.dummy_false = false;
+ ctx->Extensions.dummy_true = true;
+ ctx->Extensions.ARB_compute_shader = true;
+ ctx->Extensions.ARB_conservative_depth = true;
+ ctx->Extensions.ARB_draw_instanced = true;
+ ctx->Extensions.ARB_ES2_compatibility = true;
+ ctx->Extensions.ARB_ES3_compatibility = true;
+ ctx->Extensions.ARB_explicit_attrib_location = true;
+ ctx->Extensions.ARB_fragment_coord_conventions = true;
+ ctx->Extensions.ARB_fragment_layer_viewport = true;
+ ctx->Extensions.ARB_gpu_shader5 = true;
+ ctx->Extensions.ARB_gpu_shader_fp64 = true;
+ ctx->Extensions.ARB_sample_shading = true;
+ ctx->Extensions.ARB_shader_bit_encoding = true;
+ ctx->Extensions.ARB_shader_draw_parameters = true;
+ ctx->Extensions.ARB_shader_stencil_export = true;
+ ctx->Extensions.ARB_shader_subroutine = true;
+ ctx->Extensions.ARB_shader_texture_lod = true;
+ ctx->Extensions.ARB_shading_language_420pack = true;
+ ctx->Extensions.ARB_shading_language_packing = true;
+ ctx->Extensions.ARB_tessellation_shader = true;
+ ctx->Extensions.ARB_texture_cube_map_array = true;
+ ctx->Extensions.ARB_texture_gather = true;
+ ctx->Extensions.ARB_texture_multisample = true;
+ ctx->Extensions.ARB_texture_query_levels = true;
+ ctx->Extensions.ARB_texture_query_lod = true;
+ ctx->Extensions.ARB_uniform_buffer_object = true;
+ ctx->Extensions.ARB_viewport_array = true;
+
+ ctx->Extensions.OES_EGL_image_external = true;
+ ctx->Extensions.OES_standard_derivatives = true;
+
+ ctx->Extensions.EXT_shader_integer_mix = true;
+ ctx->Extensions.EXT_texture_array = true;
+
+ ctx->Extensions.NV_texture_rectangle = true;
+
+ ctx->Const.GLSLVersion = 120;
+
+ /* 1.20 minimums. */
+ ctx->Const.MaxLights = 8;
+ ctx->Const.MaxClipPlanes = 6;
+ ctx->Const.MaxTextureUnits = 2;
+ ctx->Const.MaxTextureCoordUnits = 2;
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16;
+
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 512;
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32;
+ ctx->Const.MaxVarying = 8; /* == gl_MaxVaryingFloats / 4 */
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0;
+ ctx->Const.MaxCombinedTextureImageUnits = 2;
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 2;
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 64;
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 32;
+
+ ctx->Const.MaxDrawBuffers = 1;
+ ctx->Const.MaxComputeWorkGroupCount[0] = 65535;
+ ctx->Const.MaxComputeWorkGroupCount[1] = 65535;
+ ctx->Const.MaxComputeWorkGroupCount[2] = 65535;
+ ctx->Const.MaxComputeWorkGroupSize[0] = 1024;
+ ctx->Const.MaxComputeWorkGroupSize[1] = 1024;
+ ctx->Const.MaxComputeWorkGroupSize[2] = 64;
+ ctx->Const.MaxComputeWorkGroupInvocations = 1024;
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16;
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024;
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */
+
+ /* Set up default shader compiler options. */
+ struct gl_shader_compiler_options options;
+ memset(&options, 0, sizeof(options));
+ options.MaxUnrollIterations = 32;
+ options.MaxIfDepth = UINT_MAX;
+
+ for (int sh = 0; sh < MESA_SHADER_STAGES; ++sh)
+ memcpy(&ctx->Const.ShaderCompilerOptions[sh], &options, sizeof(options));
+
+ _mesa_locale_init();
+ }
--- /dev/null
+ /*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #include <stdio.h>
+ #include "main/macros.h"
+ #include "compiler/glsl/glsl_parser_extras.h"
+ #include "glsl_types.h"
+ #include "util/hash_table.h"
+
+
+ mtx_t glsl_type::mutex = _MTX_INITIALIZER_NP;
+ hash_table *glsl_type::array_types = NULL;
+ hash_table *glsl_type::record_types = NULL;
+ hash_table *glsl_type::interface_types = NULL;
++hash_table *glsl_type::function_types = NULL;
+ hash_table *glsl_type::subroutine_types = NULL;
+ void *glsl_type::mem_ctx = NULL;
+
+ void
+ glsl_type::init_ralloc_type_ctx(void)
+ {
+ if (glsl_type::mem_ctx == NULL) {
+ glsl_type::mem_ctx = ralloc_autofree_context();
+ assert(glsl_type::mem_ctx != NULL);
+ }
+ }
+
+ glsl_type::glsl_type(GLenum gl_type,
+ glsl_base_type base_type, unsigned vector_elements,
+ unsigned matrix_columns, const char *name) :
+ gl_type(gl_type),
+ base_type(base_type),
+ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+ sampler_type(0), interface_packing(0),
+ vector_elements(vector_elements), matrix_columns(matrix_columns),
+ length(0)
+ {
+ mtx_lock(&glsl_type::mutex);
+
+ init_ralloc_type_ctx();
+ assert(name != NULL);
+ this->name = ralloc_strdup(this->mem_ctx, name);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ /* Neither dimension is zero or both dimensions are zero.
+ */
+ assert((vector_elements == 0) == (matrix_columns == 0));
+ memset(& fields, 0, sizeof(fields));
+ }
+
+ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
+ enum glsl_sampler_dim dim, bool shadow, bool array,
+ unsigned type, const char *name) :
+ gl_type(gl_type),
+ base_type(base_type),
+ sampler_dimensionality(dim), sampler_shadow(shadow),
+ sampler_array(array), sampler_type(type), interface_packing(0),
+ length(0)
+ {
+ mtx_lock(&glsl_type::mutex);
+
+ init_ralloc_type_ctx();
+ assert(name != NULL);
+ this->name = ralloc_strdup(this->mem_ctx, name);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ memset(& fields, 0, sizeof(fields));
+
+ if (base_type == GLSL_TYPE_SAMPLER) {
+ /* Samplers take no storage whatsoever. */
+ matrix_columns = vector_elements = 0;
+ } else {
+ matrix_columns = vector_elements = 1;
+ }
+ }
+
+ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
+ const char *name) :
+ gl_type(0),
+ base_type(GLSL_TYPE_STRUCT),
+ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+ sampler_type(0), interface_packing(0),
+ vector_elements(0), matrix_columns(0),
+ length(num_fields)
+ {
+ unsigned int i;
+
+ mtx_lock(&glsl_type::mutex);
+
+ init_ralloc_type_ctx();
+ assert(name != NULL);
+ this->name = ralloc_strdup(this->mem_ctx, name);
+ this->fields.structure = ralloc_array(this->mem_ctx,
+ glsl_struct_field, length);
+
+ for (i = 0; i < length; i++) {
+ this->fields.structure[i].type = fields[i].type;
+ this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
+ fields[i].name);
+ this->fields.structure[i].location = fields[i].location;
+ this->fields.structure[i].interpolation = fields[i].interpolation;
+ this->fields.structure[i].centroid = fields[i].centroid;
+ this->fields.structure[i].sample = fields[i].sample;
+ this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
+ this->fields.structure[i].patch = fields[i].patch;
+ this->fields.structure[i].image_read_only = fields[i].image_read_only;
+ this->fields.structure[i].image_write_only = fields[i].image_write_only;
+ this->fields.structure[i].image_coherent = fields[i].image_coherent;
+ this->fields.structure[i].image_volatile = fields[i].image_volatile;
+ this->fields.structure[i].image_restrict = fields[i].image_restrict;
+ this->fields.structure[i].precision = fields[i].precision;
+ }
+
+ mtx_unlock(&glsl_type::mutex);
+ }
+
+ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
+ enum glsl_interface_packing packing, const char *name) :
+ gl_type(0),
+ base_type(GLSL_TYPE_INTERFACE),
+ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+ sampler_type(0), interface_packing((unsigned) packing),
+ vector_elements(0), matrix_columns(0),
+ length(num_fields)
+ {
+ unsigned int i;
+
+ mtx_lock(&glsl_type::mutex);
+
+ init_ralloc_type_ctx();
+ assert(name != NULL);
+ this->name = ralloc_strdup(this->mem_ctx, name);
+ this->fields.structure = ralloc_array(this->mem_ctx,
+ glsl_struct_field, length);
+ for (i = 0; i < length; i++) {
+ this->fields.structure[i].type = fields[i].type;
+ this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
+ fields[i].name);
+ this->fields.structure[i].location = fields[i].location;
+ this->fields.structure[i].interpolation = fields[i].interpolation;
+ this->fields.structure[i].centroid = fields[i].centroid;
+ this->fields.structure[i].sample = fields[i].sample;
+ this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
+ this->fields.structure[i].patch = fields[i].patch;
+ this->fields.structure[i].precision = fields[i].precision;
+ }
+
+ mtx_unlock(&glsl_type::mutex);
+ }
+
++glsl_type::glsl_type(const glsl_type *return_type,
++ const glsl_function_param *params, unsigned num_params) :
++ gl_type(0),
++ base_type(GLSL_TYPE_FUNCTION),
++ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
++ sampler_type(0), interface_packing(0),
++ vector_elements(0), matrix_columns(0),
++ length(num_params)
++{
++ unsigned int i;
++
++ mtx_lock(&glsl_type::mutex);
++
++ init_ralloc_type_ctx();
++
++ this->fields.parameters = rzalloc_array(this->mem_ctx,
++ glsl_function_param, num_params + 1);
++
++ /* We store the return type as the first parameter */
++ this->fields.parameters[0].type = return_type;
++ this->fields.parameters[0].in = false;
++ this->fields.parameters[0].out = true;
++
++ /* We store the i'th parameter in slot i+1 */
++ for (i = 0; i < length; i++) {
++ this->fields.parameters[i + 1].type = params[i].type;
++ this->fields.parameters[i + 1].in = params[i].in;
++ this->fields.parameters[i + 1].out = params[i].out;
++ }
++
++ mtx_unlock(&glsl_type::mutex);
++}
++
+ glsl_type::glsl_type(const char *subroutine_name) :
+ gl_type(0),
+ base_type(GLSL_TYPE_SUBROUTINE),
+ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+ sampler_type(0), interface_packing(0),
+ vector_elements(1), matrix_columns(1),
+ length(0)
+ {
+ mtx_lock(&glsl_type::mutex);
+
+ init_ralloc_type_ctx();
+ assert(subroutine_name != NULL);
+ this->name = ralloc_strdup(this->mem_ctx, subroutine_name);
+ mtx_unlock(&glsl_type::mutex);
+ }
+
+ bool
+ glsl_type::contains_sampler() const
+ {
+ if (this->is_array()) {
+ return this->fields.array->contains_sampler();
+ } else if (this->is_record()) {
+ for (unsigned int i = 0; i < this->length; i++) {
+ if (this->fields.structure[i].type->contains_sampler())
+ return true;
+ }
+ return false;
+ } else {
+ return this->is_sampler();
+ }
+ }
+
+
+ bool
+ glsl_type::contains_integer() const
+ {
+ if (this->is_array()) {
+ return this->fields.array->contains_integer();
+ } else if (this->is_record()) {
+ for (unsigned int i = 0; i < this->length; i++) {
+ if (this->fields.structure[i].type->contains_integer())
+ return true;
+ }
+ return false;
+ } else {
+ return this->is_integer();
+ }
+ }
+
+ bool
+ glsl_type::contains_double() const
+ {
+ if (this->is_array()) {
+ return this->fields.array->contains_double();
+ } else if (this->is_record()) {
+ for (unsigned int i = 0; i < this->length; i++) {
+ if (this->fields.structure[i].type->contains_double())
+ return true;
+ }
+ return false;
+ } else {
+ return this->is_double();
+ }
+ }
+
+ bool
+ glsl_type::contains_opaque() const {
+ switch (base_type) {
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_ATOMIC_UINT:
+ return true;
+ case GLSL_TYPE_ARRAY:
+ return fields.array->contains_opaque();
+ case GLSL_TYPE_STRUCT:
+ for (unsigned int i = 0; i < length; i++) {
+ if (fields.structure[i].type->contains_opaque())
+ return true;
+ }
+ return false;
+ default:
+ return false;
+ }
+ }
+
+ bool
+ glsl_type::contains_subroutine() const
+ {
+ if (this->is_array()) {
+ return this->fields.array->contains_subroutine();
+ } else if (this->is_record()) {
+ for (unsigned int i = 0; i < this->length; i++) {
+ if (this->fields.structure[i].type->contains_subroutine())
+ return true;
+ }
+ return false;
+ } else {
+ return this->is_subroutine();
+ }
+ }
+
+ gl_texture_index
+ glsl_type::sampler_index() const
+ {
+ const glsl_type *const t = (this->is_array()) ? this->fields.array : this;
+
+ assert(t->is_sampler());
+
+ switch (t->sampler_dimensionality) {
+ case GLSL_SAMPLER_DIM_1D:
+ return (t->sampler_array) ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
+ case GLSL_SAMPLER_DIM_2D:
+ return (t->sampler_array) ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
+ case GLSL_SAMPLER_DIM_3D:
+ return TEXTURE_3D_INDEX;
+ case GLSL_SAMPLER_DIM_CUBE:
+ return (t->sampler_array) ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
+ case GLSL_SAMPLER_DIM_RECT:
+ return TEXTURE_RECT_INDEX;
+ case GLSL_SAMPLER_DIM_BUF:
+ return TEXTURE_BUFFER_INDEX;
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ return TEXTURE_EXTERNAL_INDEX;
+ case GLSL_SAMPLER_DIM_MS:
+ return (t->sampler_array) ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
+ default:
+ assert(!"Should not get here.");
+ return TEXTURE_BUFFER_INDEX;
+ }
+ }
+
+ bool
+ glsl_type::contains_image() const
+ {
+ if (this->is_array()) {
+ return this->fields.array->contains_image();
+ } else if (this->is_record()) {
+ for (unsigned int i = 0; i < this->length; i++) {
+ if (this->fields.structure[i].type->contains_image())
+ return true;
+ }
+ return false;
+ } else {
+ return this->is_image();
+ }
+ }
+
+ const glsl_type *glsl_type::get_base_type() const
+ {
+ switch (base_type) {
+ case GLSL_TYPE_UINT:
+ return uint_type;
+ case GLSL_TYPE_INT:
+ return int_type;
+ case GLSL_TYPE_FLOAT:
+ return float_type;
+ case GLSL_TYPE_DOUBLE:
+ return double_type;
+ case GLSL_TYPE_BOOL:
+ return bool_type;
+ default:
+ return error_type;
+ }
+ }
+
+
+ const glsl_type *glsl_type::get_scalar_type() const
+ {
+ const glsl_type *type = this;
+
+ /* Handle arrays */
+ while (type->base_type == GLSL_TYPE_ARRAY)
+ type = type->fields.array;
+
+ /* Handle vectors and matrices */
+ switch (type->base_type) {
+ case GLSL_TYPE_UINT:
+ return uint_type;
+ case GLSL_TYPE_INT:
+ return int_type;
+ case GLSL_TYPE_FLOAT:
+ return float_type;
+ case GLSL_TYPE_DOUBLE:
+ return double_type;
+ case GLSL_TYPE_BOOL:
+ return bool_type;
+ default:
+ /* Handle everything else */
+ return type;
+ }
+ }
+
+
+ void
+ _mesa_glsl_release_types(void)
+ {
+ /* Should only be called during atexit (either when unloading shared
+ * object, or if process terminates), so no mutex-locking should be
+ * necessary.
+ */
+ if (glsl_type::array_types != NULL) {
+ _mesa_hash_table_destroy(glsl_type::array_types, NULL);
+ glsl_type::array_types = NULL;
+ }
+
+ if (glsl_type::record_types != NULL) {
+ _mesa_hash_table_destroy(glsl_type::record_types, NULL);
+ glsl_type::record_types = NULL;
+ }
+
+ if (glsl_type::interface_types != NULL) {
+ _mesa_hash_table_destroy(glsl_type::interface_types, NULL);
+ glsl_type::interface_types = NULL;
+ }
+ }
+
+
+ glsl_type::glsl_type(const glsl_type *array, unsigned length) :
+ base_type(GLSL_TYPE_ARRAY),
+ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+ sampler_type(0), interface_packing(0),
+ vector_elements(0), matrix_columns(0),
+ length(length), name(NULL)
+ {
+ this->fields.array = array;
+ /* Inherit the gl type of the base. The GL type is used for
+ * uniform/statevar handling in Mesa and the arrayness of the type
+ * is represented by the size rather than the type.
+ */
+ this->gl_type = array->gl_type;
+
+ /* Allow a maximum of 10 characters for the array size. This is enough
+ * for 32-bits of ~0. The extra 3 are for the '[', ']', and terminating
+ * NUL.
+ */
+ const unsigned name_length = strlen(array->name) + 10 + 3;
+
+ mtx_lock(&glsl_type::mutex);
+ char *const n = (char *) ralloc_size(this->mem_ctx, name_length);
+ mtx_unlock(&glsl_type::mutex);
+
+ if (length == 0)
+ snprintf(n, name_length, "%s[]", array->name);
+ else {
+ /* Insert the outermost dimension in the correct spot;
+ * otherwise the dimension order will be backwards.
+ */
+ const char *pos = strchr(array->name, '[');
+ if (pos) {
+ int idx = pos - array->name;
+ snprintf(n, idx+1, "%s", array->name);
+ snprintf(n + idx, name_length - idx, "[%u]%s",
+ length, array->name + idx);
+ } else {
+ snprintf(n, name_length, "%s[%u]", array->name, length);
+ }
+ }
+
+ this->name = n;
+ }
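+
+ /* For example, wrapping the type named "float" in an array of length 3
+ * produces the name "float[3]"; wrapping that result again with length 2
+ * splices the new dimension in before the existing one, giving
+ * "float[2][3]", so the outermost dimension always appears first.
+ */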
+
+
+ const glsl_type *
+ glsl_type::vec(unsigned components)
+ {
+ if (components == 0 || components > 4)
+ return error_type;
+
+ static const glsl_type *const ts[] = {
+ float_type, vec2_type, vec3_type, vec4_type
+ };
+ return ts[components - 1];
+ }
+
+ const glsl_type *
+ glsl_type::dvec(unsigned components)
+ {
+ if (components == 0 || components > 4)
+ return error_type;
+
+ static const glsl_type *const ts[] = {
+ double_type, dvec2_type, dvec3_type, dvec4_type
+ };
+ return ts[components - 1];
+ }
+
+ const glsl_type *
+ glsl_type::ivec(unsigned components)
+ {
+ if (components == 0 || components > 4)
+ return error_type;
+
+ static const glsl_type *const ts[] = {
+ int_type, ivec2_type, ivec3_type, ivec4_type
+ };
+ return ts[components - 1];
+ }
+
+
+ const glsl_type *
+ glsl_type::uvec(unsigned components)
+ {
+ if (components == 0 || components > 4)
+ return error_type;
+
+ static const glsl_type *const ts[] = {
+ uint_type, uvec2_type, uvec3_type, uvec4_type
+ };
+ return ts[components - 1];
+ }
+
+
+ const glsl_type *
+ glsl_type::bvec(unsigned components)
+ {
+ if (components == 0 || components > 4)
+ return error_type;
+
+ static const glsl_type *const ts[] = {
+ bool_type, bvec2_type, bvec3_type, bvec4_type
+ };
+ return ts[components - 1];
+ }
+
+
+ const glsl_type *
+ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns)
+ {
+ if (base_type == GLSL_TYPE_VOID)
+ return void_type;
+
+ if ((rows < 1) || (rows > 4) || (columns < 1) || (columns > 4))
+ return error_type;
+
+ /* Treat GLSL vectors as Nx1 matrices.
+ */
+ if (columns == 1) {
+ switch (base_type) {
+ case GLSL_TYPE_UINT:
+ return uvec(rows);
+ case GLSL_TYPE_INT:
+ return ivec(rows);
+ case GLSL_TYPE_FLOAT:
+ return vec(rows);
+ case GLSL_TYPE_DOUBLE:
+ return dvec(rows);
+ case GLSL_TYPE_BOOL:
+ return bvec(rows);
+ default:
+ return error_type;
+ }
+ } else {
+ if ((base_type != GLSL_TYPE_FLOAT && base_type != GLSL_TYPE_DOUBLE) || (rows == 1))
+ return error_type;
+
+ /* GLSL matrix types are named mat{COLUMNS}x{ROWS}. Only the following
+ * combinations are valid:
+ *
+ * 1 2 3 4
+ * 1
+ * 2 x x x
+ * 3 x x x
+ * 4 x x x
+ */
+ #define IDX(c,r) (((c-1)*3) + (r-1))
+
+ if (base_type == GLSL_TYPE_DOUBLE) {
+ switch (IDX(columns, rows)) {
+ case IDX(2,2): return dmat2_type;
+ case IDX(2,3): return dmat2x3_type;
+ case IDX(2,4): return dmat2x4_type;
+ case IDX(3,2): return dmat3x2_type;
+ case IDX(3,3): return dmat3_type;
+ case IDX(3,4): return dmat3x4_type;
+ case IDX(4,2): return dmat4x2_type;
+ case IDX(4,3): return dmat4x3_type;
+ case IDX(4,4): return dmat4_type;
+ default: return error_type;
+ }
+ } else {
+ switch (IDX(columns, rows)) {
+ case IDX(2,2): return mat2_type;
+ case IDX(2,3): return mat2x3_type;
+ case IDX(2,4): return mat2x4_type;
+ case IDX(3,2): return mat3x2_type;
+ case IDX(3,3): return mat3_type;
+ case IDX(3,4): return mat3x4_type;
+ case IDX(4,2): return mat4x2_type;
+ case IDX(4,3): return mat4x3_type;
+ case IDX(4,4): return mat4_type;
+ default: return error_type;
+ }
+ }
+ }
+
+ assert(!"Should not get here.");
+ return error_type;
+ }
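+
+ /* For example:
+ *
+ * get_instance(GLSL_TYPE_FLOAT, 3, 1) yields vec3_type,
+ * get_instance(GLSL_TYPE_FLOAT, 3, 4) yields mat4x3_type (4 columns,
+ * 3 rows), and
+ * get_instance(GLSL_TYPE_INT, 2, 2) yields error_type, since GLSL has
+ * no integer matrices.
+ */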
+
+ const glsl_type *
+ glsl_type::get_sampler_instance(enum glsl_sampler_dim dim,
+ bool shadow,
+ bool array,
+ glsl_base_type type)
+ {
+ switch (type) {
+ case GLSL_TYPE_FLOAT:
+ switch (dim) {
+ case GLSL_SAMPLER_DIM_1D:
+ if (shadow)
+ return (array ? sampler1DArrayShadow_type : sampler1DShadow_type);
+ else
+ return (array ? sampler1DArray_type : sampler1D_type);
+ case GLSL_SAMPLER_DIM_2D:
+ if (shadow)
+ return (array ? sampler2DArrayShadow_type : sampler2DShadow_type);
+ else
+ return (array ? sampler2DArray_type : sampler2D_type);
+ case GLSL_SAMPLER_DIM_3D:
+ if (shadow || array)
+ return error_type;
+ else
+ return sampler3D_type;
+ case GLSL_SAMPLER_DIM_CUBE:
+ if (shadow)
+ return (array ? samplerCubeArrayShadow_type : samplerCubeShadow_type);
+ else
+ return (array ? samplerCubeArray_type : samplerCube_type);
+ case GLSL_SAMPLER_DIM_RECT:
+ if (array)
+ return error_type;
+ if (shadow)
+ return sampler2DRectShadow_type;
+ else
+ return sampler2DRect_type;
+ case GLSL_SAMPLER_DIM_BUF:
+ if (shadow || array)
+ return error_type;
+ else
+ return samplerBuffer_type;
+ case GLSL_SAMPLER_DIM_MS:
+ if (shadow)
+ return error_type;
+ return (array ? sampler2DMSArray_type : sampler2DMS_type);
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ if (shadow || array)
+ return error_type;
+ else
+ return samplerExternalOES_type;
+ }
+ case GLSL_TYPE_INT:
+ if (shadow)
+ return error_type;
+ switch (dim) {
+ case GLSL_SAMPLER_DIM_1D:
+ return (array ? isampler1DArray_type : isampler1D_type);
+ case GLSL_SAMPLER_DIM_2D:
+ return (array ? isampler2DArray_type : isampler2D_type);
+ case GLSL_SAMPLER_DIM_3D:
+ if (array)
+ return error_type;
+ return isampler3D_type;
+ case GLSL_SAMPLER_DIM_CUBE:
+ return (array ? isamplerCubeArray_type : isamplerCube_type);
+ case GLSL_SAMPLER_DIM_RECT:
+ if (array)
+ return error_type;
+ return isampler2DRect_type;
+ case GLSL_SAMPLER_DIM_BUF:
+ if (array)
+ return error_type;
+ return isamplerBuffer_type;
+ case GLSL_SAMPLER_DIM_MS:
+ return (array ? isampler2DMSArray_type : isampler2DMS_type);
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ return error_type;
+ }
+ case GLSL_TYPE_UINT:
+ if (shadow)
+ return error_type;
+ switch (dim) {
+ case GLSL_SAMPLER_DIM_1D:
+ return (array ? usampler1DArray_type : usampler1D_type);
+ case GLSL_SAMPLER_DIM_2D:
+ return (array ? usampler2DArray_type : usampler2D_type);
+ case GLSL_SAMPLER_DIM_3D:
+ if (array)
+ return error_type;
+ return usampler3D_type;
+ case GLSL_SAMPLER_DIM_CUBE:
+ return (array ? usamplerCubeArray_type : usamplerCube_type);
+ case GLSL_SAMPLER_DIM_RECT:
+ if (array)
+ return error_type;
+ return usampler2DRect_type;
+ case GLSL_SAMPLER_DIM_BUF:
+ if (array)
+ return error_type;
+ return usamplerBuffer_type;
+ case GLSL_SAMPLER_DIM_MS:
+ return (array ? usampler2DMSArray_type : usampler2DMS_type);
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ return error_type;
+ }
+ default:
+ return error_type;
+ }
+
+ unreachable("switch statement above should be complete");
+ }
+
++const glsl_type *
++glsl_type::get_image_instance(enum glsl_sampler_dim dim,
++ bool array, glsl_base_type type)
++{
++ switch (type) {
++ case GLSL_TYPE_FLOAT:
++ switch (dim) {
++ case GLSL_SAMPLER_DIM_1D:
++ return (array ? image1DArray_type : image1D_type);
++ case GLSL_SAMPLER_DIM_2D:
++ return (array ? image2DArray_type : image2D_type);
++ case GLSL_SAMPLER_DIM_3D:
++ return image3D_type;
++ case GLSL_SAMPLER_DIM_CUBE:
++ return (array ? imageCubeArray_type : imageCube_type);
++ case GLSL_SAMPLER_DIM_RECT:
++ if (array)
++ return error_type;
++ else
++ return image2DRect_type;
++ case GLSL_SAMPLER_DIM_BUF:
++ if (array)
++ return error_type;
++ else
++ return imageBuffer_type;
++ case GLSL_SAMPLER_DIM_MS:
++ return (array ? image2DMSArray_type : image2DMS_type);
++ case GLSL_SAMPLER_DIM_EXTERNAL:
++ return error_type;
++ }
++ case GLSL_TYPE_INT:
++ switch (dim) {
++ case GLSL_SAMPLER_DIM_1D:
++ return (array ? iimage1DArray_type : iimage1D_type);
++ case GLSL_SAMPLER_DIM_2D:
++ return (array ? iimage2DArray_type : iimage2D_type);
++ case GLSL_SAMPLER_DIM_3D:
++ if (array)
++ return error_type;
++ return iimage3D_type;
++ case GLSL_SAMPLER_DIM_CUBE:
++ return (array ? iimageCubeArray_type : iimageCube_type);
++ case GLSL_SAMPLER_DIM_RECT:
++ if (array)
++ return error_type;
++ return iimage2DRect_type;
++ case GLSL_SAMPLER_DIM_BUF:
++ if (array)
++ return error_type;
++ return iimageBuffer_type;
++ case GLSL_SAMPLER_DIM_MS:
++ return (array ? iimage2DMSArray_type : iimage2DMS_type);
++ case GLSL_SAMPLER_DIM_EXTERNAL:
++ return error_type;
++ }
++ case GLSL_TYPE_UINT:
++ switch (dim) {
++ case GLSL_SAMPLER_DIM_1D:
++ return (array ? uimage1DArray_type : uimage1D_type);
++ case GLSL_SAMPLER_DIM_2D:
++ return (array ? uimage2DArray_type : uimage2D_type);
++ case GLSL_SAMPLER_DIM_3D:
++ if (array)
++ return error_type;
++ return uimage3D_type;
++ case GLSL_SAMPLER_DIM_CUBE:
++ return (array ? uimageCubeArray_type : uimageCube_type);
++ case GLSL_SAMPLER_DIM_RECT:
++ if (array)
++ return error_type;
++ return uimage2DRect_type;
++ case GLSL_SAMPLER_DIM_BUF:
++ if (array)
++ return error_type;
++ return uimageBuffer_type;
++ case GLSL_SAMPLER_DIM_MS:
++ return (array ? uimage2DMSArray_type : uimage2DMS_type);
++ case GLSL_SAMPLER_DIM_EXTERNAL:
++ return error_type;
++ }
++ default:
++ return error_type;
++ }
++
++ unreachable("switch statement above should be complete");
++}
++
+ const glsl_type *
+ glsl_type::get_array_instance(const glsl_type *base, unsigned array_size)
+ {
+ /* Generate a name using the base type pointer in the key. This is
+ * done because the name of the base type may not be unique across
+ * shaders. For example, two shaders may have different record types
+ * named 'foo'.
+ */
+ char key[128];
+ snprintf(key, sizeof(key), "%p[%u]", (void *) base, array_size);
+
+ mtx_lock(&glsl_type::mutex);
+
+ if (array_types == NULL) {
+ array_types = _mesa_hash_table_create(NULL, _mesa_key_hash_string,
+ _mesa_key_string_equal);
+ }
+
+ const struct hash_entry *entry = _mesa_hash_table_search(array_types, key);
+ if (entry == NULL) {
+ mtx_unlock(&glsl_type::mutex);
+ const glsl_type *t = new glsl_type(base, array_size);
+ mtx_lock(&glsl_type::mutex);
+
+ entry = _mesa_hash_table_insert(array_types,
+ ralloc_strdup(mem_ctx, key),
+ (void *) t);
+ }
+
+ assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_ARRAY);
+ assert(((glsl_type *) entry->data)->length == array_size);
+ assert(((glsl_type *) entry->data)->fields.array == base);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ return (glsl_type *) entry->data;
+ }
+
+
+ bool
+ glsl_type::record_compare(const glsl_type *b) const
+ {
+ if (this->length != b->length)
+ return false;
+
+ if (this->interface_packing != b->interface_packing)
+ return false;
+
+ /* From the GLSL 4.20 specification (Sec 4.2):
+ *
+ * "Structures must have the same name, sequence of type names, and
+ * type definitions, and field names to be considered the same type."
+ *
+ * GLSL ES behaves the same (Ver 1.00 Sec 4.2.4, Ver 3.00 Sec 4.2.5).
+ *
+ * Note that we cannot enforce the type-name check when comparing unnamed
+ * structure types; these are assigned a unique name during parsing.
+ */
+ if (!this->is_anonymous() && !b->is_anonymous())
+ if (strcmp(this->name, b->name) != 0)
+ return false;
+
+ for (unsigned i = 0; i < this->length; i++) {
+ if (this->fields.structure[i].type != b->fields.structure[i].type)
+ return false;
+ if (strcmp(this->fields.structure[i].name,
+ b->fields.structure[i].name) != 0)
+ return false;
+ if (this->fields.structure[i].matrix_layout
+ != b->fields.structure[i].matrix_layout)
+ return false;
+ if (this->fields.structure[i].location
+ != b->fields.structure[i].location)
+ return false;
+ if (this->fields.structure[i].interpolation
+ != b->fields.structure[i].interpolation)
+ return false;
+ if (this->fields.structure[i].centroid
+ != b->fields.structure[i].centroid)
+ return false;
+ if (this->fields.structure[i].sample
+ != b->fields.structure[i].sample)
+ return false;
+ if (this->fields.structure[i].patch
+ != b->fields.structure[i].patch)
+ return false;
+ if (this->fields.structure[i].image_read_only
+ != b->fields.structure[i].image_read_only)
+ return false;
+ if (this->fields.structure[i].image_write_only
+ != b->fields.structure[i].image_write_only)
+ return false;
+ if (this->fields.structure[i].image_coherent
+ != b->fields.structure[i].image_coherent)
+ return false;
+ if (this->fields.structure[i].image_volatile
+ != b->fields.structure[i].image_volatile)
+ return false;
+ if (this->fields.structure[i].image_restrict
+ != b->fields.structure[i].image_restrict)
+ return false;
+ if (this->fields.structure[i].precision
+ != b->fields.structure[i].precision)
+ return false;
+ }
+
+ return true;
+ }
+
+
+ bool
+ glsl_type::record_key_compare(const void *a, const void *b)
+ {
+ const glsl_type *const key1 = (glsl_type *) a;
+ const glsl_type *const key2 = (glsl_type *) b;
+
+ return strcmp(key1->name, key2->name) == 0 && key1->record_compare(key2);
+ }
+
+
+ /**
+ * Generate an integer hash value for a glsl_type structure type.
+ */
+ unsigned
+ glsl_type::record_key_hash(const void *a)
+ {
+ const glsl_type *const key = (glsl_type *) a;
+ uintptr_t hash = key->length;
+ unsigned retval;
+
+ for (unsigned i = 0; i < key->length; i++) {
+ /* casting pointer to uintptr_t */
+ hash = (hash * 13 ) + (uintptr_t) key->fields.structure[i].type;
+ }
+
+ if (sizeof(hash) == 8)
+ retval = (hash & 0xffffffff) ^ ((uint64_t) hash >> 32);
+ else
+ retval = hash;
+
+ return retval;
+ }
+
+
+ const glsl_type *
+ glsl_type::get_record_instance(const glsl_struct_field *fields,
+ unsigned num_fields,
+ const char *name)
+ {
+ const glsl_type key(fields, num_fields, name);
+
+ mtx_lock(&glsl_type::mutex);
+
+ if (record_types == NULL) {
+ record_types = _mesa_hash_table_create(NULL, record_key_hash,
+ record_key_compare);
+ }
+
+ const struct hash_entry *entry = _mesa_hash_table_search(record_types,
+ &key);
+ if (entry == NULL) {
+ mtx_unlock(&glsl_type::mutex);
+ const glsl_type *t = new glsl_type(fields, num_fields, name);
+ mtx_lock(&glsl_type::mutex);
+
+ entry = _mesa_hash_table_insert(record_types, t, (void *) t);
+ }
+
+ assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_STRUCT);
+ assert(((glsl_type *) entry->data)->length == num_fields);
+ assert(strcmp(((glsl_type *) entry->data)->name, name) == 0);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ return (glsl_type *) entry->data;
+ }
+
+
+ const glsl_type *
+ glsl_type::get_interface_instance(const glsl_struct_field *fields,
+ unsigned num_fields,
+ enum glsl_interface_packing packing,
+ const char *block_name)
+ {
+ const glsl_type key(fields, num_fields, packing, block_name);
+
+ mtx_lock(&glsl_type::mutex);
+
+ if (interface_types == NULL) {
+ interface_types = _mesa_hash_table_create(NULL, record_key_hash,
+ record_key_compare);
+ }
+
+ const struct hash_entry *entry = _mesa_hash_table_search(interface_types,
+ &key);
+ if (entry == NULL) {
+ mtx_unlock(&glsl_type::mutex);
+ const glsl_type *t = new glsl_type(fields, num_fields,
+ packing, block_name);
+ mtx_lock(&glsl_type::mutex);
+
+ entry = _mesa_hash_table_insert(interface_types, t, (void *) t);
+ }
+
+ assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_INTERFACE);
+ assert(((glsl_type *) entry->data)->length == num_fields);
+ assert(strcmp(((glsl_type *) entry->data)->name, block_name) == 0);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ return (glsl_type *) entry->data;
+ }
+
+ const glsl_type *
+ glsl_type::get_subroutine_instance(const char *subroutine_name)
+ {
+ const glsl_type key(subroutine_name);
+
+ mtx_lock(&glsl_type::mutex);
+
+ if (subroutine_types == NULL) {
+ subroutine_types = _mesa_hash_table_create(NULL, record_key_hash,
+ record_key_compare);
+ }
+
+ const struct hash_entry *entry = _mesa_hash_table_search(subroutine_types,
+ &key);
+ if (entry == NULL) {
+ mtx_unlock(&glsl_type::mutex);
+ const glsl_type *t = new glsl_type(subroutine_name);
+ mtx_lock(&glsl_type::mutex);
+
+ entry = _mesa_hash_table_insert(subroutine_types, t, (void *) t);
+ }
+
+ assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_SUBROUTINE);
+ assert(strcmp(((glsl_type *) entry->data)->name, subroutine_name) == 0);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ return (glsl_type *) entry->data;
+ }
+
+
++static bool
++function_key_compare(const void *a, const void *b)
++{
++ const glsl_type *const key1 = (glsl_type *) a;
++ const glsl_type *const key2 = (glsl_type *) b;
++
++ if (key1->length != key2->length)
++ return false;
++
++ return memcmp(key1->fields.parameters, key2->fields.parameters,
++ (key1->length + 1) * sizeof(*key1->fields.parameters)) == 0;
++}
++
++
++static uint32_t
++function_key_hash(const void *a)
++{
++ const glsl_type *const key = (glsl_type *) a;
++ char hash_key[128];
++ unsigned size = 0;
++
++ size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length);
++
++ for (unsigned i = 0; i < key->length; i++) {
++ if (size >= sizeof(hash_key))
++ break;
++
++ size += snprintf(& hash_key[size], sizeof(hash_key) - size,
++ "%p", (void *) key->fields.structure[i].type);
++ }
++
++ return _mesa_hash_string(hash_key);
++}
++
++const glsl_type *
++glsl_type::get_function_instance(const glsl_type *return_type,
++ const glsl_function_param *params,
++ unsigned num_params)
++{
++ const glsl_type key(return_type, params, num_params);
++
++ mtx_lock(&glsl_type::mutex);
++
++ if (function_types == NULL) {
++ function_types = _mesa_hash_table_create(NULL, function_key_hash,
++ function_key_compare);
++ }
++
++ struct hash_entry *entry = _mesa_hash_table_search(function_types, &key);
++ if (entry == NULL) {
++ mtx_unlock(&glsl_type::mutex);
++ const glsl_type *t = new glsl_type(return_type, params, num_params);
++ mtx_lock(&glsl_type::mutex);
++
++ entry = _mesa_hash_table_insert(function_types, t, (void *) t);
++ }
++
++ const glsl_type *t = (const glsl_type *)entry->data;
++
++ assert(t->base_type == GLSL_TYPE_FUNCTION);
++ assert(t->length == num_params);
++
++ mtx_unlock(&glsl_type::mutex);
++
++ return t;
++}
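++
++ /* Sketch of the intended usage (illustrative; it assumes glsl_function_param
++ * carries the parameter type in its 'type' member, as the hashing code above
++ * does):
++ *
++ * glsl_function_param params[2] = {};
++ * params[0].type = glsl_type::float_type;
++ * params[1].type = glsl_type::int_type;
++ * const glsl_type *fn =
++ * glsl_type::get_function_instance(glsl_type::void_type, params, 2);
++ *
++ * The zero-initialization matters because function_key_compare() memcmp()s
++ * the whole parameter array. Repeated calls with an identical signature
++ * return the same pointer, so function types compare by pointer like the
++ * other interned types.
++ */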
++
++
+ const glsl_type *
+ glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b)
+ {
+ if (type_a == type_b) {
+ return type_a;
+ } else if (type_a->is_matrix() && type_b->is_matrix()) {
+ /* Matrix multiply. The columns of A must match the rows of B. Given
+ * the other previously tested constraints, this means the vector type
+ * of a row from A must be the same as the vector type of a column from
+ * B.
+ */
+ if (type_a->row_type() == type_b->column_type()) {
+ /* The resulting matrix has the number of columns of matrix B and
+ * the number of rows of matrix A. We get the row count of A by
+ * looking at the size of a vector that makes up a column. The
+ * transpose (size of a row) is done for B.
+ */
+ const glsl_type *const type =
+ get_instance(type_a->base_type,
+ type_a->column_type()->vector_elements,
+ type_b->row_type()->vector_elements);
+ assert(type != error_type);
+
+ return type;
+ }
+ } else if (type_a->is_matrix()) {
+ /* A is a matrix and B is a column vector. Columns of A must match
+ * rows of B. Given the other previously tested constraints, this
+ * means the vector type of a row from A must be the same as the
+ * vector type of B.
+ */
+ if (type_a->row_type() == type_b) {
+ /* The resulting vector has a number of elements equal to
+ * the number of rows of matrix A. */
+ const glsl_type *const type =
+ get_instance(type_a->base_type,
+ type_a->column_type()->vector_elements,
+ 1);
+ assert(type != error_type);
+
+ return type;
+ }
+ } else {
+ assert(type_b->is_matrix());
+
+ /* A is a row vector and B is a matrix. Columns of A must match rows
+ * of B. Given the other previously tested constraints, this means
+ * the type of A must be the same as the vector type of a column from
+ * B.
+ */
+ if (type_a == type_b->column_type()) {
+ /* The resulting vector has a number of elements equal to
+ * the number of columns of matrix B. */
+ const glsl_type *const type =
+ get_instance(type_a->base_type,
+ type_b->row_type()->vector_elements,
+ 1);
+ assert(type != error_type);
+
+ return type;
+ }
+ }
+
+ return error_type;
+ }
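+
+ /* Worked examples of the rules above:
+ *
+ * mat2x3 * mat4x2 yields mat4x3 (a row of A and a column of B are both
+ * vec2; the result takes B's column count and A's row count),
+ * mat4 * vec4 yields vec4,
+ * vec3 * mat3 yields vec3, and
+ * mat3 * vec2 yields error_type because the dimensions do not match.
+ */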
+
+
+ const glsl_type *
+ glsl_type::field_type(const char *name) const
+ {
+ if (this->base_type != GLSL_TYPE_STRUCT
+ && this->base_type != GLSL_TYPE_INTERFACE)
+ return error_type;
+
+ for (unsigned i = 0; i < this->length; i++) {
+ if (strcmp(name, this->fields.structure[i].name) == 0)
+ return this->fields.structure[i].type;
+ }
+
+ return error_type;
+ }
+
+
+ int
+ glsl_type::field_index(const char *name) const
+ {
+ if (this->base_type != GLSL_TYPE_STRUCT
+ && this->base_type != GLSL_TYPE_INTERFACE)
+ return -1;
+
+ for (unsigned i = 0; i < this->length; i++) {
+ if (strcmp(name, this->fields.structure[i].name) == 0)
+ return i;
+ }
+
+ return -1;
+ }
+
+
+ unsigned
+ glsl_type::component_slots() const
+ {
+ switch (this->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_BOOL:
+ return this->components();
+
+ case GLSL_TYPE_DOUBLE:
+ return 2 * this->components();
+
+ case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_INTERFACE: {
+ unsigned size = 0;
+
+ for (unsigned i = 0; i < this->length; i++)
+ size += this->fields.structure[i].type->component_slots();
+
+ return size;
+ }
+
+ case GLSL_TYPE_ARRAY:
+ return this->length * this->fields.array->component_slots();
+
+ case GLSL_TYPE_IMAGE:
+ return 1;
+ case GLSL_TYPE_SUBROUTINE:
+ return 1;
++
++ case GLSL_TYPE_FUNCTION:
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_ERROR:
+ break;
+ }
+
+ return 0;
+ }
+
+ unsigned
+ glsl_type::record_location_offset(unsigned length) const
+ {
+ unsigned offset = 0;
+ const glsl_type *t = this->without_array();
+ if (t->is_record()) {
+ assert(length <= t->length);
+
+ for (unsigned i = 0; i < length; i++) {
+ const glsl_type *st = t->fields.structure[i].type;
+ const glsl_type *wa = st->without_array();
+ if (wa->is_record()) {
+ unsigned r_offset = wa->record_location_offset(wa->length);
+ offset += st->is_array() ?
+ st->arrays_of_arrays_size() * r_offset : r_offset;
+ } else if (st->is_array() && st->fields.array->is_array()) {
+ unsigned outer_array_size = st->length;
+ const glsl_type *base_type = st->fields.array;
+
+ /* For arrays of arrays the outer arrays take up a uniform
+ * slot for each element. The innermost array elements share a
+ * single slot so we ignore the innermost array when calculating
+ * the offset.
+ */
+ while (base_type->fields.array->is_array()) {
+ outer_array_size = outer_array_size * base_type->length;
+ base_type = base_type->fields.array;
+ }
+ offset += outer_array_size;
+ } else {
+ /* We don't worry about arrays here because unless the array
+ * contains a structure or another array it only takes up a single
+ * uniform slot.
+ */
+ offset += 1;
+ }
+ }
+ }
+ return offset;
+ }
+
+ unsigned
+ glsl_type::uniform_locations() const
+ {
+ unsigned size = 0;
+
+ switch (this->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_SUBROUTINE:
+ return 1;
+
+ case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_INTERFACE:
+ for (unsigned i = 0; i < this->length; i++)
+ size += this->fields.structure[i].type->uniform_locations();
+ return size;
+ case GLSL_TYPE_ARRAY:
+ return this->length * this->fields.array->uniform_locations();
+ default:
+ return 0;
+ }
+ }
+
+ bool
+ glsl_type::can_implicitly_convert_to(const glsl_type *desired,
+ _mesa_glsl_parse_state *state) const
+ {
+ if (this == desired)
+ return true;
+
+ /* There is no conversion among matrix types. */
+ if (this->matrix_columns > 1 || desired->matrix_columns > 1)
+ return false;
+
+ /* Vector size must match. */
+ if (this->vector_elements != desired->vector_elements)
+ return false;
+
+ /* int and uint can be converted to float. */
+ if (desired->is_float() && this->is_integer())
+ return true;
+
+ /* With GLSL 4.0 / ARB_gpu_shader5, int can be converted to uint.
+ * Note that state may be NULL here, when resolving function calls in the
+ * linker. By this time, all the state-dependent checks have already
+ * happened though, so allow anything that's allowed in any shader version. */
+ if ((!state || state->is_version(400, 0) || state->ARB_gpu_shader5_enable) &&
+ desired->base_type == GLSL_TYPE_UINT && this->base_type == GLSL_TYPE_INT)
+ return true;
+
+ /* No implicit conversions from double. */
+ if ((!state || state->has_double()) && this->is_double())
+ return false;
+
+ /* Conversions from different types to double. */
+ if ((!state || state->has_double()) && desired->is_double()) {
+ if (this->is_float())
+ return true;
+ if (this->is_integer())
+ return true;
+ }
+
+ return false;
+ }
+
+ unsigned
+ glsl_type::std140_base_alignment(bool row_major) const
+ {
+ unsigned N = is_double() ? 8 : 4;
+
+ /* (1) If the member is a scalar consuming <N> basic machine units, the
+ * base alignment is <N>.
+ *
+ * (2) If the member is a two- or four-component vector with components
+ * consuming <N> basic machine units, the base alignment is 2<N> or
+ * 4<N>, respectively.
+ *
+ * (3) If the member is a three-component vector with components consuming
+ * <N> basic machine units, the base alignment is 4<N>.
+ */
+ if (this->is_scalar() || this->is_vector()) {
+ switch (this->vector_elements) {
+ case 1:
+ return N;
+ case 2:
+ return 2 * N;
+ case 3:
+ case 4:
+ return 4 * N;
+ }
+ }
+
+ /* (4) If the member is an array of scalars or vectors, the base alignment
+ * and array stride are set to match the base alignment of a single
+ * array element, according to rules (1), (2), and (3), and rounded up
+ * to the base alignment of a vec4. The array may have padding at the
+ * end; the base offset of the member following the array is rounded up
+ * to the next multiple of the base alignment.
+ *
+ * (6) If the member is an array of <S> column-major matrices with <C>
+ * columns and <R> rows, the matrix is stored identically to a row of
+ * <S>*<C> column vectors with <R> components each, according to rule
+ * (4).
+ *
+ * (8) If the member is an array of <S> row-major matrices with <C> columns
+ * and <R> rows, the matrix is stored identically to a row of <S>*<R>
+ * row vectors with <C> components each, according to rule (4).
+ *
+ * (10) If the member is an array of <S> structures, the <S> elements of
+ * the array are laid out in order, according to rule (9).
+ */
+ if (this->is_array()) {
+ if (this->fields.array->is_scalar() ||
+ this->fields.array->is_vector() ||
+ this->fields.array->is_matrix()) {
+ return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
+ } else {
+ assert(this->fields.array->is_record() ||
+ this->fields.array->is_array());
+ return this->fields.array->std140_base_alignment(row_major);
+ }
+ }
+
+ /* (5) If the member is a column-major matrix with <C> columns and
+ * <R> rows, the matrix is stored identically to an array of
+ * <C> column vectors with <R> components each, according to
+ * rule (4).
+ *
+ * (7) If the member is a row-major matrix with <C> columns and <R>
+ * rows, the matrix is stored identically to an array of <R>
+ * row vectors with <C> components each, according to rule (4).
+ */
+ if (this->is_matrix()) {
+ const struct glsl_type *vec_type, *array_type;
+ int c = this->matrix_columns;
+ int r = this->vector_elements;
+
+ if (row_major) {
+ vec_type = get_instance(base_type, c, 1);
+ array_type = glsl_type::get_array_instance(vec_type, r);
+ } else {
+ vec_type = get_instance(base_type, r, 1);
+ array_type = glsl_type::get_array_instance(vec_type, c);
+ }
+
+ return array_type->std140_base_alignment(false);
+ }
+
+ /* (9) If the member is a structure, the base alignment of the
+ * structure is <N>, where <N> is the largest base alignment
+ * value of any of its members, and rounded up to the base
+ * alignment of a vec4. The individual members of this
+ * sub-structure are then assigned offsets by applying this set
+ * of rules recursively, where the base offset of the first
+ * member of the sub-structure is equal to the aligned offset
+ * of the structure. The structure may have padding at the end;
+ * the base offset of the member following the sub-structure is
+ * rounded up to the next multiple of the base alignment of the
+ * structure.
+ */
+ if (this->is_record()) {
+ unsigned base_alignment = 16;
+ for (unsigned i = 0; i < this->length; i++) {
+ bool field_row_major = row_major;
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+ if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+ field_row_major = true;
+ } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+ field_row_major = false;
+ }
+
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ base_alignment = MAX2(base_alignment,
+ field_type->std140_base_alignment(field_row_major));
+ }
+ return base_alignment;
+ }
+
+ assert(!"not reached");
+ return -1;
+ }
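+
+ /* A few concrete values that follow from these rules: float aligns to 4,
+ * vec2 to 8, vec3 and vec4 to 16; float[4] aligns to 16 because rule (4)
+ * rounds the element alignment up to that of a vec4; mat3 aligns to 16
+ * (it is laid out as an array of three vec3 columns); and
+ * struct { float f; vec3 v; } aligns to 16, the largest member alignment
+ * rounded up to a vec4.
+ */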
+
+ unsigned
+ glsl_type::std140_size(bool row_major) const
+ {
+ unsigned N = is_double() ? 8 : 4;
+
+ /* (1) If the member is a scalar consuming <N> basic machine units, the
+ * base alignment is <N>.
+ *
+ * (2) If the member is a two- or four-component vector with components
+ * consuming <N> basic machine units, the base alignment is 2<N> or
+ * 4<N>, respectively.
+ *
+ * (3) If the member is a three-component vector with components consuming
+ * <N> basic machine units, the base alignment is 4<N>.
+ */
+ if (this->is_scalar() || this->is_vector()) {
+ return this->vector_elements * N;
+ }
+
+ /* (5) If the member is a column-major matrix with <C> columns and
+ * <R> rows, the matrix is stored identically to an array of
+ * <C> column vectors with <R> components each, according to
+ * rule (4).
+ *
+ * (6) If the member is an array of <S> column-major matrices with <C>
+ * columns and <R> rows, the matrix is stored identically to a row of
+ * <S>*<C> column vectors with <R> components each, according to rule
+ * (4).
+ *
+ * (7) If the member is a row-major matrix with <C> columns and <R>
+ * rows, the matrix is stored identically to an array of <R>
+ * row vectors with <C> components each, according to rule (4).
+ *
+ * (8) If the member is an array of <S> row-major matrices with <C> columns
+ * and <R> rows, the matrix is stored identically to a row of <S>*<R>
+ * row vectors with <C> components each, according to rule (4).
+ */
+ if (this->without_array()->is_matrix()) {
+ const struct glsl_type *element_type;
+ const struct glsl_type *vec_type;
+ unsigned int array_len;
+
+ if (this->is_array()) {
+ element_type = this->without_array();
+ array_len = this->arrays_of_arrays_size();
+ } else {
+ element_type = this;
+ array_len = 1;
+ }
+
+ if (row_major) {
+ vec_type = get_instance(element_type->base_type,
+ element_type->matrix_columns, 1);
+
+ array_len *= element_type->vector_elements;
+ } else {
+ vec_type = get_instance(element_type->base_type,
+ element_type->vector_elements, 1);
+ array_len *= element_type->matrix_columns;
+ }
+ const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
+ array_len);
+
+ return array_type->std140_size(false);
+ }
+
+ /* (4) If the member is an array of scalars or vectors, the base alignment
+ * and array stride are set to match the base alignment of a single
+ * array element, according to rules (1), (2), and (3), and rounded up
+ * to the base alignment of a vec4. The array may have padding at the
+ * end; the base offset of the member following the array is rounded up
+ * to the next multiple of the base alignment.
+ *
+ * (10) If the member is an array of <S> structures, the <S> elements of
+ * the array are laid out in order, according to rule (9).
+ */
+ if (this->is_array()) {
+ if (this->without_array()->is_record()) {
+ return this->arrays_of_arrays_size() *
+ this->without_array()->std140_size(row_major);
+ } else {
+ unsigned element_base_align =
+ this->without_array()->std140_base_alignment(row_major);
+ return this->arrays_of_arrays_size() * MAX2(element_base_align, 16);
+ }
+ }
+
+ /* (9) If the member is a structure, the base alignment of the
+ * structure is <N>, where <N> is the largest base alignment
+ * value of any of its members, and rounded up to the base
+ * alignment of a vec4. The individual members of this
+ * sub-structure are then assigned offsets by applying this set
+ * of rules recursively, where the base offset of the first
+ * member of the sub-structure is equal to the aligned offset
+ * of the structure. The structure may have padding at the end;
+ * the base offset of the member following the sub-structure is
+ * rounded up to the next multiple of the base alignment of the
+ * structure.
+ */
+ if (this->is_record() || this->is_interface()) {
+ unsigned size = 0;
+ unsigned max_align = 0;
+
+ for (unsigned i = 0; i < this->length; i++) {
+ bool field_row_major = row_major;
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+ if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+ field_row_major = true;
+ } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+ field_row_major = false;
+ }
+
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ unsigned align = field_type->std140_base_alignment(field_row_major);
+
+ /* Ignore unsized arrays when calculating size */
+ if (field_type->is_unsized_array())
+ continue;
+
+ size = glsl_align(size, align);
+ size += field_type->std140_size(field_row_major);
+
+ max_align = MAX2(align, max_align);
+
+ if (field_type->is_record() && (i + 1 < this->length))
+ size = glsl_align(size, 16);
+ }
+ size = glsl_align(size, MAX2(max_align, 16));
+ return size;
+ }
+
+ assert(!"not reached");
+ return -1;
+ }
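+
+ /* Example: struct { float f; vec3 v; } occupies 32 bytes under std140.
+ * 'f' sits at offset 0 (4 bytes), 'v' is aligned up to offset 16 and adds
+ * 12 bytes, and the resulting 28 bytes are rounded up to the structure
+ * alignment of 16, giving 32. A float[3], by contrast, takes 3 * 16 = 48
+ * bytes because each array element is padded out to a vec4.
+ */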
+
+ unsigned
+ glsl_type::std430_base_alignment(bool row_major) const
+ {
+
+ unsigned N = is_double() ? 8 : 4;
+
+ /* (1) If the member is a scalar consuming <N> basic machine units, the
+ * base alignment is <N>.
+ *
+ * (2) If the member is a two- or four-component vector with components
+ * consuming <N> basic machine units, the base alignment is 2<N> or
+ * 4<N>, respectively.
+ *
+ * (3) If the member is a three-component vector with components consuming
+ * <N> basic machine units, the base alignment is 4<N>.
+ */
+ if (this->is_scalar() || this->is_vector()) {
+ switch (this->vector_elements) {
+ case 1:
+ return N;
+ case 2:
+ return 2 * N;
+ case 3:
+ case 4:
+ return 4 * N;
+ }
+ }
+
+ /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+ *
+ * "When using the std430 storage layout, shader storage blocks will be
+ * laid out in buffer storage identically to uniform and shader storage
+ * blocks using the std140 layout, except that the base alignment and
+ * stride of arrays of scalars and vectors in rule 4 and of structures
+ * in rule 9 are not rounded up a multiple of the base alignment of a vec4.
+ */
+
+ /* (1) If the member is a scalar consuming <N> basic machine units, the
+ * base alignment is <N>.
+ *
+ * (2) If the member is a two- or four-component vector with components
+ * consuming <N> basic machine units, the base alignment is 2<N> or
+ * 4<N>, respectively.
+ *
+ * (3) If the member is a three-component vector with components consuming
+ * <N> basic machine units, the base alignment is 4<N>.
+ */
+ if (this->is_array())
+ return this->fields.array->std430_base_alignment(row_major);
+
+ /* (5) If the member is a column-major matrix with <C> columns and
+ * <R> rows, the matrix is stored identically to an array of
+ * <C> column vectors with <R> components each, according to
+ * rule (4).
+ *
+ * (7) If the member is a row-major matrix with <C> columns and <R>
+ * rows, the matrix is stored identically to an array of <R>
+ * row vectors with <C> components each, according to rule (4).
+ */
+ if (this->is_matrix()) {
+ const struct glsl_type *vec_type, *array_type;
+ int c = this->matrix_columns;
+ int r = this->vector_elements;
+
+ if (row_major) {
+ vec_type = get_instance(base_type, c, 1);
+ array_type = glsl_type::get_array_instance(vec_type, r);
+ } else {
+ vec_type = get_instance(base_type, r, 1);
+ array_type = glsl_type::get_array_instance(vec_type, c);
+ }
+
+ return array_type->std430_base_alignment(false);
+ }
+
+ /* (9) If the member is a structure, the base alignment of the
+ * structure is <N>, where <N> is the largest base alignment
+ * value of any of its members, and rounded up to the base
+ * alignment of a vec4. The individual members of this
+ * sub-structure are then assigned offsets by applying this set
+ * of rules recursively, where the base offset of the first
+ * member of the sub-structure is equal to the aligned offset
+ * of the structure. The structure may have padding at the end;
+ * the base offset of the member following the sub-structure is
+ * rounded up to the next multiple of the base alignment of the
+ * structure.
+ */
+ if (this->is_record()) {
+ unsigned base_alignment = 0;
+ for (unsigned i = 0; i < this->length; i++) {
+ bool field_row_major = row_major;
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+ if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+ field_row_major = true;
+ } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+ field_row_major = false;
+ }
+
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ base_alignment = MAX2(base_alignment,
+ field_type->std430_base_alignment(field_row_major));
+ }
+ assert(base_alignment > 0);
+ return base_alignment;
+ }
+ assert(!"not reached");
+ return -1;
+ }
+
+ unsigned
+ glsl_type::std430_array_stride(bool row_major) const
+ {
+ unsigned N = is_double() ? 8 : 4;
+
+ /* Notice that the array stride of a vec3 is not 3 * N but 4 * N.
+ * See OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout"
+ *
+ * (3) If the member is a three-component vector with components consuming
+ * <N> basic machine units, the base alignment is 4<N>.
+ */
+ if (this->is_vector() && this->vector_elements == 3)
+ return 4 * N;
+
+ /* By default use std430_size(row_major) */
+ return this->std430_size(row_major);
+ }
+
+ unsigned
+ glsl_type::std430_size(bool row_major) const
+ {
+ unsigned N = is_double() ? 8 : 4;
+
+ /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+ *
+ * "When using the std430 storage layout, shader storage blocks will be
+ * laid out in buffer storage identically to uniform and shader storage
+ * blocks using the std140 layout, except that the base alignment and
+ * stride of arrays of scalars and vectors in rule 4 and of structures
+ * in rule 9 are not rounded up a multiple of the base alignment of a vec4.
+ */
+ if (this->is_scalar() || this->is_vector())
+ return this->vector_elements * N;
+
+ if (this->without_array()->is_matrix()) {
+ const struct glsl_type *element_type;
+ const struct glsl_type *vec_type;
+ unsigned int array_len;
+
+ if (this->is_array()) {
+ element_type = this->without_array();
+ array_len = this->arrays_of_arrays_size();
+ } else {
+ element_type = this;
+ array_len = 1;
+ }
+
+ if (row_major) {
+ vec_type = get_instance(element_type->base_type,
+ element_type->matrix_columns, 1);
+
+ array_len *= element_type->vector_elements;
+ } else {
+ vec_type = get_instance(element_type->base_type,
+ element_type->vector_elements, 1);
+ array_len *= element_type->matrix_columns;
+ }
+ const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
+ array_len);
+
+ return array_type->std430_size(false);
+ }
+
+ if (this->is_array()) {
+ if (this->without_array()->is_record())
+ return this->arrays_of_arrays_size() *
+ this->without_array()->std430_size(row_major);
+ else
+ return this->arrays_of_arrays_size() *
+ this->without_array()->std430_base_alignment(row_major);
+ }
+
+ if (this->is_record() || this->is_interface()) {
+ unsigned size = 0;
+ unsigned max_align = 0;
+
+ for (unsigned i = 0; i < this->length; i++) {
+ bool field_row_major = row_major;
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+ if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+ field_row_major = true;
+ } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+ field_row_major = false;
+ }
+
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ unsigned align = field_type->std430_base_alignment(field_row_major);
+ size = glsl_align(size, align);
+ size += field_type->std430_size(field_row_major);
+
+ max_align = MAX2(align, max_align);
+ }
+ size = glsl_align(size, max_align);
+ return size;
+ }
+
+ assert(!"not reached");
+ return -1;
+ }
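+
+ /* The practical difference from std140 shows up for arrays of scalars and
+ * vectors: under std430 a float[3] occupies 3 * 4 = 12 bytes (no vec4
+ * rounding), whereas std140 needs 3 * 16 = 48 bytes. Structures are still
+ * padded to their own base alignment, so struct { float f; vec3 v; }
+ * remains 32 bytes in both layouts.
+ */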
+
+ unsigned
+ glsl_type::count_attribute_slots(bool vertex_input_slots) const
+ {
+ /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "A scalar input counts the same amount against this limit as a vec4,
+ * so applications may want to consider packing groups of four
+ * unrelated float inputs together into a vector to better utilize the
+ * capabilities of the underlying hardware. A matrix input will use up
+ * multiple locations. The number of locations used will equal the
+ * number of columns in the matrix."
+ *
+ * The spec does not explicitly say how arrays are counted. However, it
+ * should be safe to assume the total number of slots consumed by an array
+ * is the number of entries in the array multiplied by the number of slots
+ * consumed by a single element of the array.
+ *
+ * The spec says nothing about how structs are counted, because vertex
+ * attributes are not allowed to be (or contain) structs. However, Mesa
+ * allows varying structs; the number of varying slots taken up by a
+ * varying struct is simply equal to the sum of the number of slots taken
+ * up by each element.
+ *
+ * Doubles are counted differently depending on whether they are vertex
+ * inputs or everything else. Vertex inputs from ARB_vertex_attrib_64bit
+ * take one location no matter what size they are, otherwise dvec3/4
+ * take two locations.
+ */
+ switch (this->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_BOOL:
+ return this->matrix_columns;
+ case GLSL_TYPE_DOUBLE:
+ if (this->vector_elements > 2 && !vertex_input_slots)
+ return this->matrix_columns * 2;
+ else
+ return this->matrix_columns;
+ case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_INTERFACE: {
+ unsigned size = 0;
+
+ for (unsigned i = 0; i < this->length; i++)
+ size += this->fields.structure[i].type->count_attribute_slots(vertex_input_slots);
+
+ return size;
+ }
+
+ case GLSL_TYPE_ARRAY:
+ return this->length * this->fields.array->count_attribute_slots(vertex_input_slots);
+
++ case GLSL_TYPE_FUNCTION:
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_SUBROUTINE:
+ case GLSL_TYPE_ERROR:
+ break;
+ }
+
+ assert(!"Unexpected type in count_attribute_slots()");
+
+ return 0;
+ }
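+
+ /* For example, a float or vec4 input uses 1 slot and a mat4 uses 4 (one
+ * per column). A dvec4 uses 1 slot as a vertex input but 2 slots as a
+ * varying, so a dmat3x4 varying uses 3 * 2 = 6 slots.
+ */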
+
+ int
+ glsl_type::coordinate_components() const
+ {
+ int size;
+
+ switch (sampler_dimensionality) {
+ case GLSL_SAMPLER_DIM_1D:
+ case GLSL_SAMPLER_DIM_BUF:
+ size = 1;
+ break;
+ case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_RECT:
+ case GLSL_SAMPLER_DIM_MS:
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ size = 2;
+ break;
+ case GLSL_SAMPLER_DIM_3D:
+ case GLSL_SAMPLER_DIM_CUBE:
+ size = 3;
+ break;
+ default:
+ assert(!"Should not get here.");
+ size = 1;
+ break;
+ }
+
+ /* Array textures need an additional component for the array index, except
+ * for cubemap array images that behave like a 2D array of interleaved
+ * cubemap faces.
+ */
+ if (sampler_array &&
+ !(base_type == GLSL_TYPE_IMAGE &&
+ sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE))
+ size += 1;
+
+ return size;
+ }
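+
+ /* For example, sampler2D needs 2 coordinate components, sampler2DArray
+ * needs 3 (the extra component selects the layer), samplerCubeArray needs
+ * 4, while a cubemap array image stays at 3 as noted above.
+ */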
+
+ /**
+ * Declarations of type flyweights (glsl_type::_foo_type) and
+ * convenience pointers (glsl_type::foo_type).
+ * @{
+ */
+ #define DECL_TYPE(NAME, ...) \
+ const glsl_type glsl_type::_##NAME##_type = glsl_type(__VA_ARGS__, #NAME); \
+ const glsl_type *const glsl_type::NAME##_type = &glsl_type::_##NAME##_type;
+
+ #define STRUCT_TYPE(NAME)
+
+ #include "compiler/builtin_type_macros.h"
+ /** @} */
--- /dev/null
- const struct glsl_type *parameters; /**< Parameters to function. */
+ /* -*- c++ -*- */
+ /*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #pragma once
+ #ifndef GLSL_TYPES_H
+ #define GLSL_TYPES_H
+
+ #include <string.h>
+ #include <assert.h>
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ struct _mesa_glsl_parse_state;
+ struct glsl_symbol_table;
+
+ extern void
+ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state);
+
+ extern void
+ _mesa_glsl_release_types(void);
+
+ #ifdef __cplusplus
+ }
+ #endif
+
+ enum glsl_base_type {
+ GLSL_TYPE_UINT = 0,
+ GLSL_TYPE_INT,
+ GLSL_TYPE_FLOAT,
+ GLSL_TYPE_DOUBLE,
+ GLSL_TYPE_BOOL,
+ GLSL_TYPE_SAMPLER,
+ GLSL_TYPE_IMAGE,
+ GLSL_TYPE_ATOMIC_UINT,
+ GLSL_TYPE_STRUCT,
++ GLSL_TYPE_FUNCTION,
+ GLSL_TYPE_INTERFACE,
+ GLSL_TYPE_ARRAY,
+ GLSL_TYPE_VOID,
+ GLSL_TYPE_SUBROUTINE,
+ GLSL_TYPE_ERROR
+ };
+
+ enum glsl_sampler_dim {
+ GLSL_SAMPLER_DIM_1D = 0,
+ GLSL_SAMPLER_DIM_2D,
+ GLSL_SAMPLER_DIM_3D,
+ GLSL_SAMPLER_DIM_CUBE,
+ GLSL_SAMPLER_DIM_RECT,
+ GLSL_SAMPLER_DIM_BUF,
+ GLSL_SAMPLER_DIM_EXTERNAL,
+ GLSL_SAMPLER_DIM_MS
+ };
+
+ enum glsl_interface_packing {
+ GLSL_INTERFACE_PACKING_STD140,
+ GLSL_INTERFACE_PACKING_SHARED,
+ GLSL_INTERFACE_PACKING_PACKED,
+ GLSL_INTERFACE_PACKING_STD430
+ };
+
+ enum glsl_matrix_layout {
+ /**
+ * The layout of the matrix is inherited from the object containing the
+ * matrix (the top level structure or the uniform block).
+ */
+ GLSL_MATRIX_LAYOUT_INHERITED,
+
+ /**
+ * Explicit column-major layout
+ *
+ * If a uniform block doesn't have an explicit layout set, it will default
+ * to this layout.
+ */
+ GLSL_MATRIX_LAYOUT_COLUMN_MAJOR,
+
+ /**
+ * Row-major layout
+ */
+ GLSL_MATRIX_LAYOUT_ROW_MAJOR
+ };
+
+ enum {
+ GLSL_PRECISION_NONE = 0,
+ GLSL_PRECISION_HIGH,
+ GLSL_PRECISION_MEDIUM,
+ GLSL_PRECISION_LOW
+ };
+
+ #ifdef __cplusplus
+ #include "GL/gl.h"
+ #include "util/ralloc.h"
+ #include "main/mtypes.h" /* for gl_texture_index, C++'s enum rules are broken */
+
+ struct glsl_type {
+ GLenum gl_type;
+ glsl_base_type base_type;
+
+ unsigned sampler_dimensionality:3; /**< \see glsl_sampler_dim */
+ unsigned sampler_shadow:1;
+ unsigned sampler_array:1;
+ unsigned sampler_type:2; /**< Type of data returned using this
+ * sampler or image. Only \c
+ * GLSL_TYPE_FLOAT, \c GLSL_TYPE_INT,
+ * and \c GLSL_TYPE_UINT are valid.
+ */
+ unsigned interface_packing:2;
+
+ /* Callers of this ralloc-based new need not call delete. It's
+ * easier to just ralloc_free 'mem_ctx' (or any of its ancestors). */
+ static void* operator new(size_t size)
+ {
+ mtx_lock(&glsl_type::mutex);
+
+ /* mem_ctx should have been created by the static members */
+ assert(glsl_type::mem_ctx != NULL);
+
+ void *type;
+
+ type = ralloc_size(glsl_type::mem_ctx, size);
+ assert(type != NULL);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ return type;
+ }
+
+ /* If the user *does* call delete, that's OK, we will just
+ * ralloc_free in that case. */
+ static void operator delete(void *type)
+ {
+ mtx_lock(&glsl_type::mutex);
+ ralloc_free(type);
+ mtx_unlock(&glsl_type::mutex);
+ }
+
+ /**
+ * \name Vector and matrix element counts
+ *
+ * For scalars, each of these values will be 1. For non-numeric types
+ * these will be 0.
+ */
+ /*@{*/
+ uint8_t vector_elements; /**< 1, 2, 3, or 4 vector elements. */
+ uint8_t matrix_columns; /**< 1, 2, 3, or 4 matrix columns. */
+ /*@}*/
+
+ /**
+ * For \c GLSL_TYPE_ARRAY, this is the length of the array. For
+ * \c GLSL_TYPE_STRUCT or \c GLSL_TYPE_INTERFACE, it is the number of
+ * elements in the structure and the number of values pointed to by
+ * \c fields.structure (below).
+ */
+ unsigned length;
+
+ /**
+ * Name of the data type
+ *
+ * Will never be \c NULL.
+ */
+ const char *name;
+
+ /**
+ * Subtype of composite data types.
+ */
+ union {
+ const struct glsl_type *array; /**< Type of array elements. */
-#undef DECL_TYPE
-#undef STRUCT_TYPE
-#endif /* __cplusplus */
-
++ struct glsl_function_param *parameters; /**< Parameters to function. */
+ struct glsl_struct_field *structure; /**< List of struct fields. */
+ } fields;
+
+ /**
+ * \name Pointers to various public type singletons
+ */
+ /*@{*/
+ #undef DECL_TYPE
+ #define DECL_TYPE(NAME, ...) \
+ static const glsl_type *const NAME##_type;
+ #undef STRUCT_TYPE
+ #define STRUCT_TYPE(NAME) \
+ static const glsl_type *const struct_##NAME##_type;
+ #include "compiler/builtin_type_macros.h"
+ /*@}*/
+
+ /**
+ * Convenience accessors for vector types (shorter than get_instance()).
+ * @{
+ */
+ static const glsl_type *vec(unsigned components);
+ static const glsl_type *dvec(unsigned components);
+ static const glsl_type *ivec(unsigned components);
+ static const glsl_type *uvec(unsigned components);
+ static const glsl_type *bvec(unsigned components);
+ /**@}*/
+
+ /**
+ * For numeric and boolean derived types returns the basic scalar type
+ *
+ * If the type is a numeric or boolean scalar, vector, or matrix type,
+ * this function gets the scalar type of the individual components. For
+ * all other types, including arrays of numeric or boolean types, the
+ * error type is returned.
+ */
+ const glsl_type *get_base_type() const;
+
+ /**
+ * Get the basic scalar type which this type aggregates.
+ *
+ * If the type is a numeric or boolean scalar, vector, or matrix, or an
+ * array of any of those, this function gets the scalar type of the
+ * individual components. For structs and arrays of structs, this function
+ * returns the struct type. For samplers and arrays of samplers, this
+ * function returns the sampler type.
+ */
+ const glsl_type *get_scalar_type() const;
+
+ /**
+ * Get the instance of a built-in scalar, vector, or matrix type
+ */
+ static const glsl_type *get_instance(unsigned base_type, unsigned rows,
+ unsigned columns);
+
+ /**
+ * Get the instance of a sampler type
+ */
+ static const glsl_type *get_sampler_instance(enum glsl_sampler_dim dim,
+ bool shadow,
+ bool array,
+ glsl_base_type type);
+
++ static const glsl_type *get_image_instance(enum glsl_sampler_dim dim,
++ bool array, glsl_base_type type);
+
+ /**
+ * Get the instance of an array type
+ */
+ static const glsl_type *get_array_instance(const glsl_type *base,
+ unsigned elements);
+
+ /**
+ * Get the instance of a record type
+ */
+ static const glsl_type *get_record_instance(const glsl_struct_field *fields,
+ unsigned num_fields,
+ const char *name);
+
+ /**
+ * Get the instance of an interface block type
+ */
+ static const glsl_type *get_interface_instance(const glsl_struct_field *fields,
+ unsigned num_fields,
+ enum glsl_interface_packing packing,
+ const char *block_name);
+
+ /**
+ * Get the instance of a subroutine type
+ */
+ static const glsl_type *get_subroutine_instance(const char *subroutine_name);
+
++ /**
++ * Get the instance of a function type
++ */
++ static const glsl_type *get_function_instance(const struct glsl_type *return_type,
++ const glsl_function_param *parameters,
++ unsigned num_params);
++
+ /**
+ * Get the type resulting from a multiplication of \p type_a * \p type_b
+ */
+ static const glsl_type *get_mul_type(const glsl_type *type_a,
+ const glsl_type *type_b);
+
+ /**
+ * Query the total number of scalars that make up a scalar, vector or matrix
+ */
+ unsigned components() const
+ {
+ return vector_elements * matrix_columns;
+ }
+
+ /**
+ * Calculate the number of components slots required to hold this type
+ *
+ * This is used to determine how many uniform or varying locations a type
+ * might occupy.
+ */
+ unsigned component_slots() const;
+
+ /**
+ * Calculate offset between the base location of the struct in
+ * uniform storage and a struct member.
+ * For the initial call, length is the index of the member to find the
+ * offset for.
+ */
+ unsigned record_location_offset(unsigned length) const;
+
+ /**
+ * Calculate the number of unique values from glGetUniformLocation for the
+ * elements of the type.
+ *
+ * This is used to allocate slots in the UniformRemapTable; the number of
+ * locations may not match the storage space actually used by the driver.
+ */
+ unsigned uniform_locations() const;
+
+ /**
+ * Calculate the number of attribute slots required to hold this type
+ *
+ * This implements the language rules of GLSL 1.50 for counting the number
+ * of slots used by a vertex attribute. It also determines the number of
+ * varying slots the type will use up in the absence of varying packing
+ * (and thus, it can be used to measure the number of varying slots used by
+ * the varyings that are generated by lower_packed_varyings).
+ *
+ * For vertex shader attributes - doubles only take one slot.
+ * For inter-shader varyings - dvec3/dvec4 take two slots.
+ */
+ unsigned count_attribute_slots(bool vertex_input_slots) const;
+
+ /**
+ * Alignment in bytes of the start of this type in a std140 uniform
+ * block.
+ */
+ unsigned std140_base_alignment(bool row_major) const;
+
+ /** Size in bytes of this type in a std140 uniform block.
+ *
+ * Note that this is not GL_UNIFORM_SIZE (which is the number of
+ * elements in the array)
+ */
+ unsigned std140_size(bool row_major) const;
+
+ /**
+ * Alignment in bytes of the start of this type in a std430 shader
+ * storage block.
+ */
+ unsigned std430_base_alignment(bool row_major) const;
+
+ /**
+ * Calculate array stride in bytes of this type in a std430 shader storage
+ * block.
+ */
+ unsigned std430_array_stride(bool row_major) const;
+
+ /**
+ * Size in bytes of this type in a std430 shader storage block.
+ *
+ * Note that this is not GL_BUFFER_SIZE
+ */
+ unsigned std430_size(bool row_major) const;
+
+ /**
+ * \brief Can this type be implicitly converted to another?
+ *
+ * \return True if the types are identical or if this type can be converted
+ * to \c desired according to Section 4.1.10 of the GLSL spec.
+ *
+ * \verbatim
+ * From page 25 (31 of the pdf) of the GLSL 1.50 spec, Section 4.1.10
+ * Implicit Conversions:
+ *
+ * In some situations, an expression and its type will be implicitly
+ * converted to a different type. The following table shows all allowed
+ * implicit conversions:
+ *
+ * Type of expression | Can be implicitly converted to
+ * --------------------------------------------------
+ * int float
+ * uint
+ *
+ * ivec2 vec2
+ * uvec2
+ *
+ * ivec3 vec3
+ * uvec3
+ *
+ * ivec4 vec4
+ * uvec4
+ *
+ * There are no implicit array or structure conversions. For example,
+ * an array of int cannot be implicitly converted to an array of float.
+ * There are no implicit conversions between signed and unsigned
+ * integers.
+ * \endverbatim
+ */
+ bool can_implicitly_convert_to(const glsl_type *desired,
+ _mesa_glsl_parse_state *state) const;
+
+ /**
+ * Query whether or not a type is a scalar (non-vector and non-matrix).
+ */
+ bool is_scalar() const
+ {
+ return (vector_elements == 1)
+ && (base_type >= GLSL_TYPE_UINT)
+ && (base_type <= GLSL_TYPE_BOOL);
+ }
+
+ /**
+ * Query whether or not a type is a vector
+ */
+ bool is_vector() const
+ {
+ return (vector_elements > 1)
+ && (matrix_columns == 1)
+ && (base_type >= GLSL_TYPE_UINT)
+ && (base_type <= GLSL_TYPE_BOOL);
+ }
+
+ /**
+ * Query whether or not a type is a matrix
+ */
+ bool is_matrix() const
+ {
+ /* GLSL only has float and double matrices. */
+ return (matrix_columns > 1) && (base_type == GLSL_TYPE_FLOAT || base_type == GLSL_TYPE_DOUBLE);
+ }
+
+ /**
+ * Query whether or not a type is a non-array numeric type
+ */
+ bool is_numeric() const
+ {
+ return (base_type >= GLSL_TYPE_UINT) && (base_type <= GLSL_TYPE_DOUBLE);
+ }
+
+ /**
+ * Query whether or not a type is an integral type
+ */
+ bool is_integer() const
+ {
+ return (base_type == GLSL_TYPE_UINT) || (base_type == GLSL_TYPE_INT);
+ }
+
+ /**
+ * Query whether or not type is an integral type, or for struct and array
+ * types, contains an integral type.
+ */
+ bool contains_integer() const;
+
+ /**
+ * Query whether or not type is a double type, or for struct and array
+ * types, contains a double type.
+ */
+ bool contains_double() const;
+
+ /**
+ * Query whether or not a type is a float type
+ */
+ bool is_float() const
+ {
+ return base_type == GLSL_TYPE_FLOAT;
+ }
+
+ /**
+ * Query whether or not a type is a double type
+ */
+ bool is_double() const
+ {
+ return base_type == GLSL_TYPE_DOUBLE;
+ }
+
+ /**
+ * Query whether a double takes two slots.
+ */
+ bool is_dual_slot_double() const
+ {
+ return base_type == GLSL_TYPE_DOUBLE && vector_elements > 2;
+ }
+
+ /**
+ * Query whether or not a type is a non-array boolean type
+ */
+ bool is_boolean() const
+ {
+ return base_type == GLSL_TYPE_BOOL;
+ }
+
+ /**
+ * Query whether or not a type is a sampler
+ */
+ bool is_sampler() const
+ {
+ return base_type == GLSL_TYPE_SAMPLER;
+ }
+
+ /**
+ * Query whether or not type is a sampler, or for struct and array
+ * types, contains a sampler.
+ */
+ bool contains_sampler() const;
+
+ /**
+ * Get the Mesa texture target index for a sampler type.
+ */
+ gl_texture_index sampler_index() const;
+
+ /**
+ * Query whether or not type is an image, or for struct and array
+ * types, contains an image.
+ */
+ bool contains_image() const;
+
+ /**
+ * Query whether or not a type is an image
+ */
+ bool is_image() const
+ {
+ return base_type == GLSL_TYPE_IMAGE;
+ }
+
+ /**
+ * Query whether or not a type is an array
+ */
+ bool is_array() const
+ {
+ return base_type == GLSL_TYPE_ARRAY;
+ }
+
+ bool is_array_of_arrays() const
+ {
+ return is_array() && fields.array->is_array();
+ }
+
+ /**
+ * Query whether or not a type is a record
+ */
+ bool is_record() const
+ {
+ return base_type == GLSL_TYPE_STRUCT;
+ }
+
+ /**
+ * Query whether or not a type is an interface
+ */
+ bool is_interface() const
+ {
+ return base_type == GLSL_TYPE_INTERFACE;
+ }
+
+ /**
+ * Query whether or not a type is the void type singleton.
+ */
+ bool is_void() const
+ {
+ return base_type == GLSL_TYPE_VOID;
+ }
+
+ /**
+ * Query whether or not a type is the error type singleton.
+ */
+ bool is_error() const
+ {
+ return base_type == GLSL_TYPE_ERROR;
+ }
+
+ /**
+ * Query whether or not a type is a subroutine
+ */
+ bool is_subroutine() const
+ {
+ return base_type == GLSL_TYPE_SUBROUTINE;
+ }
+ bool contains_subroutine() const;
+
+ /**
+ * Query if a type is unnamed/anonymous (named by the parser)
+ */
+ bool is_anonymous() const
+ {
+ return !strncmp(name, "#anon", 5);
+ }
+
+ /**
+ * Get the type stripped of any arrays
+ *
+ * \return
+ * Pointer to the type of elements of the first non-array type for array
+ * types, or pointer to itself for non-array types.
+ */
+ const glsl_type *without_array() const
+ {
+ const glsl_type *t = this;
+
+ while (t->is_array())
+ t = t->fields.array;
+
+ return t;
+ }
+
+ /**
+ * Return the total number of elements in an array including the elements
+ * in arrays of arrays.
+ */
+ unsigned arrays_of_arrays_size() const
+ {
+ if (!is_array())
+ return 0;
+
+ unsigned size = length;
+ const glsl_type *base_type = fields.array;
+
+ while (base_type->is_array()) {
+ size = size * base_type->length;
+ base_type = base_type->fields.array;
+ }
+ return size;
+ }
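
For example (a hedged sketch, assuming the array-type cache): an array of three float[4] elements reports 3 * 4 = 12 total elements:

   const glsl_type *inner = glsl_type::get_array_instance(glsl_type::float_type, 4);
   const glsl_type *outer = glsl_type::get_array_instance(inner, 3);
   unsigned total = outer->arrays_of_arrays_size();   /* 12 */
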
+
+ /**
+ * Return the amount of atomic counter storage required for a type.
+ */
+ unsigned atomic_size() const
+ {
+ if (base_type == GLSL_TYPE_ATOMIC_UINT)
+ return ATOMIC_COUNTER_SIZE;
+ else if (is_array())
+ return length * fields.array->atomic_size();
+ else
+ return 0;
+ }
+
+ /**
+ * Return whether a type contains any atomic counters.
+ */
+ bool contains_atomic() const
+ {
+ return atomic_size() > 0;
+ }
+
+ /**
+ * Return whether a type contains any opaque types.
+ */
+ bool contains_opaque() const;
+
+ /**
+ * Query the full type of a matrix row
+ *
+ * \return
+ * If the type is not a matrix, \c glsl_type::error_type is returned.
+ * Otherwise a type matching the rows of the matrix is returned.
+ */
+ const glsl_type *row_type() const
+ {
+ return is_matrix()
+ ? get_instance(base_type, matrix_columns, 1)
+ : error_type;
+ }
+
+ /**
+ * Query the full type of a matrix column
+ *
+ * \return
+ * If the type is not a matrix, \c glsl_type::error_type is returned.
+ * Otherwise a type matching the columns of the matrix is returned.
+ */
+ const glsl_type *column_type() const
+ {
+ return is_matrix()
+ ? get_instance(base_type, vector_elements, 1)
+ : error_type;
+ }
+
+ /**
+ * Get the type of a structure field
+ *
+ * \return
+ * Pointer to the type of the named field. If the type is not a structure
+ * or the named field does not exist, \c glsl_type::error_type is returned.
+ */
+ const glsl_type *field_type(const char *name) const;
+
+ /**
+ * Get the location of a field within a record type
+ */
+ int field_index(const char *name) const;
+
+ /**
+ * Query the number of elements in an array type
+ *
+ * \return
+ * The number of elements in the array for array types or -1 for non-array
+ * types. If the number of elements in the array has not yet been declared,
+ * zero is returned.
+ */
+ int array_size() const
+ {
+ return is_array() ? length : -1;
+ }
+
+ /**
+ * Query whether the array size for all dimensions has been declared.
+ */
+ bool is_unsized_array() const
+ {
+ return is_array() && length == 0;
+ }
+
+ /**
+ * Return the number of coordinate components needed for this
+ * sampler or image type.
+ *
+ * This is based purely on the sampler's dimensionality. For example, this
+ * returns 1 for sampler1D, and 3 for sampler2DArray.
+ *
+ * Note that this is often different from the actual coordinate type used in
+ * a texturing built-in function, since those pack additional values (such
+ * as the shadow comparator or projector) into the coordinate type.
+ */
+ int coordinate_components() const;
+
+ /**
+ * Compare a record type against another record type.
+ *
+ * This is useful for matching record types declared across shader stages.
+ */
+ bool record_compare(const glsl_type *b) const;
+
+ private:
+
+ static mtx_t mutex;
+
+ /**
+ * ralloc context for all glsl_type allocations
+ *
+ * Set on the first call to \c glsl_type::new.
+ */
+ static void *mem_ctx;
+
+ void init_ralloc_type_ctx(void);
+
+ /** Constructor for vector and matrix types */
+ glsl_type(GLenum gl_type,
+ glsl_base_type base_type, unsigned vector_elements,
+ unsigned matrix_columns, const char *name);
+
+ /** Constructor for sampler or image types */
+ glsl_type(GLenum gl_type, glsl_base_type base_type,
+ enum glsl_sampler_dim dim, bool shadow, bool array,
+ unsigned type, const char *name);
+
+ /** Constructor for record types */
+ glsl_type(const glsl_struct_field *fields, unsigned num_fields,
+ const char *name);
+
+ /** Constructor for interface types */
+ glsl_type(const glsl_struct_field *fields, unsigned num_fields,
+ enum glsl_interface_packing packing, const char *name);
+
++ /** Constructor for function types */
++ glsl_type(const glsl_type *return_type,
++ const glsl_function_param *params, unsigned num_params);
++
+ /** Constructor for array types */
+ glsl_type(const glsl_type *array, unsigned length);
+
+ /** Constructor for subroutine types */
+ glsl_type(const char *name);
+
+ /** Hash table containing the known array types. */
+ static struct hash_table *array_types;
+
+ /** Hash table containing the known record types. */
+ static struct hash_table *record_types;
+
+ /** Hash table containing the known interface types. */
+ static struct hash_table *interface_types;
+
+ /** Hash table containing the known subroutine types. */
+ static struct hash_table *subroutine_types;
+
++ /** Hash table containing the known function types. */
++ static struct hash_table *function_types;
++
+ static bool record_key_compare(const void *a, const void *b);
+ static unsigned record_key_hash(const void *key);
+
+ /**
+ * \name Built-in type flyweights
+ */
+ /*@{*/
+ #undef DECL_TYPE
+ #define DECL_TYPE(NAME, ...) static const glsl_type _##NAME##_type;
+ #undef STRUCT_TYPE
+ #define STRUCT_TYPE(NAME) static const glsl_type _struct_##NAME##_type;
+ #include "compiler/builtin_type_macros.h"
+ /*@}*/
+
+ /**
+ * \name Friend functions.
+ *
+ * These functions are friends because they must have C linkage and they
+ * need to call various private methods or access various private static
+ * data.
+ */
+ /*@{*/
+ friend void _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *);
+ friend void _mesa_glsl_release_types(void);
+ /*@}*/
+ };
+
++#undef DECL_TYPE
++#undef STRUCT_TYPE
++#endif /* __cplusplus */
++
+ struct glsl_struct_field {
+ const struct glsl_type *type;
+ const char *name;
+
+ /**
+ * For interface blocks, gl_varying_slot corresponding to the input/output
+ * if this is a built-in input/output (i.e. a member of the built-in
+ * gl_PerVertex interface block); -1 otherwise.
+ *
+ * Ignored for structs.
+ */
+ int location;
+
+ /**
+ * For interface blocks, the interpolation mode (as in
+ * ir_variable::interpolation). 0 otherwise.
+ */
+ unsigned interpolation:2;
+
+ /**
+ * For interface blocks, 1 if this variable uses centroid interpolation (as
+ * in ir_variable::centroid). 0 otherwise.
+ */
+ unsigned centroid:1;
+
+ /**
+ * For interface blocks, 1 if this variable uses sample interpolation (as
+ * in ir_variable::sample). 0 otherwise.
+ */
+ unsigned sample:1;
+
+ /**
+ * Layout of the matrix. Uses glsl_matrix_layout values.
+ */
+ unsigned matrix_layout:2;
+
+ /**
+ * For interface blocks, 1 if this variable is a per-patch input or output
+ * (as in ir_variable::patch). 0 otherwise.
+ */
+ unsigned patch:1;
+
+ /**
+ * Precision qualifier
+ */
+ unsigned precision:2;
+
+ /**
+ * Image qualifiers, applicable to buffer variables defined in shader
+ * storage buffer objects (SSBOs)
+ */
+ unsigned image_read_only:1;
+ unsigned image_write_only:1;
+ unsigned image_coherent:1;
+ unsigned image_volatile:1;
+ unsigned image_restrict:1;
+
++#ifdef __cplusplus
+ glsl_struct_field(const struct glsl_type *_type, const char *_name)
+ : type(_type), name(_name), location(-1), interpolation(0), centroid(0),
+ sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
+ precision(GLSL_PRECISION_NONE)
+ {
+ /* empty */
+ }
+
+ glsl_struct_field()
+ {
+ /* empty */
+ }
++#endif
++};
++
++struct glsl_function_param {
++ const struct glsl_type *type;
++
++ bool in;
++ bool out;
+ };
+
+ static inline unsigned int
+ glsl_align(unsigned int a, unsigned int align)
+ {
+ return (a + align - 1) / align * align;
+ }
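
glsl_align() rounds a byte offset up to the next multiple of the alignment, e.g. (trivial sketch):

   unsigned off = glsl_align(13, 8);   /* 16; already-aligned values are returned unchanged */
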
+
+ #endif /* GLSL_TYPES_H */
--- /dev/null
+ NIR_GENERATED_FILES = \
+ nir_builder_opcodes.h \
+ nir_constant_expressions.c \
+ nir_opcodes.c \
+ nir_opcodes.h \
+ nir_opt_algebraic.c
+
+ NIR_FILES = \
+ glsl_to_nir.cpp \
+ glsl_to_nir.h \
+ nir.c \
+ nir.h \
+ nir_array.h \
+ nir_builder.h \
+ nir_clone.c \
+ nir_constant_expressions.h \
+ nir_control_flow.c \
+ nir_control_flow.h \
+ nir_control_flow_private.h \
+ nir_dominance.c \
+ nir_from_ssa.c \
++ nir_gather_info.c \
+ nir_gs_count_vertices.c \
++ nir_inline_functions.c \
+ nir_intrinsics.c \
+ nir_intrinsics.h \
+ nir_instr_set.c \
+ nir_instr_set.h \
+ nir_liveness.c \
+ nir_lower_alu_to_scalar.c \
+ nir_lower_atomics.c \
+ nir_lower_clip.c \
+ nir_lower_global_vars_to_local.c \
+ nir_lower_gs_intrinsics.c \
++ nir_lower_indirect_derefs.c \
+ nir_lower_load_const_to_scalar.c \
+ nir_lower_locals_to_regs.c \
+ nir_lower_idiv.c \
+ nir_lower_io.c \
+ nir_lower_outputs_to_temporaries.c \
+ nir_lower_phis_to_scalar.c \
++ nir_lower_returns.c \
+ nir_lower_samplers.c \
+ nir_lower_system_values.c \
+ nir_lower_tex.c \
+ nir_lower_to_source_mods.c \
+ nir_lower_two_sided_color.c \
+ nir_lower_vars_to_ssa.c \
+ nir_lower_var_copies.c \
+ nir_lower_vec_to_movs.c \
+ nir_metadata.c \
+ nir_move_vec_src_uses_to_dest.c \
+ nir_normalize_cubemap_coords.c \
+ nir_opt_constant_folding.c \
+ nir_opt_copy_propagate.c \
+ nir_opt_cse.c \
+ nir_opt_dce.c \
+ nir_opt_dead_cf.c \
+ nir_opt_gcm.c \
+ nir_opt_global_to_local.c \
+ nir_opt_peephole_select.c \
+ nir_opt_remove_phis.c \
+ nir_opt_undef.c \
++ nir_phi_builder.c \
++ nir_phi_builder.h \
+ nir_print.c \
+ nir_remove_dead_variables.c \
++ nir_repair_ssa.c \
+ nir_search.c \
+ nir_search.h \
+ nir_split_var_copies.c \
+ nir_sweep.c \
+ nir_to_ssa.c \
+ nir_validate.c \
+ nir_vla.h \
+ nir_worklist.c \
+ nir_worklist.h
+
++SPIRV_FILES = \
++ spirv/nir_spirv.h \
++ spirv/spirv_to_nir.c \
++ spirv/vtn_alu.c \
++ spirv/vtn_cfg.c \
++ spirv/vtn_glsl450.c \
++ spirv/vtn_private.h \
++ spirv/vtn_variables.c
++
--- /dev/null
- nir_visitor(nir_shader *shader);
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "glsl_to_nir.h"
+ #include "nir_control_flow.h"
+ #include "nir_builder.h"
+ #include "compiler/glsl/ir_visitor.h"
+ #include "compiler/glsl/ir_hierarchical_visitor.h"
+ #include "compiler/glsl/ir.h"
+ #include "main/imports.h"
+
+ /*
+ * pass to lower GLSL IR to NIR
+ *
+ * This will lower variable dereferences to loads/stores of corresponding
+ * variables in NIR - the variables will be converted to registers in a later
+ * pass.
+ */
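
For orientation, a hypothetical call into this pass might look as follows (shader_prog, the stage and the NIR options are assumed to come from the driver):

   nir_shader *nir = glsl_to_nir(shader_prog, MESA_SHADER_FRAGMENT, options);
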
+
+ namespace {
+
+ class nir_visitor : public ir_visitor
+ {
+ public:
- nir_visitor v1(shader);
++ nir_visitor(nir_shader *shader, gl_shader *sh);
+ ~nir_visitor();
+
+ virtual void visit(ir_variable *);
+ virtual void visit(ir_function *);
+ virtual void visit(ir_function_signature *);
+ virtual void visit(ir_loop *);
+ virtual void visit(ir_if *);
+ virtual void visit(ir_discard *);
+ virtual void visit(ir_loop_jump *);
+ virtual void visit(ir_return *);
+ virtual void visit(ir_call *);
+ virtual void visit(ir_assignment *);
+ virtual void visit(ir_emit_vertex *);
+ virtual void visit(ir_end_primitive *);
+ virtual void visit(ir_expression *);
+ virtual void visit(ir_swizzle *);
+ virtual void visit(ir_texture *);
+ virtual void visit(ir_constant *);
+ virtual void visit(ir_dereference_variable *);
+ virtual void visit(ir_dereference_record *);
+ virtual void visit(ir_dereference_array *);
+ virtual void visit(ir_barrier *);
+
+ void create_function(ir_function_signature *ir);
+
+ private:
+ void add_instr(nir_instr *instr, unsigned num_components);
+ nir_ssa_def *evaluate_rvalue(ir_rvalue *ir);
+
+ nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs);
+ nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1);
+ nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
+ nir_ssa_def *src2);
+ nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
+ nir_ssa_def *src2, nir_ssa_def *src3);
+
+ bool supports_ints;
+
++ struct gl_shader *sh;
++
+ nir_shader *shader;
+ nir_function_impl *impl;
+ nir_builder b;
+ nir_ssa_def *result; /* result of the expression tree last visited */
+
+ nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir);
+
+ /* the head of the dereference chain we're creating */
+ nir_deref_var *deref_head;
+ /* the tail of the dereference chain we're creating */
+ nir_deref *deref_tail;
+
+ nir_variable *var; /* variable created by ir_variable visitor */
+
+ /* whether the IR we're operating on is per-function or global */
+ bool is_global;
+
+ /* map of ir_variable -> nir_variable */
+ struct hash_table *var_table;
+
+ /* map of ir_function_signature -> nir_function_overload */
+ struct hash_table *overload_table;
+ };
+
+ /*
+ * This visitor runs before the main visitor, calling create_function() for
+ * each function so that the main visitor can resolve forward references in
+ * calls.
+ */
+
+ class nir_function_visitor : public ir_hierarchical_visitor
+ {
+ public:
+ nir_function_visitor(nir_visitor *v) : visitor(v)
+ {
+ }
+ virtual ir_visitor_status visit_enter(ir_function *);
+
+ private:
+ nir_visitor *visitor;
+ };
+
+ }; /* end of anonymous namespace */
+
+ nir_shader *
+ glsl_to_nir(const struct gl_shader_program *shader_prog,
+ gl_shader_stage stage,
+ const nir_shader_compiler_options *options)
+ {
+ struct gl_shader *sh = shader_prog->_LinkedShaders[stage];
+
+ nir_shader *shader = nir_shader_create(NULL, stage, options);
+
- nir_lower_outputs_to_temporaries(shader);
++ nir_visitor v1(shader, sh);
+ nir_function_visitor v2(&v1);
+ v2.run(sh->ir);
+ visit_exec_list(sh->ir, &v1);
+
-nir_visitor::nir_visitor(nir_shader *shader)
++ nir_function *main = NULL;
++ nir_foreach_function(shader, func) {
++ if (strcmp(func->name, "main") == 0) {
++ main = func;
++ break;
++ }
++ }
++ assert(main);
++
++ nir_lower_outputs_to_temporaries(shader, main);
+
+ shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
+ if (shader_prog->Label)
+ shader->info.label = ralloc_strdup(shader, shader_prog->Label);
+ shader->info.num_textures = _mesa_fls(sh->Program->SamplersUsed);
+ shader->info.num_ubos = sh->NumUniformBlocks;
+ shader->info.num_abos = shader_prog->NumAtomicBuffers;
+ shader->info.num_ssbos = sh->NumShaderStorageBlocks;
+ shader->info.num_images = sh->NumImages;
+ shader->info.inputs_read = sh->Program->InputsRead;
+ shader->info.outputs_written = sh->Program->OutputsWritten;
+ shader->info.patch_inputs_read = sh->Program->PatchInputsRead;
+ shader->info.patch_outputs_written = sh->Program->PatchOutputsWritten;
+ shader->info.system_values_read = sh->Program->SystemValuesRead;
+ shader->info.uses_texture_gather = sh->Program->UsesGather;
+ shader->info.uses_clip_distance_out =
+ sh->Program->ClipDistanceArraySize != 0;
+ shader->info.separate_shader = shader_prog->SeparateShader;
+ shader->info.has_transform_feedback_varyings =
+ shader_prog->TransformFeedback.NumVarying > 0;
+
+ switch (stage) {
+ case MESA_SHADER_TESS_CTRL:
+ shader->info.tcs.vertices_out = shader_prog->TessCtrl.VerticesOut;
+ break;
+
+ case MESA_SHADER_GEOMETRY:
+ shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn;
+ shader->info.gs.output_primitive = sh->Geom.OutputType;
+ shader->info.gs.vertices_out = sh->Geom.VerticesOut;
+ shader->info.gs.invocations = sh->Geom.Invocations;
+ shader->info.gs.uses_end_primitive = shader_prog->Geom.UsesEndPrimitive;
+ shader->info.gs.uses_streams = shader_prog->Geom.UsesStreams;
+ break;
+
+ case MESA_SHADER_FRAGMENT: {
+ struct gl_fragment_program *fp =
+ (struct gl_fragment_program *)sh->Program;
+
+ shader->info.fs.uses_discard = fp->UsesKill;
+ shader->info.fs.early_fragment_tests = sh->EarlyFragmentTests;
+ shader->info.fs.depth_layout = fp->FragDepthLayout;
+ break;
+ }
+
+ case MESA_SHADER_COMPUTE: {
+ struct gl_compute_program *cp = (struct gl_compute_program *)sh->Program;
+ shader->info.cs.local_size[0] = cp->LocalSize[0];
+ shader->info.cs.local_size[1] = cp->LocalSize[1];
+ shader->info.cs.local_size[2] = cp->LocalSize[2];
+ break;
+ }
+
+ default:
+ break; /* No stage-specific info */
+ }
+
+ return shader;
+ }
+
- case ir_unop_unpack_half_2x16_split_x:
- result = nir_unpack_half_2x16_split_x(&b, srcs[0]);
- break;
- case ir_unop_unpack_half_2x16_split_y:
- result = nir_unpack_half_2x16_split_y(&b, srcs[0]);
- break;
++nir_visitor::nir_visitor(nir_shader *shader, gl_shader *sh)
+ {
+ this->supports_ints = shader->options->native_integers;
+ this->shader = shader;
++ this->sh = sh;
+ this->is_global = true;
+ this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ this->overload_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ }
+
+ nir_visitor::~nir_visitor()
+ {
+ _mesa_hash_table_destroy(this->var_table, NULL);
+ _mesa_hash_table_destroy(this->overload_table, NULL);
+ }
+
+ nir_deref_var *
+ nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir)
+ {
+ ir->accept(this);
+ ralloc_steal(mem_ctx, this->deref_head);
+ return this->deref_head;
+ }
+
+ static nir_constant *
+ constant_copy(ir_constant *ir, void *mem_ctx)
+ {
+ if (ir == NULL)
+ return NULL;
+
+ nir_constant *ret = ralloc(mem_ctx, nir_constant);
+
+ unsigned total_elems = ir->type->components();
+ unsigned i;
+
+ ret->num_elements = 0;
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_UINT:
+ for (i = 0; i < total_elems; i++)
+ ret->value.u[i] = ir->value.u[i];
+ break;
+
+ case GLSL_TYPE_INT:
+ for (i = 0; i < total_elems; i++)
+ ret->value.i[i] = ir->value.i[i];
+ break;
+
+ case GLSL_TYPE_FLOAT:
+ for (i = 0; i < total_elems; i++)
+ ret->value.f[i] = ir->value.f[i];
+ break;
+
+ case GLSL_TYPE_BOOL:
+ for (i = 0; i < total_elems; i++)
+ ret->value.b[i] = ir->value.b[i];
+ break;
+
+ case GLSL_TYPE_STRUCT:
+ ret->elements = ralloc_array(mem_ctx, nir_constant *,
+ ir->type->length);
+ ret->num_elements = ir->type->length;
+
+ i = 0;
+ foreach_in_list(ir_constant, field, &ir->components) {
+ ret->elements[i] = constant_copy(field, mem_ctx);
+ i++;
+ }
+ break;
+
+ case GLSL_TYPE_ARRAY:
+ ret->elements = ralloc_array(mem_ctx, nir_constant *,
+ ir->type->length);
+ ret->num_elements = ir->type->length;
+
+ for (i = 0; i < ir->type->length; i++)
+ ret->elements[i] = constant_copy(ir->array_elements[i], mem_ctx);
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+
+ return ret;
+ }
+
+ void
+ nir_visitor::visit(ir_variable *ir)
+ {
+ nir_variable *var = ralloc(shader, nir_variable);
+ var->type = ir->type;
+ var->name = ralloc_strdup(var, ir->name);
+
+ var->data.read_only = ir->data.read_only;
+ var->data.centroid = ir->data.centroid;
+ var->data.sample = ir->data.sample;
+ var->data.patch = ir->data.patch;
+ var->data.invariant = ir->data.invariant;
+ var->data.location = ir->data.location;
+
+ switch(ir->data.mode) {
+ case ir_var_auto:
+ case ir_var_temporary:
+ if (is_global)
+ var->data.mode = nir_var_global;
+ else
+ var->data.mode = nir_var_local;
+ break;
+
+ case ir_var_function_in:
+ case ir_var_function_out:
+ case ir_var_function_inout:
+ case ir_var_const_in:
+ var->data.mode = nir_var_local;
+ break;
+
+ case ir_var_shader_in:
+ if (shader->stage == MESA_SHADER_FRAGMENT &&
+ ir->data.location == VARYING_SLOT_FACE) {
+ /* For whatever reason, GLSL IR makes gl_FrontFacing an input */
+ var->data.location = SYSTEM_VALUE_FRONT_FACE;
+ var->data.mode = nir_var_system_value;
+ } else if (shader->stage == MESA_SHADER_GEOMETRY &&
+ ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
+ /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */
+ var->data.location = SYSTEM_VALUE_PRIMITIVE_ID;
+ var->data.mode = nir_var_system_value;
+ } else {
+ var->data.mode = nir_var_shader_in;
+ }
+ break;
+
+ case ir_var_shader_out:
+ var->data.mode = nir_var_shader_out;
+ break;
+
+ case ir_var_uniform:
+ var->data.mode = nir_var_uniform;
+ break;
+
+ case ir_var_shader_storage:
+ var->data.mode = nir_var_shader_storage;
+ break;
+
+ case ir_var_system_value:
+ var->data.mode = nir_var_system_value;
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+
+ var->data.interpolation = ir->data.interpolation;
+ var->data.origin_upper_left = ir->data.origin_upper_left;
+ var->data.pixel_center_integer = ir->data.pixel_center_integer;
+ var->data.explicit_location = ir->data.explicit_location;
+ var->data.explicit_index = ir->data.explicit_index;
+ var->data.explicit_binding = ir->data.explicit_binding;
+ var->data.has_initializer = ir->data.has_initializer;
+ var->data.location_frac = ir->data.location_frac;
+ var->data.from_named_ifc_block_array = ir->data.from_named_ifc_block_array;
+ var->data.from_named_ifc_block_nonarray = ir->data.from_named_ifc_block_nonarray;
+
+ switch (ir->data.depth_layout) {
+ case ir_depth_layout_none:
+ var->data.depth_layout = nir_depth_layout_none;
+ break;
+ case ir_depth_layout_any:
+ var->data.depth_layout = nir_depth_layout_any;
+ break;
+ case ir_depth_layout_greater:
+ var->data.depth_layout = nir_depth_layout_greater;
+ break;
+ case ir_depth_layout_less:
+ var->data.depth_layout = nir_depth_layout_less;
+ break;
+ case ir_depth_layout_unchanged:
+ var->data.depth_layout = nir_depth_layout_unchanged;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ var->data.index = ir->data.index;
++ var->data.descriptor_set = 0;
+ var->data.binding = ir->data.binding;
+ var->data.offset = ir->data.offset;
+ var->data.image.read_only = ir->data.image_read_only;
+ var->data.image.write_only = ir->data.image_write_only;
+ var->data.image.coherent = ir->data.image_coherent;
+ var->data.image._volatile = ir->data.image_volatile;
+ var->data.image.restrict_flag = ir->data.image_restrict;
+ var->data.image.format = ir->data.image_format;
+ var->data.max_array_access = ir->data.max_array_access;
+
+ var->num_state_slots = ir->get_num_state_slots();
+ if (var->num_state_slots > 0) {
+ var->state_slots = ralloc_array(var, nir_state_slot,
+ var->num_state_slots);
+
+ ir_state_slot *state_slots = ir->get_state_slots();
+ for (unsigned i = 0; i < var->num_state_slots; i++) {
+ for (unsigned j = 0; j < 5; j++)
+ var->state_slots[i].tokens[j] = state_slots[i].tokens[j];
+ var->state_slots[i].swizzle = state_slots[i].swizzle;
+ }
+ } else {
+ var->state_slots = NULL;
+ }
+
+ var->constant_initializer = constant_copy(ir->constant_initializer, var);
+
+ var->interface_type = ir->get_interface_type();
+
+ if (var->data.mode == nir_var_local)
+ nir_function_impl_add_variable(impl, var);
+ else
+ nir_shader_add_variable(shader, var);
+
+ _mesa_hash_table_insert(var_table, ir, var);
+ this->var = var;
+ }
+
+ ir_visitor_status
+ nir_function_visitor::visit_enter(ir_function *ir)
+ {
+ foreach_in_list(ir_function_signature, sig, &ir->signatures) {
+ visitor->create_function(sig);
+ }
+ return visit_continue_with_parent;
+ }
+
+ void
+ nir_visitor::create_function(ir_function_signature *ir)
+ {
+ if (ir->is_intrinsic)
+ return;
+
+ nir_function *func = nir_function_create(shader, ir->function_name());
+
+ unsigned num_params = ir->parameters.length();
+ func->num_params = num_params;
+ func->params = ralloc_array(shader, nir_parameter, num_params);
+
+ unsigned i = 0;
+ foreach_in_list(ir_variable, param, &ir->parameters) {
+ switch (param->data.mode) {
+ case ir_var_function_in:
+ func->params[i].param_type = nir_parameter_in;
+ break;
+
+ case ir_var_function_out:
+ func->params[i].param_type = nir_parameter_out;
+ break;
+
+ case ir_var_function_inout:
+ func->params[i].param_type = nir_parameter_inout;
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+
+ func->params[i].type = param->type;
+ i++;
+ }
+
+ func->return_type = ir->return_type;
+
+ _mesa_hash_table_insert(this->overload_table, ir, func);
+ }
+
+ void
+ nir_visitor::visit(ir_function *ir)
+ {
+ foreach_in_list(ir_function_signature, sig, &ir->signatures)
+ sig->accept(this);
+ }
+
+ void
+ nir_visitor::visit(ir_function_signature *ir)
+ {
+ if (ir->is_intrinsic)
+ return;
+
+ struct hash_entry *entry =
+ _mesa_hash_table_search(this->overload_table, ir);
+
+ assert(entry);
+ nir_function *func = (nir_function *) entry->data;
+
+ if (ir->is_defined) {
+ nir_function_impl *impl = nir_function_impl_create(func);
+ this->impl = impl;
+
+ unsigned num_params = func->num_params;
+ impl->num_params = num_params;
+ impl->params = ralloc_array(this->shader, nir_variable *, num_params);
+ unsigned i = 0;
+ foreach_in_list(ir_variable, param, &ir->parameters) {
+ param->accept(this);
+ impl->params[i] = this->var;
+ i++;
+ }
+
+ if (func->return_type == glsl_type::void_type) {
+ impl->return_var = NULL;
+ } else {
+ impl->return_var = ralloc(this->shader, nir_variable);
+ impl->return_var->name = ralloc_strdup(impl->return_var,
+ "return_var");
+ impl->return_var->type = func->return_type;
+ }
+
+ this->is_global = false;
+
+ nir_builder_init(&b, impl);
+ b.cursor = nir_after_cf_list(&impl->body);
+ visit_exec_list(&ir->body, this);
+
+ this->is_global = true;
+ } else {
+ func->impl = NULL;
+ }
+ }
+
+ void
+ nir_visitor::visit(ir_loop *ir)
+ {
+ nir_loop *loop = nir_loop_create(this->shader);
+ nir_builder_cf_insert(&b, &loop->cf_node);
+
+ b.cursor = nir_after_cf_list(&loop->body);
+ visit_exec_list(&ir->body_instructions, this);
+ b.cursor = nir_after_cf_node(&loop->cf_node);
+ }
+
+ void
+ nir_visitor::visit(ir_if *ir)
+ {
+ nir_src condition =
+ nir_src_for_ssa(evaluate_rvalue(ir->condition));
+
+ nir_if *if_stmt = nir_if_create(this->shader);
+ if_stmt->condition = condition;
+ nir_builder_cf_insert(&b, &if_stmt->cf_node);
+
+ b.cursor = nir_after_cf_list(&if_stmt->then_list);
+ visit_exec_list(&ir->then_instructions, this);
+
+ b.cursor = nir_after_cf_list(&if_stmt->else_list);
+ visit_exec_list(&ir->else_instructions, this);
+
+ b.cursor = nir_after_cf_node(&if_stmt->cf_node);
+ }
+
+ void
+ nir_visitor::visit(ir_discard *ir)
+ {
+ /*
+ * discards aren't treated as control flow, because before we lower them
+ * they can appear anywhere in the shader and the stuff after them may still
+ * be executed (yay, crazy GLSL rules!). However, after lowering, all the
+ * discards will be immediately followed by a return.
+ */
+
+ nir_intrinsic_instr *discard;
+ if (ir->condition) {
+ discard = nir_intrinsic_instr_create(this->shader,
+ nir_intrinsic_discard_if);
+ discard->src[0] =
+ nir_src_for_ssa(evaluate_rvalue(ir->condition));
+ } else {
+ discard = nir_intrinsic_instr_create(this->shader, nir_intrinsic_discard);
+ }
+
+ nir_builder_instr_insert(&b, &discard->instr);
+ }
+
+ void
+ nir_visitor::visit(ir_emit_vertex *ir)
+ {
+ nir_intrinsic_instr *instr =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex);
+ instr->const_index[0] = ir->stream_id();
+ nir_builder_instr_insert(&b, &instr->instr);
+ }
+
+ void
+ nir_visitor::visit(ir_end_primitive *ir)
+ {
+ nir_intrinsic_instr *instr =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive);
+ instr->const_index[0] = ir->stream_id();
+ nir_builder_instr_insert(&b, &instr->instr);
+ }
+
+ void
+ nir_visitor::visit(ir_loop_jump *ir)
+ {
+ nir_jump_type type;
+ switch (ir->mode) {
+ case ir_loop_jump::jump_break:
+ type = nir_jump_break;
+ break;
+ case ir_loop_jump::jump_continue:
+ type = nir_jump_continue;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ nir_jump_instr *instr = nir_jump_instr_create(this->shader, type);
+ nir_builder_instr_insert(&b, &instr->instr);
+ }
+
+ void
+ nir_visitor::visit(ir_return *ir)
+ {
+ if (ir->value != NULL) {
+ nir_intrinsic_instr *copy =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
+
+ copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var);
+ copy->variables[1] = evaluate_deref(&copy->instr, ir->value);
+ }
+
+ nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
+ nir_builder_instr_insert(&b, &instr->instr);
+ }
+
+ void
+ nir_visitor::visit(ir_call *ir)
+ {
+ if (ir->callee->is_intrinsic) {
+ nir_intrinsic_op op;
+ if (strcmp(ir->callee_name(), "__intrinsic_atomic_read") == 0) {
+ op = nir_intrinsic_atomic_counter_read_var;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_increment") == 0) {
+ op = nir_intrinsic_atomic_counter_inc_var;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_predecrement") == 0) {
+ op = nir_intrinsic_atomic_counter_dec_var;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_load") == 0) {
+ op = nir_intrinsic_image_load;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_store") == 0) {
+ op = nir_intrinsic_image_store;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_add") == 0) {
+ op = nir_intrinsic_image_atomic_add;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_min") == 0) {
+ op = nir_intrinsic_image_atomic_min;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_max") == 0) {
+ op = nir_intrinsic_image_atomic_max;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_and") == 0) {
+ op = nir_intrinsic_image_atomic_and;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_or") == 0) {
+ op = nir_intrinsic_image_atomic_or;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_xor") == 0) {
+ op = nir_intrinsic_image_atomic_xor;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_exchange") == 0) {
+ op = nir_intrinsic_image_atomic_exchange;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_comp_swap") == 0) {
+ op = nir_intrinsic_image_atomic_comp_swap;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier") == 0) {
+ op = nir_intrinsic_memory_barrier;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_size") == 0) {
+ op = nir_intrinsic_image_size;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_samples") == 0) {
+ op = nir_intrinsic_image_samples;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") == 0) {
+ op = nir_intrinsic_store_ssbo;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) {
+ op = nir_intrinsic_load_ssbo;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_add;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_and;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_or;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_xor;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_ssbo") == 0) {
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_ssbo_atomic_imin;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_ssbo_atomic_umin;
+ else
+ unreachable("Invalid type");
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_ssbo") == 0) {
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_ssbo_atomic_imax;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_ssbo_atomic_umax;
+ else
+ unreachable("Invalid type");
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_exchange;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_comp_swap;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_shader_clock") == 0) {
+ op = nir_intrinsic_shader_clock;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_group_memory_barrier") == 0) {
+ op = nir_intrinsic_group_memory_barrier;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_atomic_counter") == 0) {
+ op = nir_intrinsic_memory_barrier_atomic_counter;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_buffer") == 0) {
+ op = nir_intrinsic_memory_barrier_buffer;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_image") == 0) {
+ op = nir_intrinsic_memory_barrier_image;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_shared") == 0) {
+ op = nir_intrinsic_memory_barrier_shared;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_load_shared") == 0) {
+ op = nir_intrinsic_load_shared;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_store_shared") == 0) {
+ op = nir_intrinsic_store_shared;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_add;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_and;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_or;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_xor;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_shared") == 0) {
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_shared_atomic_imin;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_shared_atomic_umin;
+ else
+ unreachable("Invalid type");
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_shared") == 0) {
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_shared_atomic_imax;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_shared_atomic_umax;
+ else
+ unreachable("Invalid type");
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_exchange;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_comp_swap;
+ } else {
+ unreachable("not reached");
+ }
+
+ nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
+ nir_dest *dest = &instr->dest;
+
+ switch (op) {
+ case nir_intrinsic_atomic_counter_read_var:
+ case nir_intrinsic_atomic_counter_inc_var:
+ case nir_intrinsic_atomic_counter_dec_var: {
+ ir_dereference *param =
+ (ir_dereference *) ir->actual_parameters.get_head();
+ instr->variables[0] = evaluate_deref(&instr->instr, param);
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap:
+ case nir_intrinsic_image_samples:
+ case nir_intrinsic_image_size: {
+ nir_ssa_undef_instr *instr_undef =
+ nir_ssa_undef_instr_create(shader, 1);
+ nir_builder_instr_insert(&b, &instr_undef->instr);
+
+ /* Set the image variable dereference. */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_dereference *image = (ir_dereference *)param;
+ const glsl_type *type =
+ image->variable_referenced()->type->without_array();
+
+ instr->variables[0] = evaluate_deref(&instr->instr, image);
+ param = param->get_next();
+
+ /* Set the intrinsic destination. */
+ if (ir->return_deref) {
+ const nir_intrinsic_info *info =
+ &nir_intrinsic_infos[instr->intrinsic];
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ info->dest_components, NULL);
+ }
+
+ if (op == nir_intrinsic_image_size ||
+ op == nir_intrinsic_image_samples) {
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+
+ /* Set the address argument, extending the coordinate vector to four
+ * components.
+ */
+ nir_ssa_def *src_addr =
+ evaluate_rvalue((ir_dereference *)param);
+ nir_ssa_def *srcs[4];
+
+ for (int i = 0; i < 4; i++) {
+ if (i < type->coordinate_components())
+ srcs[i] = nir_channel(&b, src_addr, i);
+ else
+ srcs[i] = &instr_undef->def;
+ }
+
+ instr->src[0] = nir_src_for_ssa(nir_vec(&b, srcs, 4));
+ param = param->get_next();
+
+ /* Set the sample argument, which is undefined for single-sample
+ * images.
+ */
+ if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
+ instr->src[1] =
+ nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
+ param = param->get_next();
+ } else {
+ instr->src[1] = nir_src_for_ssa(&instr_undef->def);
+ }
+
+ /* Set the intrinsic parameters. */
+ if (!param->is_tail_sentinel()) {
+ instr->src[2] =
+ nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
+ param = param->get_next();
+ }
+
+ if (!param->is_tail_sentinel()) {
+ instr->src[3] =
+ nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
+ param = param->get_next();
+ }
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_memory_barrier:
+ case nir_intrinsic_group_memory_barrier:
+ case nir_intrinsic_memory_barrier_atomic_counter:
+ case nir_intrinsic_memory_barrier_buffer:
+ case nir_intrinsic_memory_barrier_image:
+ case nir_intrinsic_memory_barrier_shared:
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ case nir_intrinsic_shader_clock:
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ case nir_intrinsic_store_ssbo: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val));
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
+ instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));
+ instr->const_index[0] = write_mask->value.u[0];
+ instr->num_components = val->type->vector_elements;
+
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_load_ssbo: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block));
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
+
+ const glsl_type *type = ir->return_deref->var->type;
+ instr->num_components = type->vector_elements;
+
+ /* Set up the destination register */
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ type->vector_elements, NULL);
+
+ /* Insert the created nir instruction now since in the case of boolean
+ * result we will need to emit another instruction after it
+ */
+ nir_builder_instr_insert(&b, &instr->instr);
+
+ /*
+ * In SSBOs/UBOs, a true boolean value is any non-zero value, but we
+ * consider a true boolean to be ~0. Fix this up with a != 0
+ * comparison.
+ */
+ if (type->base_type == GLSL_TYPE_BOOL) {
+ nir_alu_instr *load_ssbo_compare =
+ nir_alu_instr_create(shader, nir_op_ine);
+ load_ssbo_compare->src[0].src.is_ssa = true;
+ load_ssbo_compare->src[0].src.ssa = &instr->dest.ssa;
+ load_ssbo_compare->src[1].src =
+ nir_src_for_ssa(nir_imm_int(&b, 0));
+ for (unsigned i = 0; i < type->vector_elements; i++)
+ load_ssbo_compare->src[1].swizzle[i] = 0;
+ nir_ssa_dest_init(&load_ssbo_compare->instr,
+ &load_ssbo_compare->dest.dest,
+ type->vector_elements, NULL);
+ load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1;
+ nir_builder_instr_insert(&b, &load_ssbo_compare->instr);
+ dest = &load_ssbo_compare->dest.dest;
+ }
+ break;
+ }
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap: {
+ int param_count = ir->actual_parameters.length();
+ assert(param_count == 3 || param_count == 4);
+
+ /* Block index */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_instruction *inst = (ir_instruction *) param;
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* Offset */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* data1 parameter (this is always present) */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* data2 parameter (only with atomic_comp_swap) */
+ if (param_count == 4) {
+ assert(op == nir_intrinsic_ssbo_atomic_comp_swap);
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[3] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+ }
+
+ /* Atomic result */
+ assert(ir->return_deref);
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ ir->return_deref->type->vector_elements, NULL);
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_load_shared: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ instr->const_index[0] = 0;
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset));
+
+ const glsl_type *type = ir->return_deref->var->type;
+ instr->num_components = type->vector_elements;
+
+ /* Set up the destination register */
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ type->vector_elements, NULL);
+
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_store_shared: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+
+ instr->const_index[0] = 0;
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
+
+ instr->const_index[1] = write_mask->value.u[0];
+
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val));
+ instr->num_components = val->type->vector_elements;
+
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_shared_atomic_imin:
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_shared_atomic_imax:
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_shared_atomic_comp_swap: {
+ int param_count = ir->actual_parameters.length();
+ assert(param_count == 2 || param_count == 3);
+
+ /* Offset */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_instruction *inst = (ir_instruction *) param;
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* data1 parameter (this is always present) */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* data2 parameter (only with atomic_comp_swap) */
+ if (param_count == 3) {
+ assert(op == nir_intrinsic_shared_atomic_comp_swap);
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[2] =
+ nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+ }
+
+ /* Atomic result */
+ assert(ir->return_deref);
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ ir->return_deref->type->vector_elements, NULL);
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ default:
+ unreachable("not reached");
+ }
+
+ if (ir->return_deref) {
+ nir_intrinsic_instr *store_instr =
+ nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
+ store_instr->num_components = ir->return_deref->type->vector_elements;
+ store_instr->const_index[0] = (1 << store_instr->num_components) - 1;
+
+ store_instr->variables[0] =
+ evaluate_deref(&store_instr->instr, ir->return_deref);
+ store_instr->src[0] = nir_src_for_ssa(&dest->ssa);
+
+ nir_builder_instr_insert(&b, &store_instr->instr);
+ }
+
+ return;
+ }
+
+ struct hash_entry *entry =
+ _mesa_hash_table_search(this->overload_table, ir->callee);
+ assert(entry);
+ nir_function *callee = (nir_function *) entry->data;
+
+ nir_call_instr *instr = nir_call_instr_create(this->shader, callee);
+
+ unsigned i = 0;
+ foreach_in_list(ir_dereference, param, &ir->actual_parameters) {
+ instr->params[i] = evaluate_deref(&instr->instr, param);
+ i++;
+ }
+
+ instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref);
+ nir_builder_instr_insert(&b, &instr->instr);
+ }
+
+ void
+ nir_visitor::visit(ir_assignment *ir)
+ {
+ unsigned num_components = ir->lhs->type->vector_elements;
+
+ if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) &&
+ (ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) {
+ /* We're doing a plain-as-can-be copy, so emit a copy_var */
+ nir_intrinsic_instr *copy =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
+
+ copy->variables[0] = evaluate_deref(&copy->instr, ir->lhs);
+ copy->variables[1] = evaluate_deref(&copy->instr, ir->rhs);
+
+ if (ir->condition) {
+ nir_if *if_stmt = nir_if_create(this->shader);
+ if_stmt->condition = nir_src_for_ssa(evaluate_rvalue(ir->condition));
+ nir_builder_cf_insert(&b, &if_stmt->cf_node);
+ nir_instr_insert_after_cf_list(&if_stmt->then_list, &copy->instr);
+ b.cursor = nir_after_cf_node(&if_stmt->cf_node);
+ } else {
+ nir_builder_instr_insert(&b, &copy->instr);
+ }
+ return;
+ }
+
+ assert(ir->rhs->type->is_scalar() || ir->rhs->type->is_vector());
+
+ ir->lhs->accept(this);
+ nir_deref_var *lhs_deref = this->deref_head;
+ nir_ssa_def *src = evaluate_rvalue(ir->rhs);
+
+ if (ir->write_mask != (1 << num_components) - 1 && ir->write_mask != 0) {
+ /* GLSL IR will give us the input to the write-masked assignment in a
+ * single packed vector. So, for example, if the writemask is xzw, then
+ * we have to swizzle x -> x, y -> z, and z -> w and get the y component
+ * from the load.
+ */
+ unsigned swiz[4];
+ unsigned component = 0;
+ for (unsigned i = 0; i < 4; i++) {
+ swiz[i] = ir->write_mask & (1 << i) ? component++ : 0;
+ }
+ src = nir_swizzle(&b, src, swiz, num_components, !supports_ints);
+ }
+
+ nir_intrinsic_instr *store =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
+ store->num_components = ir->lhs->type->vector_elements;
+ store->const_index[0] = ir->write_mask;
+ nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
+ store->variables[0] = nir_deref_as_var(store_deref);
+ store->src[0] = nir_src_for_ssa(src);
+
+ if (ir->condition) {
+ nir_if *if_stmt = nir_if_create(this->shader);
+ if_stmt->condition = nir_src_for_ssa(evaluate_rvalue(ir->condition));
+ nir_builder_cf_insert(&b, &if_stmt->cf_node);
+ nir_instr_insert_after_cf_list(&if_stmt->then_list, &store->instr);
+ b.cursor = nir_after_cf_node(&if_stmt->cf_node);
+ } else {
+ nir_builder_instr_insert(&b, &store->instr);
+ }
+ }
+
+ /*
+ * Given an instruction, returns a pointer to its destination or NULL if there
+ * is no destination.
+ *
+ * Note that this only handles instructions we generate at this level.
+ */
+ static nir_dest *
+ get_instr_dest(nir_instr *instr)
+ {
+ nir_alu_instr *alu_instr;
+ nir_intrinsic_instr *intrinsic_instr;
+ nir_tex_instr *tex_instr;
+
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ alu_instr = nir_instr_as_alu(instr);
+ return &alu_instr->dest.dest;
+
+ case nir_instr_type_intrinsic:
+ intrinsic_instr = nir_instr_as_intrinsic(instr);
+ if (nir_intrinsic_infos[intrinsic_instr->intrinsic].has_dest)
+ return &intrinsic_instr->dest;
+ else
+ return NULL;
+
+ case nir_instr_type_tex:
+ tex_instr = nir_instr_as_tex(instr);
+ return &tex_instr->dest;
+
+ default:
+ unreachable("not reached");
+ }
+
+ return NULL;
+ }
+
+ void
+ nir_visitor::add_instr(nir_instr *instr, unsigned num_components)
+ {
+ nir_dest *dest = get_instr_dest(instr);
+
+ if (dest)
+ nir_ssa_dest_init(instr, dest, num_components, NULL);
+
+ nir_builder_instr_insert(&b, instr);
+
+ if (dest) {
+ assert(dest->is_ssa);
+ this->result = &dest->ssa;
+ }
+ }
+
+ nir_ssa_def *
+ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
+ {
+ ir->accept(this);
+ if (ir->as_dereference() || ir->as_constant()) {
+ /*
+ * A dereference is being used on the right hand side, which means we
+ * must emit a variable load.
+ */
+
+ nir_intrinsic_instr *load_instr =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
+ load_instr->num_components = ir->type->vector_elements;
+ load_instr->variables[0] = this->deref_head;
+ ralloc_steal(load_instr, load_instr->variables[0]);
+ add_instr(&load_instr->instr, ir->type->vector_elements);
+ }
+
+ return this->result;
+ }
+
+ void
+ nir_visitor::visit(ir_expression *ir)
+ {
+ /* Some special cases */
+ switch (ir->operation) {
+ case ir_binop_ubo_load: {
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo);
+ load->num_components = ir->type->vector_elements;
+ load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
+ load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
+ add_instr(&load->instr, ir->type->vector_elements);
+
+ /*
+ * In UBOs, a true boolean value is any non-zero value, but we consider
+ * a true boolean to be ~0. Fix this up with a != 0 comparison.
+ */
+
+ if (ir->type->base_type == GLSL_TYPE_BOOL)
+ this->result = nir_ine(&b, &load->dest.ssa, nir_imm_int(&b, 0));
+
+ return;
+ }
+
+ case ir_unop_interpolate_at_centroid:
+ case ir_binop_interpolate_at_offset:
+ case ir_binop_interpolate_at_sample: {
+ ir_dereference *deref = ir->operands[0]->as_dereference();
+ ir_swizzle *swizzle = NULL;
+ if (!deref) {
+ /* The API does not allow a swizzle here, but the varying packing code
+ * may have pushed one in.
+ */
+ swizzle = ir->operands[0]->as_swizzle();
+ assert(swizzle);
+ deref = swizzle->val->as_dereference();
+ assert(deref);
+ }
+
+ deref->accept(this);
+
+ nir_intrinsic_op op;
+ if (this->deref_head->var->data.mode == nir_var_shader_in) {
+ switch (ir->operation) {
+ case ir_unop_interpolate_at_centroid:
+ op = nir_intrinsic_interp_var_at_centroid;
+ break;
+ case ir_binop_interpolate_at_offset:
+ op = nir_intrinsic_interp_var_at_offset;
+ break;
+ case ir_binop_interpolate_at_sample:
+ op = nir_intrinsic_interp_var_at_sample;
+ break;
+ default:
+ unreachable("Invalid interpolation intrinsic");
+ }
+ } else {
+ /* This case can happen if the vertex shader does not write the
+ * given varying. In this case, the linker will lower it to a
+ * global variable. Since interpolating a variable makes no
+ * sense, we'll just turn it into a load which will probably
+ * eventually end up as an SSA definition.
+ */
+ assert(this->deref_head->var->data.mode == nir_var_global);
+ op = nir_intrinsic_load_var;
+ }
+
+ nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op);
+ intrin->num_components = deref->type->vector_elements;
+ intrin->variables[0] = this->deref_head;
+ ralloc_steal(intrin, intrin->variables[0]);
+
+ if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset ||
+ intrin->intrinsic == nir_intrinsic_interp_var_at_sample)
+ intrin->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
+
+ add_instr(&intrin->instr, deref->type->vector_elements);
+
+ if (swizzle) {
+ unsigned swiz[4] = {
+ swizzle->mask.x, swizzle->mask.y, swizzle->mask.z, swizzle->mask.w
+ };
+
+ result = nir_swizzle(&b, result, swiz,
+ swizzle->type->vector_elements, false);
+ }
+
+ return;
+ }
+
+ default:
+ break;
+ }
+
+ nir_ssa_def *srcs[4];
+ for (unsigned i = 0; i < ir->get_num_operands(); i++)
+ srcs[i] = evaluate_rvalue(ir->operands[i]);
+
+ glsl_base_type types[4];
+ for (unsigned i = 0; i < ir->get_num_operands(); i++)
+ if (supports_ints)
+ types[i] = ir->operands[i]->type->base_type;
+ else
+ types[i] = GLSL_TYPE_FLOAT;
+
+ glsl_base_type out_type;
+ if (supports_ints)
+ out_type = ir->type->base_type;
+ else
+ out_type = GLSL_TYPE_FLOAT;
+
+ switch (ir->operation) {
+ case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break;
+ case ir_unop_logic_not:
+ result = supports_ints ? nir_inot(&b, srcs[0]) : nir_fnot(&b, srcs[0]);
+ break;
+ case ir_unop_neg:
+ result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fneg(&b, srcs[0])
+ : nir_ineg(&b, srcs[0]);
+ break;
+ case ir_unop_abs:
+ result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fabs(&b, srcs[0])
+ : nir_iabs(&b, srcs[0]);
+ break;
+ case ir_unop_saturate:
+ assert(types[0] == GLSL_TYPE_FLOAT);
+ result = nir_fsat(&b, srcs[0]);
+ break;
+ case ir_unop_sign:
+ result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fsign(&b, srcs[0])
+ : nir_isign(&b, srcs[0]);
+ break;
+ case ir_unop_rcp: result = nir_frcp(&b, srcs[0]); break;
+ case ir_unop_rsq: result = nir_frsq(&b, srcs[0]); break;
+ case ir_unop_sqrt: result = nir_fsqrt(&b, srcs[0]); break;
+ case ir_unop_exp: unreachable("ir_unop_exp should have been lowered");
+ case ir_unop_log: unreachable("ir_unop_log should have been lowered");
+ case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break;
+ case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break;
+ case ir_unop_i2f:
+ result = supports_ints ? nir_i2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
+ break;
+ case ir_unop_u2f:
+ result = supports_ints ? nir_u2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
+ break;
+ case ir_unop_b2f:
+ result = supports_ints ? nir_b2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
+ break;
+ case ir_unop_f2i: result = nir_f2i(&b, srcs[0]); break;
+ case ir_unop_f2u: result = nir_f2u(&b, srcs[0]); break;
+ case ir_unop_f2b: result = nir_f2b(&b, srcs[0]); break;
+ case ir_unop_i2b: result = nir_i2b(&b, srcs[0]); break;
+ case ir_unop_b2i: result = nir_b2i(&b, srcs[0]); break;
+ case ir_unop_i2u:
+ case ir_unop_u2i:
+ case ir_unop_bitcast_i2f:
+ case ir_unop_bitcast_f2i:
+ case ir_unop_bitcast_u2f:
+ case ir_unop_bitcast_f2u:
+ case ir_unop_subroutine_to_int:
+ /* no-op */
+ result = nir_imov(&b, srcs[0]);
+ break;
+ case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break;
+ case ir_unop_ceil: result = nir_fceil(&b, srcs[0]); break;
+ case ir_unop_floor: result = nir_ffloor(&b, srcs[0]); break;
+ case ir_unop_fract: result = nir_ffract(&b, srcs[0]); break;
+ case ir_unop_round_even: result = nir_fround_even(&b, srcs[0]); break;
+ case ir_unop_sin: result = nir_fsin(&b, srcs[0]); break;
+ case ir_unop_cos: result = nir_fcos(&b, srcs[0]); break;
+ case ir_unop_dFdx: result = nir_fddx(&b, srcs[0]); break;
+ case ir_unop_dFdy: result = nir_fddy(&b, srcs[0]); break;
+ case ir_unop_dFdx_fine: result = nir_fddx_fine(&b, srcs[0]); break;
+ case ir_unop_dFdy_fine: result = nir_fddy_fine(&b, srcs[0]); break;
+ case ir_unop_dFdx_coarse: result = nir_fddx_coarse(&b, srcs[0]); break;
+ case ir_unop_dFdy_coarse: result = nir_fddy_coarse(&b, srcs[0]); break;
+ case ir_unop_pack_snorm_2x16:
+ result = nir_pack_snorm_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_pack_snorm_4x8:
+ result = nir_pack_snorm_4x8(&b, srcs[0]);
+ break;
+ case ir_unop_pack_unorm_2x16:
+ result = nir_pack_unorm_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_pack_unorm_4x8:
+ result = nir_pack_unorm_4x8(&b, srcs[0]);
+ break;
+ case ir_unop_pack_half_2x16:
+ result = nir_pack_half_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_snorm_2x16:
+ result = nir_unpack_snorm_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_snorm_4x8:
+ result = nir_unpack_snorm_4x8(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_unorm_2x16:
+ result = nir_unpack_unorm_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_unorm_4x8:
+ result = nir_unpack_unorm_4x8(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_half_2x16:
+ result = nir_unpack_half_2x16(&b, srcs[0]);
+ break;
- case ir_binop_pack_half_2x16_split:
- result = nir_pack_half_2x16_split(&b, srcs[0], srcs[1]);
- break;
+ case ir_unop_bitfield_reverse:
+ result = nir_bitfield_reverse(&b, srcs[0]);
+ break;
+ case ir_unop_bit_count:
+ result = nir_bit_count(&b, srcs[0]);
+ break;
+ case ir_unop_find_msb:
+ switch (types[0]) {
+ case GLSL_TYPE_UINT:
+ result = nir_ufind_msb(&b, srcs[0]);
+ break;
+ case GLSL_TYPE_INT:
+ result = nir_ifind_msb(&b, srcs[0]);
+ break;
+ default:
+ unreachable("Invalid type for findMSB()");
+ }
+ break;
+ case ir_unop_find_lsb:
+ result = nir_find_lsb(&b, srcs[0]);
+ break;
+
+ case ir_unop_noise:
+ switch (ir->type->vector_elements) {
+ case 1:
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_fnoise1_1(&b, srcs[0]); break;
+ case 2: result = nir_fnoise1_2(&b, srcs[0]); break;
+ case 3: result = nir_fnoise1_3(&b, srcs[0]); break;
+ case 4: result = nir_fnoise1_4(&b, srcs[0]); break;
+ default: unreachable("not reached");
+ }
+ break;
+ case 2:
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_fnoise2_1(&b, srcs[0]); break;
+ case 2: result = nir_fnoise2_2(&b, srcs[0]); break;
+ case 3: result = nir_fnoise2_3(&b, srcs[0]); break;
+ case 4: result = nir_fnoise2_4(&b, srcs[0]); break;
+ default: unreachable("not reached");
+ }
+ break;
+ case 3:
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_fnoise3_1(&b, srcs[0]); break;
+ case 2: result = nir_fnoise3_2(&b, srcs[0]); break;
+ case 3: result = nir_fnoise3_3(&b, srcs[0]); break;
+ case 4: result = nir_fnoise3_4(&b, srcs[0]); break;
+ default: unreachable("not reached");
+ }
+ break;
+ case 4:
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_fnoise4_1(&b, srcs[0]); break;
+ case 2: result = nir_fnoise4_2(&b, srcs[0]); break;
+ case 3: result = nir_fnoise4_3(&b, srcs[0]); break;
+ case 4: result = nir_fnoise4_4(&b, srcs[0]); break;
+ default: unreachable("not reached");
+ }
+ break;
+ default:
+ unreachable("not reached");
+ }
+ break;
+ case ir_unop_get_buffer_size: {
+ nir_intrinsic_instr *load = nir_intrinsic_instr_create(
+ this->shader,
+ nir_intrinsic_get_buffer_size);
+ load->num_components = ir->type->vector_elements;
+ load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
+ add_instr(&load->instr, ir->type->vector_elements);
+ return;
+ }
+
+ case ir_binop_add:
+ result = (out_type == GLSL_TYPE_FLOAT) ? nir_fadd(&b, srcs[0], srcs[1])
+ : nir_iadd(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_sub:
+ result = (out_type == GLSL_TYPE_FLOAT) ? nir_fsub(&b, srcs[0], srcs[1])
+ : nir_isub(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_mul:
+ result = (out_type == GLSL_TYPE_FLOAT) ? nir_fmul(&b, srcs[0], srcs[1])
+ : nir_imul(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_div:
+ if (out_type == GLSL_TYPE_FLOAT)
+ result = nir_fdiv(&b, srcs[0], srcs[1]);
+ else if (out_type == GLSL_TYPE_INT)
+ result = nir_idiv(&b, srcs[0], srcs[1]);
+ else
+ result = nir_udiv(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_mod:
+ result = (out_type == GLSL_TYPE_FLOAT) ? nir_fmod(&b, srcs[0], srcs[1])
+ : nir_umod(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_min:
+ if (out_type == GLSL_TYPE_FLOAT)
+ result = nir_fmin(&b, srcs[0], srcs[1]);
+ else if (out_type == GLSL_TYPE_INT)
+ result = nir_imin(&b, srcs[0], srcs[1]);
+ else
+ result = nir_umin(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_max:
+ if (out_type == GLSL_TYPE_FLOAT)
+ result = nir_fmax(&b, srcs[0], srcs[1]);
+ else if (out_type == GLSL_TYPE_INT)
+ result = nir_imax(&b, srcs[0], srcs[1]);
+ else
+ result = nir_umax(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_pow: result = nir_fpow(&b, srcs[0], srcs[1]); break;
+ case ir_binop_bit_and: result = nir_iand(&b, srcs[0], srcs[1]); break;
+ case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break;
+ case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break;
+ case ir_binop_logic_and:
+ result = supports_ints ? nir_iand(&b, srcs[0], srcs[1])
+ : nir_fand(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_logic_or:
+ result = supports_ints ? nir_ior(&b, srcs[0], srcs[1])
+ : nir_for(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_logic_xor:
+ result = supports_ints ? nir_ixor(&b, srcs[0], srcs[1])
+ : nir_fxor(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break;
+ case ir_binop_rshift:
+ result = (out_type == GLSL_TYPE_INT) ? nir_ishr(&b, srcs[0], srcs[1])
+ : nir_ushr(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_imul_high:
+ result = (out_type == GLSL_TYPE_INT) ? nir_imul_high(&b, srcs[0], srcs[1])
+ : nir_umul_high(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_carry: result = nir_uadd_carry(&b, srcs[0], srcs[1]); break;
+ case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break;
+ case ir_binop_less:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_flt(&b, srcs[0], srcs[1]);
+ else if (types[0] == GLSL_TYPE_INT)
+ result = nir_ilt(&b, srcs[0], srcs[1]);
+ else
+ result = nir_ult(&b, srcs[0], srcs[1]);
+ } else {
+ result = nir_slt(&b, srcs[0], srcs[1]);
+ }
+ break;
+ case ir_binop_greater:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_flt(&b, srcs[1], srcs[0]);
+ else if (types[0] == GLSL_TYPE_INT)
+ result = nir_ilt(&b, srcs[1], srcs[0]);
+ else
+ result = nir_ult(&b, srcs[1], srcs[0]);
+ } else {
+ result = nir_slt(&b, srcs[1], srcs[0]);
+ }
+ break;
+ case ir_binop_lequal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_fge(&b, srcs[1], srcs[0]);
+ else if (types[0] == GLSL_TYPE_INT)
+ result = nir_ige(&b, srcs[1], srcs[0]);
+ else
+ result = nir_uge(&b, srcs[1], srcs[0]);
+ } else {
+ result = nir_sge(&b, srcs[1], srcs[0]);
+ }
+ break;
+ case ir_binop_gequal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_fge(&b, srcs[0], srcs[1]);
+ else if (types[0] == GLSL_TYPE_INT)
+ result = nir_ige(&b, srcs[0], srcs[1]);
+ else
+ result = nir_uge(&b, srcs[0], srcs[1]);
+ } else {
+ result = nir_sge(&b, srcs[0], srcs[1]);
+ }
+ break;
+ case ir_binop_equal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_feq(&b, srcs[0], srcs[1]);
+ else
+ result = nir_ieq(&b, srcs[0], srcs[1]);
+ } else {
+ result = nir_seq(&b, srcs[0], srcs[1]);
+ }
+ break;
+ case ir_binop_nequal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_fne(&b, srcs[0], srcs[1]);
+ else
+ result = nir_ine(&b, srcs[0], srcs[1]);
+ } else {
+ result = nir_sne(&b, srcs[0], srcs[1]);
+ }
+ break;
+ case ir_binop_all_equal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT) {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_feq(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ } else {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ }
+ } else {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_seq(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_fall_equal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_fall_equal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_fall_equal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ }
+ break;
+ case ir_binop_any_nequal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT) {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_fne(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ } else {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_ine(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ }
+ } else {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_sne(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_fany_nequal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_fany_nequal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_fany_nequal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ }
+ break;
+ case ir_binop_dot:
+ switch (ir->operands[0]->type->vector_elements) {
+ case 2: result = nir_fdot2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_fdot3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_fdot4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ break;
+
+ case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break;
+ case ir_triop_fma:
+ result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]);
+ break;
+ case ir_triop_lrp:
+ result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]);
+ break;
+ case ir_triop_csel:
+ if (supports_ints)
+ result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]);
+ else
+ result = nir_fcsel(&b, srcs[0], srcs[1], srcs[2]);
+ break;
+ case ir_triop_bitfield_extract:
+ result = (out_type == GLSL_TYPE_INT) ?
+ nir_ibitfield_extract(&b, srcs[0], srcs[1], srcs[2]) :
+ nir_ubitfield_extract(&b, srcs[0], srcs[1], srcs[2]);
+ break;
+ case ir_quadop_bitfield_insert:
+ result = nir_bitfield_insert(&b, srcs[0], srcs[1], srcs[2], srcs[3]);
+ break;
+ case ir_quadop_vector:
+ result = nir_vec(&b, srcs, ir->type->vector_elements);
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+ }
+
+ void
+ nir_visitor::visit(ir_swizzle *ir)
+ {
+ unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w };
+ result = nir_swizzle(&b, evaluate_rvalue(ir->val), swizzle,
+ ir->type->vector_elements, !supports_ints);
+ }
+
+ void
+ nir_visitor::visit(ir_texture *ir)
+ {
+ unsigned num_srcs;
+ nir_texop op;
+ switch (ir->op) {
+ case ir_tex:
+ op = nir_texop_tex;
+ num_srcs = 1; /* coordinate */
+ break;
+
+ case ir_txb:
+ case ir_txl:
+ op = (ir->op == ir_txb) ? nir_texop_txb : nir_texop_txl;
+ num_srcs = 2; /* coordinate, bias/lod */
+ break;
+
+ case ir_txd:
+ op = nir_texop_txd; /* coordinate, dPdx, dPdy */
+ num_srcs = 3;
+ break;
+
+ case ir_txf:
+ op = nir_texop_txf;
+ if (ir->lod_info.lod != NULL)
+ num_srcs = 2; /* coordinate, lod */
+ else
+ num_srcs = 1; /* coordinate */
+ break;
+
+ case ir_txf_ms:
+ op = nir_texop_txf_ms;
+ num_srcs = 2; /* coordinate, sample_index */
+ break;
+
+ case ir_txs:
+ op = nir_texop_txs;
+ if (ir->lod_info.lod != NULL)
+ num_srcs = 1; /* lod */
+ else
+ num_srcs = 0;
+ break;
+
+ case ir_lod:
+ op = nir_texop_lod;
+ num_srcs = 1; /* coordinate */
+ break;
+
+ case ir_tg4:
+ op = nir_texop_tg4;
+ num_srcs = 1; /* coordinate */
+ break;
+
+ case ir_query_levels:
+ op = nir_texop_query_levels;
+ num_srcs = 0;
+ break;
+
+ case ir_texture_samples:
+ op = nir_texop_texture_samples;
+ num_srcs = 0;
+ break;
+
+ case ir_samples_identical:
+ op = nir_texop_samples_identical;
+ num_srcs = 1; /* coordinate */
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+
+ if (ir->projector != NULL)
+ num_srcs++;
+ if (ir->shadow_comparitor != NULL)
+ num_srcs++;
+ if (ir->offset != NULL && ir->offset->as_constant() == NULL)
+ num_srcs++;
+
+ nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
+
+ instr->op = op;
+ instr->sampler_dim =
+ (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality;
+ instr->is_array = ir->sampler->type->sampler_array;
+ instr->is_shadow = ir->sampler->type->sampler_shadow;
+ if (instr->is_shadow)
+ instr->is_new_style_shadow = (ir->type->vector_elements == 1);
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ instr->dest_type = nir_type_float;
+ break;
+ case GLSL_TYPE_INT:
+ instr->dest_type = nir_type_int;
+ break;
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_UINT:
+ instr->dest_type = nir_type_uint;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ instr->sampler = evaluate_deref(&instr->instr, ir->sampler);
+
+ unsigned src_number = 0;
+
+ if (ir->coordinate != NULL) {
+ instr->coord_components = ir->coordinate->type->vector_elements;
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->coordinate));
+ instr->src[src_number].src_type = nir_tex_src_coord;
+ src_number++;
+ }
+
+ if (ir->projector != NULL) {
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->projector));
+ instr->src[src_number].src_type = nir_tex_src_projector;
+ src_number++;
+ }
+
+ if (ir->shadow_comparitor != NULL) {
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->shadow_comparitor));
+ instr->src[src_number].src_type = nir_tex_src_comparitor;
+ src_number++;
+ }
+
+ if (ir->offset != NULL) {
+ /* we don't support multiple offsets yet */
+ assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar());
+
+ ir_constant *const_offset = ir->offset->as_constant();
+ if (const_offset != NULL) {
+ for (unsigned i = 0; i < const_offset->type->vector_elements; i++)
+ instr->const_offset[i] = const_offset->value.i[i];
+ } else {
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->offset));
+ instr->src[src_number].src_type = nir_tex_src_offset;
+ src_number++;
+ }
+ }
+
+ switch (ir->op) {
+ case ir_txb:
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->lod_info.bias));
+ instr->src[src_number].src_type = nir_tex_src_bias;
+ src_number++;
+ break;
+
+ case ir_txl:
+ case ir_txf:
+ case ir_txs:
+ if (ir->lod_info.lod != NULL) {
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->lod_info.lod));
+ instr->src[src_number].src_type = nir_tex_src_lod;
+ src_number++;
+ }
+ break;
+
+ case ir_txd:
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdx));
+ instr->src[src_number].src_type = nir_tex_src_ddx;
+ src_number++;
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdy));
+ instr->src[src_number].src_type = nir_tex_src_ddy;
+ src_number++;
+ break;
+
+ case ir_txf_ms:
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->lod_info.sample_index));
+ instr->src[src_number].src_type = nir_tex_src_ms_index;
+ src_number++;
+ break;
+
+ case ir_tg4:
+ instr->component = ir->lod_info.component->as_constant()->value.u[0];
+ break;
+
+ default:
+ break;
+ }
+
+ assert(src_number == num_srcs);
+
+ add_instr(&instr->instr, nir_tex_instr_dest_size(instr));
+ }
+
+ void
+ nir_visitor::visit(ir_constant *ir)
+ {
+ /*
+ * We don't know if this variable is an array or struct that gets
+ * dereferenced, so do the safe thing and make it a variable with a
+ * constant initializer and return a dereference.
+ */
+
+ nir_variable *var =
+ nir_local_variable_create(this->impl, ir->type, "const_temp");
+ var->data.read_only = true;
+ var->constant_initializer = constant_copy(ir, var);
+
+ this->deref_head = nir_deref_var_create(this->shader, var);
+ this->deref_tail = &this->deref_head->deref;
+ }
+
+ void
+ nir_visitor::visit(ir_dereference_variable *ir)
+ {
+ struct hash_entry *entry =
+ _mesa_hash_table_search(this->var_table, ir->var);
+ assert(entry);
+ nir_variable *var = (nir_variable *) entry->data;
+
+ nir_deref_var *deref = nir_deref_var_create(this->shader, var);
+ this->deref_head = deref;
+ this->deref_tail = &deref->deref;
+ }
+
+ void
+ nir_visitor::visit(ir_dereference_record *ir)
+ {
+ ir->record->accept(this);
+
+ int field_index = this->deref_tail->type->field_index(ir->field);
+ assert(field_index >= 0);
+
+ nir_deref_struct *deref = nir_deref_struct_create(this->deref_tail, field_index);
+ deref->deref.type = ir->type;
+ this->deref_tail->child = &deref->deref;
+ this->deref_tail = &deref->deref;
+ }
+
+ void
+ nir_visitor::visit(ir_dereference_array *ir)
+ {
+ nir_deref_array *deref = nir_deref_array_create(this->shader);
+ deref->deref.type = ir->type;
+
+ ir_constant *const_index = ir->array_index->as_constant();
+ if (const_index != NULL) {
+ deref->deref_array_type = nir_deref_array_type_direct;
+ deref->base_offset = const_index->value.u[0];
+ } else {
+ deref->deref_array_type = nir_deref_array_type_indirect;
+ deref->indirect =
+ nir_src_for_ssa(evaluate_rvalue(ir->array_index));
+ }
+
+ ir->array->accept(this);
+
+ this->deref_tail->child = &deref->deref;
+ ralloc_steal(this->deref_tail, deref);
+ this->deref_tail = &deref->deref;
+ }
+
+ void
+ nir_visitor::visit(ir_barrier *ir)
+ {
+ nir_intrinsic_instr *instr =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_barrier);
+ nir_builder_instr_insert(&b, &instr->instr);
+ }
--- /dev/null
-nir_function_impl_create(nir_function *function)
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir.h"
+ #include "nir_control_flow_private.h"
+ #include <assert.h>
+
+ nir_shader *
+ nir_shader_create(void *mem_ctx,
+ gl_shader_stage stage,
+ const nir_shader_compiler_options *options)
+ {
+ nir_shader *shader = ralloc(mem_ctx, nir_shader);
+
+ exec_list_make_empty(&shader->uniforms);
+ exec_list_make_empty(&shader->inputs);
+ exec_list_make_empty(&shader->outputs);
++ exec_list_make_empty(&shader->shared);
+
+ shader->options = options;
+ memset(&shader->info, 0, sizeof(shader->info));
+
+ exec_list_make_empty(&shader->functions);
+ exec_list_make_empty(&shader->registers);
+ exec_list_make_empty(&shader->globals);
+ exec_list_make_empty(&shader->system_values);
+ shader->reg_alloc = 0;
+
+ shader->num_inputs = 0;
+ shader->num_outputs = 0;
+ shader->num_uniforms = 0;
++ shader->num_shared = 0;
+
+ shader->stage = stage;
+
+ return shader;
+ }
+
+ static nir_register *
+ reg_create(void *mem_ctx, struct exec_list *list)
+ {
+ nir_register *reg = ralloc(mem_ctx, nir_register);
+
+ list_inithead(&reg->uses);
+ list_inithead(&reg->defs);
+ list_inithead(&reg->if_uses);
+
+ reg->num_components = 0;
+ reg->num_array_elems = 0;
+ reg->is_packed = false;
+ reg->name = NULL;
+
+ exec_list_push_tail(list, &reg->node);
+
+ return reg;
+ }
+
+ nir_register *
+ nir_global_reg_create(nir_shader *shader)
+ {
+ nir_register *reg = reg_create(shader, &shader->registers);
+ reg->index = shader->reg_alloc++;
+ reg->is_global = true;
+
+ return reg;
+ }
+
+ nir_register *
+ nir_local_reg_create(nir_function_impl *impl)
+ {
+ nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
+ reg->index = impl->reg_alloc++;
+ reg->is_global = false;
+
+ return reg;
+ }
+
+ void
+ nir_reg_remove(nir_register *reg)
+ {
+ exec_node_remove(&reg->node);
+ }
+
+ void
+ nir_shader_add_variable(nir_shader *shader, nir_variable *var)
+ {
+ switch (var->data.mode) {
+ case nir_var_all:
+ assert(!"invalid mode");
+ break;
+
+ case nir_var_local:
+ assert(!"nir_shader_add_variable cannot be used for local variables");
+ break;
+
+ case nir_var_global:
+ exec_list_push_tail(&shader->globals, &var->node);
+ break;
+
+ case nir_var_shader_in:
+ exec_list_push_tail(&shader->inputs, &var->node);
+ break;
+
+ case nir_var_shader_out:
+ exec_list_push_tail(&shader->outputs, &var->node);
+ break;
+
+ case nir_var_uniform:
+ case nir_var_shader_storage:
+ exec_list_push_tail(&shader->uniforms, &var->node);
+ break;
+
++ case nir_var_shared:
++ assert(shader->stage == MESA_SHADER_COMPUTE);
++ exec_list_push_tail(&shader->shared, &var->node);
++ break;
++
+ case nir_var_system_value:
+ exec_list_push_tail(&shader->system_values, &var->node);
+ break;
+ }
+ }
+
+ nir_variable *
+ nir_variable_create(nir_shader *shader, nir_variable_mode mode,
+ const struct glsl_type *type, const char *name)
+ {
+ nir_variable *var = rzalloc(shader, nir_variable);
+ var->name = ralloc_strdup(var, name);
+ var->type = type;
+ var->data.mode = mode;
+
+ if ((mode == nir_var_shader_in && shader->stage != MESA_SHADER_VERTEX) ||
+ (mode == nir_var_shader_out && shader->stage != MESA_SHADER_FRAGMENT))
+ var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
+
+ if (mode == nir_var_shader_in || mode == nir_var_uniform)
+ var->data.read_only = true;
+
+ nir_shader_add_variable(shader, var);
+
+ return var;
+ }
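+
+ /* Editor's illustrative sketch (not part of this patch): with the new
+ * nir_var_shared mode handled above, a compute-stage shader could declare a
+ * workgroup-shared array roughly like this. The shader is assumed to be a
+ * MESA_SHADER_COMPUTE shader, and glsl_array_type()/glsl_float_type() are
+ * assumed to be the usual glsl_types helpers:
+ *
+ *    nir_variable *scratch =
+ *       nir_variable_create(shader, nir_var_shared,
+ *                           glsl_array_type(glsl_float_type(), 64),
+ *                           "shared_scratch");
+ */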
+
+ nir_variable *
+ nir_local_variable_create(nir_function_impl *impl,
+ const struct glsl_type *type, const char *name)
+ {
+ nir_variable *var = rzalloc(impl->function->shader, nir_variable);
+ var->name = ralloc_strdup(var, name);
+ var->type = type;
+ var->data.mode = nir_var_local;
+
+ nir_function_impl_add_variable(impl, var);
+
+ return var;
+ }
+
+ nir_function *
+ nir_function_create(nir_shader *shader, const char *name)
+ {
+ nir_function *func = ralloc(shader, nir_function);
+
+ exec_list_push_tail(&shader->functions, &func->node);
+
+ func->name = ralloc_strdup(func, name);
+ func->shader = shader;
+ func->num_params = 0;
+ func->params = NULL;
+ func->return_type = glsl_void_type();
+ func->impl = NULL;
+
+ return func;
+ }
+
+ void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
+ {
+ dest->is_ssa = src->is_ssa;
+ if (src->is_ssa) {
+ dest->ssa = src->ssa;
+ } else {
+ dest->reg.base_offset = src->reg.base_offset;
+ dest->reg.reg = src->reg.reg;
+ if (src->reg.indirect) {
+ dest->reg.indirect = ralloc(mem_ctx, nir_src);
+ nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
+ } else {
+ dest->reg.indirect = NULL;
+ }
+ }
+ }
+
+ void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
+ {
+ /* Copying an SSA definition makes no sense whatsoever. */
+ assert(!src->is_ssa);
+
+ dest->is_ssa = false;
+
+ dest->reg.base_offset = src->reg.base_offset;
+ dest->reg.reg = src->reg.reg;
+ if (src->reg.indirect) {
+ dest->reg.indirect = ralloc(instr, nir_src);
+ nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
+ } else {
+ dest->reg.indirect = NULL;
+ }
+ }
+
+ void
+ nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+ nir_alu_instr *instr)
+ {
+ nir_src_copy(&dest->src, &src->src, &instr->instr);
+ dest->abs = src->abs;
+ dest->negate = src->negate;
+ for (unsigned i = 0; i < 4; i++)
+ dest->swizzle[i] = src->swizzle[i];
+ }
+
+ void
+ nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+ nir_alu_instr *instr)
+ {
+ nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
+ dest->write_mask = src->write_mask;
+ dest->saturate = src->saturate;
+ }
+
+
+ static void
+ cf_init(nir_cf_node *node, nir_cf_node_type type)
+ {
+ exec_node_init(&node->node);
+ node->parent = NULL;
+ node->type = type;
+ }
+
+ nir_function_impl *
- assert(function->impl == NULL);
-
- void *mem_ctx = ralloc_parent(function);
-
- nir_function_impl *impl = ralloc(mem_ctx, nir_function_impl);
++nir_function_impl_create_bare(nir_shader *shader)
+ {
- function->impl = impl;
- impl->function = function;
++ nir_function_impl *impl = ralloc(shader, nir_function_impl);
+
- nir_block *start_block = nir_block_create(mem_ctx);
- nir_block *end_block = nir_block_create(mem_ctx);
++ impl->function = NULL;
+
+ cf_init(&impl->cf_node, nir_cf_node_function);
+
+ exec_list_make_empty(&impl->body);
+ exec_list_make_empty(&impl->registers);
+ exec_list_make_empty(&impl->locals);
+ impl->num_params = 0;
+ impl->params = NULL;
+ impl->return_var = NULL;
+ impl->reg_alloc = 0;
+ impl->ssa_alloc = 0;
+ impl->valid_metadata = nir_metadata_none;
+
+ /* create start & end blocks */
- instr->sampler_array_size = 0;
++ nir_block *start_block = nir_block_create(shader);
++ nir_block *end_block = nir_block_create(shader);
+ start_block->cf_node.parent = &impl->cf_node;
+ end_block->cf_node.parent = &impl->cf_node;
+ impl->end_block = end_block;
+
+ exec_list_push_tail(&impl->body, &start_block->cf_node.node);
+
+ start_block->successors[0] = end_block;
+ _mesa_set_add(end_block->predecessors, start_block);
+ return impl;
+ }
+
++nir_function_impl *
++nir_function_impl_create(nir_function *function)
++{
++ assert(function->impl == NULL);
++
++ nir_function_impl *impl = nir_function_impl_create_bare(function->shader);
++
++ function->impl = impl;
++ impl->function = function;
++
++ impl->num_params = function->num_params;
++ impl->params = ralloc_array(function->shader,
++ nir_variable *, impl->num_params);
++
++ return impl;
++}
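++
++/* Editor's illustrative sketch (not part of this change): a new function
++ * normally gets its implementation through the wrapper above, which also
++ * allocates impl->params to match the function's parameter count, e.g.:
++ *
++ *    nir_function *func = nir_function_create(shader, "main");
++ *    nir_function_impl *impl = nir_function_impl_create(func);
++ */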
++
+ nir_block *
+ nir_block_create(nir_shader *shader)
+ {
+ nir_block *block = ralloc(shader, nir_block);
+
+ cf_init(&block->cf_node, nir_cf_node_block);
+
+ block->successors[0] = block->successors[1] = NULL;
+ block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ block->imm_dom = NULL;
+ /* XXX maybe it would be worth it to defer allocation? That
+ * way it wouldn't get allocated for shader refs that never run
+ * nir_calc_dominance. For example, the state tracker creates an
+ * initial IR, clones that, runs the appropriate lowering passes,
+ * passes it to the driver (which does common lowering/opt), and then
+ * stores a ref that is later used to do state-specific lowering and
+ * further opt. Do any of the references not need dominance metadata?
+ */
+ block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ exec_list_make_empty(&block->instr_list);
+
+ return block;
+ }
+
+ static inline void
+ src_init(nir_src *src)
+ {
+ src->is_ssa = false;
+ src->reg.reg = NULL;
+ src->reg.indirect = NULL;
+ src->reg.base_offset = 0;
+ }
+
+ nir_if *
+ nir_if_create(nir_shader *shader)
+ {
+ nir_if *if_stmt = ralloc(shader, nir_if);
+
+ cf_init(&if_stmt->cf_node, nir_cf_node_if);
+ src_init(&if_stmt->condition);
+
+ nir_block *then = nir_block_create(shader);
+ exec_list_make_empty(&if_stmt->then_list);
+ exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
+ then->cf_node.parent = &if_stmt->cf_node;
+
+ nir_block *else_stmt = nir_block_create(shader);
+ exec_list_make_empty(&if_stmt->else_list);
+ exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
+ else_stmt->cf_node.parent = &if_stmt->cf_node;
+
+ return if_stmt;
+ }
+
+ nir_loop *
+ nir_loop_create(nir_shader *shader)
+ {
+ nir_loop *loop = ralloc(shader, nir_loop);
+
+ cf_init(&loop->cf_node, nir_cf_node_loop);
+
+ nir_block *body = nir_block_create(shader);
+ exec_list_make_empty(&loop->body);
+ exec_list_push_tail(&loop->body, &body->cf_node.node);
+ body->cf_node.parent = &loop->cf_node;
+
+ body->successors[0] = body;
+ _mesa_set_add(body->predecessors, body);
+
+ return loop;
+ }
+
+ static void
+ instr_init(nir_instr *instr, nir_instr_type type)
+ {
+ instr->type = type;
+ instr->block = NULL;
+ exec_node_init(&instr->node);
+ }
+
+ static void
+ dest_init(nir_dest *dest)
+ {
+ dest->is_ssa = false;
+ dest->reg.reg = NULL;
+ dest->reg.indirect = NULL;
+ dest->reg.base_offset = 0;
+ }
+
+ static void
+ alu_dest_init(nir_alu_dest *dest)
+ {
+ dest_init(&dest->dest);
+ dest->saturate = false;
+ dest->write_mask = 0xf;
+ }
+
+ static void
+ alu_src_init(nir_alu_src *src)
+ {
+ src_init(&src->src);
+ src->abs = src->negate = false;
+ src->swizzle[0] = 0;
+ src->swizzle[1] = 1;
+ src->swizzle[2] = 2;
+ src->swizzle[3] = 3;
+ }
+
+ nir_alu_instr *
+ nir_alu_instr_create(nir_shader *shader, nir_op op)
+ {
+ unsigned num_srcs = nir_op_infos[op].num_inputs;
+ nir_alu_instr *instr =
+ ralloc_size(shader,
+ sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
+
+ instr_init(&instr->instr, nir_instr_type_alu);
+ instr->op = op;
+ alu_dest_init(&instr->dest);
+ for (unsigned i = 0; i < num_srcs; i++)
+ alu_src_init(&instr->src[i]);
+
+ return instr;
+ }
+
+ nir_jump_instr *
+ nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
+ {
+ nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
+ instr_init(&instr->instr, nir_instr_type_jump);
+ instr->type = type;
+ return instr;
+ }
+
+ nir_load_const_instr *
+ nir_load_const_instr_create(nir_shader *shader, unsigned num_components)
+ {
+ nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr);
+ instr_init(&instr->instr, nir_instr_type_load_const);
+
+ nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
+
+ return instr;
+ }
+
+ nir_intrinsic_instr *
+ nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
+ {
+ unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
+ nir_intrinsic_instr *instr =
+ ralloc_size(shader,
+ sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
+
+ instr_init(&instr->instr, nir_instr_type_intrinsic);
+ instr->intrinsic = op;
+
+ if (nir_intrinsic_infos[op].has_dest)
+ dest_init(&instr->dest);
+
+ for (unsigned i = 0; i < num_srcs; i++)
+ src_init(&instr->src[i]);
+
+ return instr;
+ }
+
+ nir_call_instr *
+ nir_call_instr_create(nir_shader *shader, nir_function *callee)
+ {
+ nir_call_instr *instr = ralloc(shader, nir_call_instr);
+ instr_init(&instr->instr, nir_instr_type_call);
+
+ instr->callee = callee;
+ instr->num_params = callee->num_params;
+ instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params);
+ instr->return_deref = NULL;
+
+ return instr;
+ }
+
+ nir_tex_instr *
+ nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
+ {
+ nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
+ instr_init(&instr->instr, nir_instr_type_tex);
+
+ dest_init(&instr->dest);
+
+ instr->num_srcs = num_srcs;
+ instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
+ for (unsigned i = 0; i < num_srcs; i++)
+ src_init(&instr->src[i].src);
+
++ instr->texture_index = 0;
++ instr->texture_array_size = 0;
++ instr->texture = NULL;
+ instr->sampler_index = 0;
+ instr->sampler = NULL;
+
+ return instr;
+ }
+
+ nir_phi_instr *
+ nir_phi_instr_create(nir_shader *shader)
+ {
+ nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
+ instr_init(&instr->instr, nir_instr_type_phi);
+
+ dest_init(&instr->dest);
+ exec_list_make_empty(&instr->srcs);
+ return instr;
+ }
+
+ nir_parallel_copy_instr *
+ nir_parallel_copy_instr_create(nir_shader *shader)
+ {
+ nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
+ instr_init(&instr->instr, nir_instr_type_parallel_copy);
+
+ exec_list_make_empty(&instr->entries);
+
+ return instr;
+ }
+
+ nir_ssa_undef_instr *
+ nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components)
+ {
+ nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
+ instr_init(&instr->instr, nir_instr_type_ssa_undef);
+
+ nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
+
+ return instr;
+ }
+
+ nir_deref_var *
+ nir_deref_var_create(void *mem_ctx, nir_variable *var)
+ {
+ nir_deref_var *deref = ralloc(mem_ctx, nir_deref_var);
+ deref->deref.deref_type = nir_deref_type_var;
+ deref->deref.child = NULL;
+ deref->deref.type = var->type;
+ deref->var = var;
+ return deref;
+ }
+
+ nir_deref_array *
+ nir_deref_array_create(void *mem_ctx)
+ {
+ nir_deref_array *deref = ralloc(mem_ctx, nir_deref_array);
+ deref->deref.deref_type = nir_deref_type_array;
+ deref->deref.child = NULL;
+ deref->deref_array_type = nir_deref_array_type_direct;
+ src_init(&deref->indirect);
+ deref->base_offset = 0;
+ return deref;
+ }
+
+ nir_deref_struct *
+ nir_deref_struct_create(void *mem_ctx, unsigned field_index)
+ {
+ nir_deref_struct *deref = ralloc(mem_ctx, nir_deref_struct);
+ deref->deref.deref_type = nir_deref_type_struct;
+ deref->deref.child = NULL;
+ deref->index = field_index;
+ return deref;
+ }
+
+ static nir_deref_var *
+ copy_deref_var(void *mem_ctx, nir_deref_var *deref)
+ {
+ nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var);
+ ret->deref.type = deref->deref.type;
+ if (deref->deref.child)
+ ret->deref.child = nir_copy_deref(ret, deref->deref.child);
+ return ret;
+ }
+
+ static nir_deref_array *
+ copy_deref_array(void *mem_ctx, nir_deref_array *deref)
+ {
+ nir_deref_array *ret = nir_deref_array_create(mem_ctx);
+ ret->base_offset = deref->base_offset;
+ ret->deref_array_type = deref->deref_array_type;
+ if (deref->deref_array_type == nir_deref_array_type_indirect) {
+ nir_src_copy(&ret->indirect, &deref->indirect, mem_ctx);
+ }
+ ret->deref.type = deref->deref.type;
+ if (deref->deref.child)
+ ret->deref.child = nir_copy_deref(ret, deref->deref.child);
+ return ret;
+ }
+
+ static nir_deref_struct *
+ copy_deref_struct(void *mem_ctx, nir_deref_struct *deref)
+ {
+ nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index);
+ ret->deref.type = deref->deref.type;
+ if (deref->deref.child)
+ ret->deref.child = nir_copy_deref(ret, deref->deref.child);
+ return ret;
+ }
+
+ nir_deref *
+ nir_copy_deref(void *mem_ctx, nir_deref *deref)
+ {
+ switch (deref->deref_type) {
+ case nir_deref_type_var:
+ return &copy_deref_var(mem_ctx, nir_deref_as_var(deref))->deref;
+ case nir_deref_type_array:
+ return &copy_deref_array(mem_ctx, nir_deref_as_array(deref))->deref;
+ case nir_deref_type_struct:
+ return &copy_deref_struct(mem_ctx, nir_deref_as_struct(deref))->deref;
+ default:
+ unreachable("Invalid dereference type");
+ }
+
+ return NULL;
+ }
+
+ /* Returns a load_const instruction that represents the constant
+ * initializer for the given deref chain. The caller is responsible for
+ * ensuring that there actually is a constant initializer.
+ */
+ nir_load_const_instr *
+ nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref)
+ {
+ nir_constant *constant = deref->var->constant_initializer;
+ assert(constant);
+
+ const nir_deref *tail = &deref->deref;
+ unsigned matrix_offset = 0;
+ while (tail->child) {
+ switch (tail->child->deref_type) {
+ case nir_deref_type_array: {
+ nir_deref_array *arr = nir_deref_as_array(tail->child);
+ assert(arr->deref_array_type == nir_deref_array_type_direct);
+ if (glsl_type_is_matrix(tail->type)) {
+ assert(arr->deref.child == NULL);
+ matrix_offset = arr->base_offset;
+ } else {
+ constant = constant->elements[arr->base_offset];
+ }
+ break;
+ }
+
+ case nir_deref_type_struct: {
+ constant = constant->elements[nir_deref_as_struct(tail->child)->index];
+ break;
+ }
+
+ default:
+ unreachable("Invalid deref child type");
+ }
+
+ tail = tail->child;
+ }
+
+ nir_load_const_instr *load =
+ nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type));
+
+ matrix_offset *= load->def.num_components;
+ for (unsigned i = 0; i < load->def.num_components; i++) {
+ switch (glsl_get_base_type(tail->type)) {
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_UINT:
+ load->value.u[i] = constant->value.u[matrix_offset + i];
+ break;
+ case GLSL_TYPE_BOOL:
+ load->value.u[i] = constant->value.b[matrix_offset + i] ?
+ NIR_TRUE : NIR_FALSE;
+ break;
+ default:
+ unreachable("Invalid immediate type");
+ }
+ }
+
+ return load;
+ }
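+
+ /* Editor's usage sketch (illustrative, not part of this patch): a pass that
+ * wants to materialize a constant-initialized variable as an SSA value might
+ * do something like the following, where "deref" is assumed to point at a
+ * variable known to carry a constant_initializer and "cursor" is a valid
+ * nir_cursor:
+ *
+ *    nir_load_const_instr *load =
+ *       nir_deref_get_const_initializer_load(shader, deref);
+ *    nir_instr_insert(cursor, &load->instr);
+ *    nir_ssa_def *value = &load->def;
+ */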
+
+ nir_function_impl *
+ nir_cf_node_get_function(nir_cf_node *node)
+ {
+ while (node->type != nir_cf_node_function) {
+ node = node->parent;
+ }
+
+ return nir_cf_node_as_function(node);
+ }
+
++/* Reduces a cursor to a canonical form by preferring "after" positions and
++ * lifting it to block granularity when possible.
++ */
++static nir_cursor
++reduce_cursor(nir_cursor cursor)
++{
++ switch (cursor.option) {
++ case nir_cursor_before_block:
++ if (exec_list_is_empty(&cursor.block->instr_list)) {
++ /* Empty block. After is as good as before. */
++ cursor.option = nir_cursor_after_block;
++ } else {
++ /* Try to switch to after the previous block if there is one.
++ * (This isn't likely, but it can happen.)
++ */
++ nir_cf_node *prev_node = nir_cf_node_prev(&cursor.block->cf_node);
++ if (prev_node && prev_node->type == nir_cf_node_block) {
++ cursor.block = nir_cf_node_as_block(prev_node);
++ cursor.option = nir_cursor_after_block;
++ }
++ }
++ return cursor;
++
++ case nir_cursor_after_block:
++ return cursor;
++
++ case nir_cursor_before_instr: {
++ nir_instr *prev_instr = nir_instr_prev(cursor.instr);
++ if (prev_instr) {
++ /* Before this instruction is the same as after the previous one. */
++ cursor.instr = prev_instr;
++ cursor.option = nir_cursor_after_instr;
++ } else {
++ /* No previous instruction. Switch to before block */
++ cursor.block = cursor.instr->block;
++ cursor.option = nir_cursor_before_block;
++ }
++ return reduce_cursor(cursor);
++ }
++
++ case nir_cursor_after_instr:
++ if (nir_instr_next(cursor.instr) == NULL) {
++ /* This is the last instruction, switch to after block */
++ cursor.option = nir_cursor_after_block;
++ cursor.block = cursor.instr->block;
++ }
++ return cursor;
++
++ default:
++ unreachable("Inavlid cursor option");
++ }
++}
++
++bool
++nir_cursors_equal(nir_cursor a, nir_cursor b)
++{
++ /* Reduced cursors should be unique */
++ a = reduce_cursor(a);
++ b = reduce_cursor(b);
++
++ return a.block == b.block && a.option == b.option;
++}
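++
++/* Editor's illustrative sketch (not part of this change): because
++ * reduce_cursor() canonicalizes positions, two cursors that denote the same
++ * insertion point compare equal even if they were built differently. For
++ * example, assuming "instr" is the last instruction of "block" and
++ * nir_after_instr()/nir_after_block() are the cursor helpers from nir.h:
++ *
++ *    assert(nir_cursors_equal(nir_after_instr(instr),
++ *                             nir_after_block(block)));
++ */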
++
+ static bool
+ add_use_cb(nir_src *src, void *state)
+ {
+ nir_instr *instr = state;
+
+ src->parent_instr = instr;
+ list_addtail(&src->use_link,
+ src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);
+
+ return true;
+ }
+
+ static bool
+ add_ssa_def_cb(nir_ssa_def *def, void *state)
+ {
+ nir_instr *instr = state;
+
+ if (instr->block && def->index == UINT_MAX) {
+ nir_function_impl *impl =
+ nir_cf_node_get_function(&instr->block->cf_node);
+
+ def->index = impl->ssa_alloc++;
+ }
+
+ return true;
+ }
+
+ static bool
+ add_reg_def_cb(nir_dest *dest, void *state)
+ {
+ nir_instr *instr = state;
+
+ if (!dest->is_ssa) {
+ dest->reg.parent_instr = instr;
+ list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
+ }
+
+ return true;
+ }
+
+ static void
+ add_defs_uses(nir_instr *instr)
+ {
+ nir_foreach_src(instr, add_use_cb, instr);
+ nir_foreach_dest(instr, add_reg_def_cb, instr);
+ nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
+ }
+
+ void
+ nir_instr_insert(nir_cursor cursor, nir_instr *instr)
+ {
+ switch (cursor.option) {
+ case nir_cursor_before_block:
+ /* Only allow inserting jumps into empty blocks. */
+ if (instr->type == nir_instr_type_jump)
+ assert(exec_list_is_empty(&cursor.block->instr_list));
+
+ instr->block = cursor.block;
+ add_defs_uses(instr);
+ exec_list_push_head(&cursor.block->instr_list, &instr->node);
+ break;
+ case nir_cursor_after_block: {
+ /* Inserting instructions after a jump is illegal. */
+ nir_instr *last = nir_block_last_instr(cursor.block);
+ assert(last == NULL || last->type != nir_instr_type_jump);
+ (void) last;
+
+ instr->block = cursor.block;
+ add_defs_uses(instr);
+ exec_list_push_tail(&cursor.block->instr_list, &instr->node);
+ break;
+ }
+ case nir_cursor_before_instr:
+ assert(instr->type != nir_instr_type_jump);
+ instr->block = cursor.instr->block;
+ add_defs_uses(instr);
+ exec_node_insert_node_before(&cursor.instr->node, &instr->node);
+ break;
+ case nir_cursor_after_instr:
+ /* Inserting instructions after a jump is illegal. */
+ assert(cursor.instr->type != nir_instr_type_jump);
+
+ /* Only allow inserting jumps at the end of the block. */
+ if (instr->type == nir_instr_type_jump)
+ assert(cursor.instr == nir_block_last_instr(cursor.instr->block));
+
+ instr->block = cursor.instr->block;
+ add_defs_uses(instr);
+ exec_node_insert_after(&cursor.instr->node, &instr->node);
+ break;
+ }
+
+ if (instr->type == nir_instr_type_jump)
+ nir_handle_add_jump(instr->block);
+ }
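+
+ /* Editor's usage sketch (illustrative, not part of this patch): with the
+ * cursor API above, a pass can drop a freshly built ALU instruction right
+ * before an existing one. "old_instr" is assumed to be an existing
+ * nir_instr, "some_def" an existing nir_ssa_def, "shader" the containing
+ * nir_shader, and nir_before_instr() the cursor helper from nir.h:
+ *
+ *    nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
+ *    mov->src[0].src = nir_src_for_ssa(some_def);
+ *    nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+ *                      some_def->num_components, NULL);
+ *    nir_instr_insert(nir_before_instr(old_instr), &mov->instr);
+ */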
+
+ static bool
+ src_is_valid(const nir_src *src)
+ {
+ return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
+ }
+
+ static bool
+ remove_use_cb(nir_src *src, void *state)
+ {
+ if (src_is_valid(src))
+ list_del(&src->use_link);
+
+ return true;
+ }
+
+ static bool
+ remove_def_cb(nir_dest *dest, void *state)
+ {
+ if (!dest->is_ssa)
+ list_del(&dest->reg.def_link);
+
+ return true;
+ }
+
+ static void
+ remove_defs_uses(nir_instr *instr)
+ {
+ nir_foreach_dest(instr, remove_def_cb, instr);
+ nir_foreach_src(instr, remove_use_cb, instr);
+ }
+
+ void nir_instr_remove(nir_instr *instr)
+ {
+ remove_defs_uses(instr);
+ exec_node_remove(&instr->node);
+
+ if (instr->type == nir_instr_type_jump) {
+ nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
+ nir_handle_remove_jump(instr->block, jump_instr->type);
+ }
+ }
+
+ /*@}*/
+
+ void
+ nir_index_local_regs(nir_function_impl *impl)
+ {
+ unsigned index = 0;
+ foreach_list_typed(nir_register, reg, node, &impl->registers) {
+ reg->index = index++;
+ }
+ impl->reg_alloc = index;
+ }
+
+ void
+ nir_index_global_regs(nir_shader *shader)
+ {
+ unsigned index = 0;
+ foreach_list_typed(nir_register, reg, node, &shader->registers) {
+ reg->index = index++;
+ }
+ shader->reg_alloc = index;
+ }
+
+ static bool
+ visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
+ {
+ return cb(&instr->dest.dest, state);
+ }
+
+ static bool
+ visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
+ void *state)
+ {
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+ return cb(&instr->dest, state);
+
+ return true;
+ }
+
+ static bool
+ visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
+ void *state)
+ {
+ return cb(&instr->dest, state);
+ }
+
+ static bool
+ visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
+ {
+ return cb(&instr->dest, state);
+ }
+
+ static bool
+ visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
+ nir_foreach_dest_cb cb, void *state)
+ {
+ nir_foreach_parallel_copy_entry(instr, entry) {
+ if (!cb(&entry->dest, state))
+ return false;
+ }
+
+ return true;
+ }
+
+ bool
+ nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
+ {
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
+ case nir_instr_type_intrinsic:
+ return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
+ case nir_instr_type_tex:
+ return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
+ case nir_instr_type_phi:
+ return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
+ case nir_instr_type_parallel_copy:
+ return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
+ cb, state);
+
+ case nir_instr_type_load_const:
+ case nir_instr_type_ssa_undef:
+ case nir_instr_type_call:
+ case nir_instr_type_jump:
+ break;
+
+ default:
+ unreachable("Invalid instruction type");
+ break;
+ }
+
+ return true;
+ }
+
+ struct foreach_ssa_def_state {
+ nir_foreach_ssa_def_cb cb;
+ void *client_state;
+ };
+
+ static inline bool
+ nir_ssa_def_visitor(nir_dest *dest, void *void_state)
+ {
+ struct foreach_ssa_def_state *state = void_state;
+
+ if (dest->is_ssa)
+ return state->cb(&dest->ssa, state->client_state);
+ else
+ return true;
+ }
+
+ bool
+ nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
+ {
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ case nir_instr_type_tex:
+ case nir_instr_type_intrinsic:
+ case nir_instr_type_phi:
+ case nir_instr_type_parallel_copy: {
+ struct foreach_ssa_def_state foreach_state = {cb, state};
+ return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
+ }
+
+ case nir_instr_type_load_const:
+ return cb(&nir_instr_as_load_const(instr)->def, state);
+ case nir_instr_type_ssa_undef:
+ return cb(&nir_instr_as_ssa_undef(instr)->def, state);
+ case nir_instr_type_call:
+ case nir_instr_type_jump:
+ return true;
+ default:
+ unreachable("Invalid instruction type");
+ }
+ }
+
+ static bool
+ visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
+ {
+ if (!cb(src, state))
+ return false;
+ if (!src->is_ssa && src->reg.indirect)
+ return cb(src->reg.indirect, state);
+ return true;
+ }
+
+ static bool
+ visit_deref_array_src(nir_deref_array *deref, nir_foreach_src_cb cb,
+ void *state)
+ {
+ if (deref->deref_array_type == nir_deref_array_type_indirect)
+ return visit_src(&deref->indirect, cb, state);
+ return true;
+ }
+
+ static bool
+ visit_deref_src(nir_deref_var *deref, nir_foreach_src_cb cb, void *state)
+ {
+ nir_deref *cur = &deref->deref;
+ while (cur != NULL) {
+ if (cur->deref_type == nir_deref_type_array)
+ if (!visit_deref_array_src(nir_deref_as_array(cur), cb, state))
+ return false;
+
+ cur = cur->child;
+ }
+
+ return true;
+ }
+
+ static bool
+ visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
+ {
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+ if (!visit_src(&instr->src[i].src, cb, state))
+ return false;
+
+ return true;
+ }
+
+ static bool
+ visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
+ {
+ for (unsigned i = 0; i < instr->num_srcs; i++)
+ if (!visit_src(&instr->src[i].src, cb, state))
+ return false;
+
++ if (instr->texture != NULL)
++ if (!visit_deref_src(instr->texture, cb, state))
++ return false;
++
+ if (instr->sampler != NULL)
+ if (!visit_deref_src(instr->sampler, cb, state))
+ return false;
+
+ return true;
+ }
+
+ static bool
+ visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
+ void *state)
+ {
+ unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+ for (unsigned i = 0; i < num_srcs; i++)
+ if (!visit_src(&instr->src[i], cb, state))
+ return false;
+
+ unsigned num_vars =
+ nir_intrinsic_infos[instr->intrinsic].num_variables;
+ for (unsigned i = 0; i < num_vars; i++)
+ if (!visit_deref_src(instr->variables[i], cb, state))
+ return false;
+
+ return true;
+ }
+
+ static bool
+ visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
+ {
+ return true;
+ }
+
+ static bool
+ visit_load_const_src(nir_load_const_instr *instr, nir_foreach_src_cb cb,
+ void *state)
+ {
+ return true;
+ }
+
+ static bool
+ visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
+ {
+ nir_foreach_phi_src(instr, src) {
+ if (!visit_src(&src->src, cb, state))
+ return false;
+ }
+
+ return true;
+ }
+
+ static bool
+ visit_parallel_copy_src(nir_parallel_copy_instr *instr,
+ nir_foreach_src_cb cb, void *state)
+ {
+ nir_foreach_parallel_copy_entry(instr, entry) {
+ if (!visit_src(&entry->src, cb, state))
+ return false;
+ }
+
+ return true;
+ }
+
+ typedef struct {
+ void *state;
+ nir_foreach_src_cb cb;
+ } visit_dest_indirect_state;
+
+ static bool
+ visit_dest_indirect(nir_dest *dest, void *_state)
+ {
+ visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state;
+
+ if (!dest->is_ssa && dest->reg.indirect)
+ return state->cb(dest->reg.indirect, state->state);
+
+ return true;
+ }
+
+ bool
+ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
+ {
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
+ return false;
+ break;
+ case nir_instr_type_intrinsic:
+ if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
+ return false;
+ break;
+ case nir_instr_type_tex:
+ if (!visit_tex_src(nir_instr_as_tex(instr), cb, state))
+ return false;
+ break;
+ case nir_instr_type_call:
+ if (!visit_call_src(nir_instr_as_call(instr), cb, state))
+ return false;
+ break;
+ case nir_instr_type_load_const:
+ if (!visit_load_const_src(nir_instr_as_load_const(instr), cb, state))
+ return false;
+ break;
+ case nir_instr_type_phi:
+ if (!visit_phi_src(nir_instr_as_phi(instr), cb, state))
+ return false;
+ break;
+ case nir_instr_type_parallel_copy:
+ if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
+ cb, state))
+ return false;
+ break;
+ case nir_instr_type_jump:
+ case nir_instr_type_ssa_undef:
+ return true;
+
+ default:
+ unreachable("Invalid instruction type");
+ break;
+ }
+
+ visit_dest_indirect_state dest_state;
+ dest_state.state = state;
+ dest_state.cb = cb;
+ return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
+ }
+
+ nir_const_value *
+ nir_src_as_const_value(nir_src src)
+ {
+ if (!src.is_ssa)
+ return NULL;
+
+ if (src.ssa->parent_instr->type != nir_instr_type_load_const)
+ return NULL;
+
+ nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
+
+ return &load->value;
+ }
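+
+ /* Editor's illustrative sketch (not part of this patch): a typical caller
+ * uses nir_src_as_const_value() to special-case operands that are known at
+ * compile time. "src" is a placeholder for whatever nir_src the caller is
+ * inspecting:
+ *
+ *    nir_const_value *val = nir_src_as_const_value(src);
+ *    if (val != NULL) {
+ *       // constant path: val->u[0], val->i[0], val->f[0], ... are usable
+ *    } else {
+ *       // non-constant path: fall back to the SSA or register source
+ *    }
+ */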
+
+ /**
+ * Returns true if the source is known to be dynamically uniform. Otherwise it
+ * returns false, which means the source may or may not be dynamically
+ * uniform but it can't be determined.
+ */
+ bool
+ nir_src_is_dynamically_uniform(nir_src src)
+ {
+ if (!src.is_ssa)
+ return false;
+
+ /* Constants are trivially dynamically uniform */
+ if (src.ssa->parent_instr->type == nir_instr_type_load_const)
+ return true;
+
+ /* As are uniform variables */
+ if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
+
+ if (intr->intrinsic == nir_intrinsic_load_uniform)
+ return true;
+ }
+
+ /* XXX: this could have many more tests, such as when a sampler function is
+ * called with dynamically uniform arguments.
+ */
+ return false;
+ }
+
+ static void
+ src_remove_all_uses(nir_src *src)
+ {
+ for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
+ if (!src_is_valid(src))
+ continue;
+
+ list_del(&src->use_link);
+ }
+ }
+
+ static void
+ src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
+ {
+ for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
+ if (!src_is_valid(src))
+ continue;
+
+ if (parent_instr) {
+ src->parent_instr = parent_instr;
+ if (src->is_ssa)
+ list_addtail(&src->use_link, &src->ssa->uses);
+ else
+ list_addtail(&src->use_link, &src->reg.reg->uses);
+ } else {
+ assert(parent_if);
+ src->parent_if = parent_if;
+ if (src->is_ssa)
+ list_addtail(&src->use_link, &src->ssa->if_uses);
+ else
+ list_addtail(&src->use_link, &src->reg.reg->if_uses);
+ }
+ }
+ }
+
+ void
+ nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
+ {
+ assert(!src_is_valid(src) || src->parent_instr == instr);
+
+ src_remove_all_uses(src);
+ *src = new_src;
+ src_add_all_uses(src, instr, NULL);
+ }
+
+ void
+ nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
+ {
+ assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);
+
+ src_remove_all_uses(dest);
+ src_remove_all_uses(src);
+ *dest = *src;
+ *src = NIR_SRC_INIT;
+ src_add_all_uses(dest, dest_instr, NULL);
+ }
+
+ void
+ nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
+ {
+ nir_src *src = &if_stmt->condition;
+ assert(!src_is_valid(src) || src->parent_if == if_stmt);
+
+ src_remove_all_uses(src);
+ *src = new_src;
+ src_add_all_uses(src, NULL, if_stmt);
+ }
+
+ void
+ nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
+ {
+ if (dest->is_ssa) {
+ /* We can only overwrite an SSA destination if it has no uses. */
+ assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
+ } else {
+ list_del(&dest->reg.def_link);
+ if (dest->reg.indirect)
+ src_remove_all_uses(dest->reg.indirect);
+ }
+
+ /* We can't rewrite with an SSA def. */
+ assert(!new_dest.is_ssa);
+
+ nir_dest_copy(dest, &new_dest, instr);
+
+ dest->reg.parent_instr = instr;
+ list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);
+
+ if (dest->reg.indirect)
+ src_add_all_uses(dest->reg.indirect, instr, NULL);
+ }
+
+ void
+ nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
+ unsigned num_components, const char *name)
+ {
+ def->name = name;
+ def->parent_instr = instr;
+ list_inithead(&def->uses);
+ list_inithead(&def->if_uses);
+ def->num_components = num_components;
+
+ if (instr->block) {
+ nir_function_impl *impl =
+ nir_cf_node_get_function(&instr->block->cf_node);
+
+ def->index = impl->ssa_alloc++;
+ } else {
+ def->index = UINT_MAX;
+ }
+ }
+
+ void
+ nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
+ unsigned num_components, const char *name)
+ {
+ dest->is_ssa = true;
+ nir_ssa_def_init(instr, &dest->ssa, num_components, name);
+ }
+
+ void
+ nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
+ {
+ assert(!new_src.is_ssa || def != new_src.ssa);
+
+ nir_foreach_use_safe(def, use_src)
+ nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
+
+ nir_foreach_if_use_safe(def, use_src)
+ nir_if_rewrite_condition(use_src->parent_if, new_src);
+ }
+
+ static bool
+ is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
+ {
+ assert(start->block == end->block);
+
+ if (between->block != start->block)
+ return false;
+
+ /* Search backwards looking for "between" */
+ while (start != end) {
+ if (between == end)
+ return true;
+
+ end = nir_instr_prev(end);
+ assert(end);
+ }
+
+ return false;
+ }
+
+ /* Replaces all uses of the given SSA def with the given source but only if
+ * the use comes after the after_me instruction. This can be useful if you
+ * are emitting code to fix up the result of some instruction: you can freely
+ * use the result in that code and then call rewrite_uses_after and pass the
+ * last fixup instruction as after_me and it will replace all of the uses you
+ * want without touching the fixup code.
+ *
+ * This function assumes that after_me is in the same block as
+ * def->parent_instr and that after_me comes after def->parent_instr.
+ */
+ void
+ nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
+ nir_instr *after_me)
+ {
+ assert(!new_src.is_ssa || def != new_src.ssa);
+
+ nir_foreach_use_safe(def, use_src) {
+ assert(use_src->parent_instr != def->parent_instr);
+ /* Since def already dominates all of its uses, the only way a use can
+ * not be dominated by after_me is if it is between def and after_me in
+ * the instruction list.
+ */
+ if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
+ nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
+ }
+
+ nir_foreach_if_use_safe(def, use_src)
+ nir_if_rewrite_condition(use_src->parent_if, new_src);
+ }
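+
+ /* A minimal usage sketch (the names `old_def` and `new_def` are
+ * hypothetical, not part of this patch): after emitting fixup code whose
+ * final result is `new_def`, replace every other use of `old_def` with it
+ * while leaving the fixup code itself untouched:
+ *
+ * nir_ssa_def_rewrite_uses_after(old_def, nir_src_for_ssa(new_def),
+ * new_def->parent_instr);
+ */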
+
+ static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+ bool reverse, void *state);
+
+ static inline bool
+ foreach_if(nir_if *if_stmt, nir_foreach_block_cb cb, bool reverse, void *state)
+ {
+ if (reverse) {
+ foreach_list_typed_reverse_safe(nir_cf_node, node, node,
+ &if_stmt->else_list) {
+ if (!foreach_cf_node(node, cb, reverse, state))
+ return false;
+ }
+
+ foreach_list_typed_reverse_safe(nir_cf_node, node, node,
+ &if_stmt->then_list) {
+ if (!foreach_cf_node(node, cb, reverse, state))
+ return false;
+ }
+ } else {
+ foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->then_list) {
+ if (!foreach_cf_node(node, cb, reverse, state))
+ return false;
+ }
+
+ foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->else_list) {
+ if (!foreach_cf_node(node, cb, reverse, state))
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ static inline bool
+ foreach_loop(nir_loop *loop, nir_foreach_block_cb cb, bool reverse, void *state)
+ {
+ if (reverse) {
+ foreach_list_typed_reverse_safe(nir_cf_node, node, node, &loop->body) {
+ if (!foreach_cf_node(node, cb, reverse, state))
+ return false;
+ }
+ } else {
+ foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) {
+ if (!foreach_cf_node(node, cb, reverse, state))
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ static bool
+ foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+ bool reverse, void *state)
+ {
+ switch (node->type) {
+ case nir_cf_node_block:
+ return cb(nir_cf_node_as_block(node), state);
+ case nir_cf_node_if:
+ return foreach_if(nir_cf_node_as_if(node), cb, reverse, state);
+ case nir_cf_node_loop:
+ return foreach_loop(nir_cf_node_as_loop(node), cb, reverse, state);
+ break;
+
+ default:
+ unreachable("Invalid CFG node type");
+ break;
+ }
+
+ return false;
+ }
+
+ bool
+ nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+ void *state)
+ {
+ return foreach_cf_node(node, cb, false, state);
+ }
+
+ bool
+ nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, void *state)
+ {
+ foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) {
+ if (!foreach_cf_node(node, cb, false, state))
+ return false;
+ }
+
+ return cb(impl->end_block, state);
+ }
+
+ bool
+ nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
+ void *state)
+ {
+ if (!cb(impl->end_block, state))
+ return false;
+
+ foreach_list_typed_reverse_safe(nir_cf_node, node, node, &impl->body) {
+ if (!foreach_cf_node(node, cb, true, state))
+ return false;
+ }
+
+ return true;
+ }
+
+ nir_if *
+ nir_block_get_following_if(nir_block *block)
+ {
+ if (exec_node_is_tail_sentinel(&block->cf_node.node))
+ return NULL;
+
+ if (nir_cf_node_is_last(&block->cf_node))
+ return NULL;
+
+ nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
+
+ if (next_node->type != nir_cf_node_if)
+ return NULL;
+
+ return nir_cf_node_as_if(next_node);
+ }
+
+ nir_loop *
+ nir_block_get_following_loop(nir_block *block)
+ {
+ if (exec_node_is_tail_sentinel(&block->cf_node.node))
+ return NULL;
+
+ if (nir_cf_node_is_last(&block->cf_node))
+ return NULL;
+
+ nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
+
+ if (next_node->type != nir_cf_node_loop)
+ return NULL;
+
+ return nir_cf_node_as_loop(next_node);
+ }
+
+ static bool
+ index_block(nir_block *block, void *state)
+ {
+ unsigned *index = state;
+ block->index = (*index)++;
+ return true;
+ }
+
+ void
+ nir_index_blocks(nir_function_impl *impl)
+ {
+ unsigned index = 0;
+
+ if (impl->valid_metadata & nir_metadata_block_index)
+ return;
+
+ nir_foreach_block(impl, index_block, &index);
+
+ impl->num_blocks = index;
+ }
+
+ static bool
+ index_ssa_def_cb(nir_ssa_def *def, void *state)
+ {
+ unsigned *index = (unsigned *) state;
+ def->index = (*index)++;
+
+ return true;
+ }
+
+ static bool
+ index_ssa_block(nir_block *block, void *state)
+ {
+ nir_foreach_instr(block, instr)
+ nir_foreach_ssa_def(instr, index_ssa_def_cb, state);
+
+ return true;
+ }
+
+ /**
+ * The indices are applied top-to-bottom which has the very nice property
+ * that, if A dominates B, then A->index <= B->index.
+ */
+ void
+ nir_index_ssa_defs(nir_function_impl *impl)
+ {
+ unsigned index = 0;
+ nir_foreach_block(impl, index_ssa_block, &index);
+ impl->ssa_alloc = index;
+ }
+
+ static bool
+ index_instrs_block(nir_block *block, void *state)
+ {
+ unsigned *index = state;
+ nir_foreach_instr(block, instr)
+ instr->index = (*index)++;
+
+ return true;
+ }
+
+ /**
+ * The indices are applied top-to-bottom which has the very nice property
+ * that, if A dominates B, then A->index <= B->index.
+ */
+ unsigned
+ nir_index_instrs(nir_function_impl *impl)
+ {
+ unsigned index = 0;
+ nir_foreach_block(impl, index_instrs_block, &index);
+ return index;
+ }
+
+ nir_intrinsic_op
+ nir_intrinsic_from_system_value(gl_system_value val)
+ {
+ switch (val) {
+ case SYSTEM_VALUE_VERTEX_ID:
+ return nir_intrinsic_load_vertex_id;
+ case SYSTEM_VALUE_INSTANCE_ID:
+ return nir_intrinsic_load_instance_id;
+ case SYSTEM_VALUE_DRAW_ID:
+ return nir_intrinsic_load_draw_id;
+ case SYSTEM_VALUE_BASE_INSTANCE:
+ return nir_intrinsic_load_base_instance;
+ case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+ return nir_intrinsic_load_vertex_id_zero_base;
+ case SYSTEM_VALUE_BASE_VERTEX:
+ return nir_intrinsic_load_base_vertex;
+ case SYSTEM_VALUE_INVOCATION_ID:
+ return nir_intrinsic_load_invocation_id;
+ case SYSTEM_VALUE_FRONT_FACE:
+ return nir_intrinsic_load_front_face;
+ case SYSTEM_VALUE_SAMPLE_ID:
+ return nir_intrinsic_load_sample_id;
+ case SYSTEM_VALUE_SAMPLE_POS:
+ return nir_intrinsic_load_sample_pos;
+ case SYSTEM_VALUE_SAMPLE_MASK_IN:
+ return nir_intrinsic_load_sample_mask_in;
+ case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
+ return nir_intrinsic_load_local_invocation_id;
+ case SYSTEM_VALUE_WORK_GROUP_ID:
+ return nir_intrinsic_load_work_group_id;
+ case SYSTEM_VALUE_NUM_WORK_GROUPS:
+ return nir_intrinsic_load_num_work_groups;
+ case SYSTEM_VALUE_PRIMITIVE_ID:
+ return nir_intrinsic_load_primitive_id;
+ case SYSTEM_VALUE_TESS_COORD:
+ return nir_intrinsic_load_tess_coord;
+ case SYSTEM_VALUE_TESS_LEVEL_OUTER:
+ return nir_intrinsic_load_tess_level_outer;
+ case SYSTEM_VALUE_TESS_LEVEL_INNER:
+ return nir_intrinsic_load_tess_level_inner;
+ case SYSTEM_VALUE_VERTICES_IN:
+ return nir_intrinsic_load_patch_vertices_in;
+ case SYSTEM_VALUE_HELPER_INVOCATION:
+ return nir_intrinsic_load_helper_invocation;
+ default:
+ unreachable("system value does not directly correspond to intrinsic");
+ }
+ }
+
+ gl_system_value
+ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
+ {
+ switch (intrin) {
+ case nir_intrinsic_load_vertex_id:
+ return SYSTEM_VALUE_VERTEX_ID;
+ case nir_intrinsic_load_instance_id:
+ return SYSTEM_VALUE_INSTANCE_ID;
+ case nir_intrinsic_load_draw_id:
+ return SYSTEM_VALUE_DRAW_ID;
+ case nir_intrinsic_load_base_instance:
+ return SYSTEM_VALUE_BASE_INSTANCE;
+ case nir_intrinsic_load_vertex_id_zero_base:
+ return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
+ case nir_intrinsic_load_base_vertex:
+ return SYSTEM_VALUE_BASE_VERTEX;
+ case nir_intrinsic_load_invocation_id:
+ return SYSTEM_VALUE_INVOCATION_ID;
+ case nir_intrinsic_load_front_face:
+ return SYSTEM_VALUE_FRONT_FACE;
+ case nir_intrinsic_load_sample_id:
+ return SYSTEM_VALUE_SAMPLE_ID;
+ case nir_intrinsic_load_sample_pos:
+ return SYSTEM_VALUE_SAMPLE_POS;
+ case nir_intrinsic_load_sample_mask_in:
+ return SYSTEM_VALUE_SAMPLE_MASK_IN;
+ case nir_intrinsic_load_local_invocation_id:
+ return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
+ case nir_intrinsic_load_num_work_groups:
+ return SYSTEM_VALUE_NUM_WORK_GROUPS;
+ case nir_intrinsic_load_work_group_id:
+ return SYSTEM_VALUE_WORK_GROUP_ID;
+ case nir_intrinsic_load_primitive_id:
+ return SYSTEM_VALUE_PRIMITIVE_ID;
+ case nir_intrinsic_load_tess_coord:
+ return SYSTEM_VALUE_TESS_COORD;
+ case nir_intrinsic_load_tess_level_outer:
+ return SYSTEM_VALUE_TESS_LEVEL_OUTER;
+ case nir_intrinsic_load_tess_level_inner:
+ return SYSTEM_VALUE_TESS_LEVEL_INNER;
+ case nir_intrinsic_load_patch_vertices_in:
+ return SYSTEM_VALUE_VERTICES_IN;
+ case nir_intrinsic_load_helper_invocation:
+ return SYSTEM_VALUE_HELPER_INVOCATION;
+ default:
+ unreachable("intrinsic doesn't produce a system value");
+ }
+ }
--- /dev/null
-#define NIR_SRC_INIT (nir_src) { { NULL } }
+ /*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #pragma once
+
+ #include "util/hash_table.h"
+ #include "compiler/glsl/list.h"
+ #include "GL/gl.h" /* GLenum */
+ #include "util/list.h"
+ #include "util/ralloc.h"
+ #include "util/set.h"
+ #include "util/bitset.h"
+ #include "compiler/nir_types.h"
+ #include "compiler/shader_enums.h"
+ #include <stdio.h>
+
+ #include "nir_opcodes.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ struct gl_program;
+ struct gl_shader_program;
+
+ #define NIR_FALSE 0u
+ #define NIR_TRUE (~0u)
+
+ /** Defines a cast function
+ *
+ * This macro defines a cast function from in_type to out_type where
+ * out_type is some structure type that contains a field of type in_type.
+ *
+ * Note that you have to be a bit careful as the generated cast function
+ * destroys constness.
+ */
+ #define NIR_DEFINE_CAST(name, in_type, out_type, field) \
+ static inline out_type * \
+ name(const in_type *parent) \
+ { \
+ return exec_node_data(out_type, parent, field); \
+ }
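+
+ /* For example (a sketch of the expansion; the concrete casts are defined
+ * later in this file), NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr,
+ * nir_alu_instr, instr) expands to:
+ *
+ * static inline nir_alu_instr *
+ * nir_instr_as_alu(const nir_instr *parent)
+ * {
+ * return exec_node_data(nir_alu_instr, parent, instr);
+ * }
+ */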
+
+ struct nir_function;
+ struct nir_shader;
+ struct nir_instr;
+
+
+ /**
+ * Description of built-in state associated with a uniform
+ *
+ * \sa nir_variable::state_slots
+ */
+ typedef struct {
+ int tokens[5];
+ int swizzle;
+ } nir_state_slot;
+
+ typedef enum {
+ nir_var_all = -1,
+ nir_var_shader_in,
+ nir_var_shader_out,
+ nir_var_global,
+ nir_var_local,
+ nir_var_uniform,
+ nir_var_shader_storage,
++ nir_var_shared,
+ nir_var_system_value
+ } nir_variable_mode;
+
+ /**
+ * Data stored in an nir_constant
+ */
+ union nir_constant_data {
+ unsigned u[16];
+ int i[16];
+ float f[16];
+ bool b[16];
+ };
+
+ typedef struct nir_constant {
+ /**
+ * Value of the constant.
+ *
+ * The field used to back the values supplied by the constant is determined
+ * by the type associated with the \c nir_variable. Constants may be
+ * scalars, vectors, or matrices.
+ */
+ union nir_constant_data value;
+
+ /* We could get this from var->type, but not having to care about the type
+ * makes clone *much* easier.
+ */
+ unsigned num_elements;
+
+ /* Array elements / Structure Fields */
+ struct nir_constant **elements;
+ } nir_constant;
+
+ /**
+ * \brief Layout qualifiers for gl_FragDepth.
+ *
+ * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
+ * with a layout qualifier.
+ */
+ typedef enum {
+ nir_depth_layout_none, /**< No depth layout is specified. */
+ nir_depth_layout_any,
+ nir_depth_layout_greater,
+ nir_depth_layout_less,
+ nir_depth_layout_unchanged
+ } nir_depth_layout;
+
+ /**
+ * Either a uniform, global variable, shader input, or shader output. Based on
+ * ir_variable - it should be easy to translate between the two.
+ */
+
+ typedef struct nir_variable {
+ struct exec_node node;
+
+ /**
+ * Declared type of the variable
+ */
+ const struct glsl_type *type;
+
+ /**
+ * Declared name of the variable
+ */
+ char *name;
+
+ struct nir_variable_data {
+
+ /**
+ * Is the variable read-only?
+ *
+ * This is set for variables declared as \c const, shader inputs,
+ * and uniforms.
+ */
+ unsigned read_only:1;
+ unsigned centroid:1;
+ unsigned sample:1;
+ unsigned patch:1;
+ unsigned invariant:1;
+
+ /**
+ * Storage class of the variable.
+ *
+ * \sa nir_variable_mode
+ */
+ nir_variable_mode mode:4;
+
+ /**
+ * Interpolation mode for shader inputs / outputs
+ *
+ * \sa glsl_interp_qualifier
+ */
+ unsigned interpolation:2;
+
+ /**
+ * \name ARB_fragment_coord_conventions
+ * @{
+ */
+ unsigned origin_upper_left:1;
+ unsigned pixel_center_integer:1;
+ /*@}*/
+
+ /**
+ * Was the location explicitly set in the shader?
+ *
+ * If the location is explicitly set in the shader, it \b cannot be changed
+ * by the linker or by the API (e.g., calls to \c glBindAttribLocation have
+ * no effect).
+ */
+ unsigned explicit_location:1;
+ unsigned explicit_index:1;
+
+ /**
+ * Was an initial binding explicitly set in the shader?
+ *
+ * If so, constant_initializer contains an integer nir_constant
+ * representing the initial binding point.
+ */
+ unsigned explicit_binding:1;
+
+ /**
+ * Does this variable have an initializer?
+ *
+ * This is used by the linker to cross-validate initializers of global
+ * variables.
+ */
+ unsigned has_initializer:1;
+
+ /**
+ * If non-zero, then this variable may be packed along with other variables
+ * into a single varying slot, so this offset should be applied when
+ * accessing components. For example, an offset of 1 means that the x
+ * component of this variable is actually stored in component y of the
+ * location specified by \c location.
+ */
+ unsigned location_frac:2;
+
+ /**
+ * Non-zero if this variable was created by lowering a named interface
+ * block which was not an array.
+ *
+ * Note that this variable and \c from_named_ifc_block_array will never
+ * both be non-zero.
+ */
+ unsigned from_named_ifc_block_nonarray:1;
+
+ /**
+ * Non-zero if this variable was created by lowering a named interface
+ * block which was an array.
+ *
+ * Note that this variable and \c from_named_ifc_block_nonarray will never
+ * both be non-zero.
+ */
+ unsigned from_named_ifc_block_array:1;
+
+ /**
+ * \brief Layout qualifier for gl_FragDepth.
+ *
+ * This is not equal to \c ir_depth_layout_none if and only if this
+ * variable is \c gl_FragDepth and a layout qualifier is specified.
+ */
+ nir_depth_layout depth_layout;
+
+ /**
+ * Storage location of the base of this variable
+ *
+ * The precise meaning of this field depends on the nature of the variable.
+ *
+ * - Vertex shader input: one of the values from \c gl_vert_attrib.
+ * - Vertex shader output: one of the values from \c gl_varying_slot.
+ * - Geometry shader input: one of the values from \c gl_varying_slot.
+ * - Geometry shader output: one of the values from \c gl_varying_slot.
+ * - Fragment shader input: one of the values from \c gl_varying_slot.
+ * - Fragment shader output: one of the values from \c gl_frag_result.
+ * - Uniforms: Per-stage uniform slot number for default uniform block.
+ * - Uniforms: Index within the uniform block definition for UBO members.
+ * - Non-UBO Uniforms: uniform slot number.
+ * - Other: This field is not currently used.
+ *
+ * If the variable is a uniform, shader input, or shader output, and the
+ * slot has not been assigned, the value will be -1.
+ */
+ int location;
+
+ /**
+ * The actual location of the variable in the IR. Only valid for inputs
+ * and outputs.
+ */
+ unsigned int driver_location;
+
+ /**
+ * output index for dual source blending.
+ */
+ int index;
+
++ /**
++ * Descriptor set binding for sampler or UBO.
++ */
++ int descriptor_set;
++
+ /**
+ * Initial binding point for a sampler or UBO.
+ *
+ * For array types, this represents the binding point for the first element.
+ */
+ int binding;
+
+ /**
+ * Location an atomic counter is stored at.
+ */
+ unsigned offset;
+
+ /**
+ * ARB_shader_image_load_store qualifiers.
+ */
+ struct {
+ bool read_only; /**< "readonly" qualifier. */
+ bool write_only; /**< "writeonly" qualifier. */
+ bool coherent;
+ bool _volatile;
+ bool restrict_flag;
+
+ /** Image internal format if specified explicitly, otherwise GL_NONE. */
+ GLenum format;
+ } image;
+
+ /**
+ * Highest element accessed with a constant expression array index
+ *
+ * Not used for non-array variables.
+ */
+ unsigned max_array_access;
+
+ } data;
+
+ /**
+ * Built-in state that backs this uniform
+ *
+ * Once set at variable creation, \c state_slots must remain invariant.
+ * This is because, ideally, this array would be shared by all clones of
+ * this variable in the IR tree. In other words, we'd really like for it
+ * to be a fly-weight.
+ *
+ * If the variable is not a uniform, \c num_state_slots will be zero and
+ * \c state_slots will be \c NULL.
+ */
+ /*@{*/
+ unsigned num_state_slots; /**< Number of state slots used */
+ nir_state_slot *state_slots; /**< State descriptors. */
+ /*@}*/
+
+ /**
+ * Constant expression assigned in the initializer of the variable
+ */
+ nir_constant *constant_initializer;
+
+ /**
+ * For variables that are in an interface block or are an instance of an
+ * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
+ *
+ * \sa ir_variable::location
+ */
+ const struct glsl_type *interface_type;
+ } nir_variable;
+
+ #define nir_foreach_variable(var, var_list) \
+ foreach_list_typed(nir_variable, var, node, var_list)
+
++/**
++ * Returns the bits in the inputs_read, outputs_written, or
++ * system_values_read bitfield corresponding to this variable.
++ */
++static inline uint64_t
++nir_variable_get_io_mask(nir_variable *var, gl_shader_stage stage)
++{
++ assert(var->data.mode == nir_var_shader_in ||
++ var->data.mode == nir_var_shader_out ||
++ var->data.mode == nir_var_system_value);
++ assert(var->data.location >= 0);
++
++ const struct glsl_type *var_type = var->type;
++ if (stage == MESA_SHADER_GEOMETRY && var->data.mode == nir_var_shader_in) {
++ /* Most geometry shader inputs are per-vertex arrays */
++ if (var->data.location >= VARYING_SLOT_VAR0)
++ assert(glsl_type_is_array(var_type));
++
++ if (glsl_type_is_array(var_type))
++ var_type = glsl_get_array_element(var_type);
++ }
++
++ bool is_vertex_input = (var->data.mode == nir_var_shader_in &&
++ stage == MESA_SHADER_VERTEX);
++ unsigned slots = glsl_count_attribute_slots(var_type, is_vertex_input);
++ return ((1ull << slots) - 1) << var->data.location;
++}
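++
++/* A minimal sketch of the intended use (the gathering pass itself is
++ * hypothetical): a pass walking the shader's input variables could do
++ *
++ * shader->info.inputs_read |= nir_variable_get_io_mask(var, shader->stage);
++ *
++ * to record which input slots the variable covers.
++ */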
++
+ typedef struct nir_register {
+ struct exec_node node;
+
+ unsigned num_components; /** < number of vector components */
+ unsigned num_array_elems; /** < size of array (0 for no array) */
+
+ /** generic register index. */
+ unsigned index;
+
+ /** only for debug purposes, can be NULL */
+ const char *name;
+
+ /** whether this register is local (per-function) or global (per-shader) */
+ bool is_global;
+
+ /**
+ * If this flag is set to true, then accessing channels >= num_components
+ * is well-defined, and simply spills over to the next array element. This
+ * is useful for backends that can do per-component accessing, in
+ * particular scalar backends. By setting this flag and making
+ * num_components equal to 1, structures can be packed tightly into
+ * registers and then registers can be accessed per-component to get to
+ * each structure member, even if it crosses vec4 boundaries.
+ */
+ bool is_packed;
+
+ /** set of nir_src's where this register is used (read from) */
+ struct list_head uses;
+
+ /** set of nir_dest's where this register is defined (written to) */
+ struct list_head defs;
+
+ /** set of nir_if's where this register is used as a condition */
+ struct list_head if_uses;
+ } nir_register;
+
+ typedef enum {
+ nir_instr_type_alu,
+ nir_instr_type_call,
+ nir_instr_type_tex,
+ nir_instr_type_intrinsic,
+ nir_instr_type_load_const,
+ nir_instr_type_jump,
+ nir_instr_type_ssa_undef,
+ nir_instr_type_phi,
+ nir_instr_type_parallel_copy,
+ } nir_instr_type;
+
+ typedef struct nir_instr {
+ struct exec_node node;
+ nir_instr_type type;
+ struct nir_block *block;
+
+ /** generic instruction index. */
+ unsigned index;
+
+ /* A temporary for optimization and analysis passes to use for storing
+ * flags. For instance, DCE uses this to store the "dead/live" info.
+ */
+ uint8_t pass_flags;
+ } nir_instr;
+
+ static inline nir_instr *
+ nir_instr_next(nir_instr *instr)
+ {
+ struct exec_node *next = exec_node_get_next(&instr->node);
+ if (exec_node_is_tail_sentinel(next))
+ return NULL;
+ else
+ return exec_node_data(nir_instr, next, node);
+ }
+
+ static inline nir_instr *
+ nir_instr_prev(nir_instr *instr)
+ {
+ struct exec_node *prev = exec_node_get_prev(&instr->node);
+ if (exec_node_is_head_sentinel(prev))
+ return NULL;
+ else
+ return exec_node_data(nir_instr, prev, node);
+ }
+
+ static inline bool
+ nir_instr_is_first(nir_instr *instr)
+ {
+ return exec_node_is_head_sentinel(exec_node_get_prev(&instr->node));
+ }
+
+ static inline bool
+ nir_instr_is_last(nir_instr *instr)
+ {
+ return exec_node_is_tail_sentinel(exec_node_get_next(&instr->node));
+ }
+
+ typedef struct nir_ssa_def {
+ /** for debugging only, can be NULL */
+ const char* name;
+
+ /** generic SSA definition index. */
+ unsigned index;
+
+ /** Index into the live_in and live_out bitfields */
+ unsigned live_index;
+
+ nir_instr *parent_instr;
+
+ /** set of nir_instr's where this register is used (read from) */
+ struct list_head uses;
+
+ /** set of nir_if's where this register is used as a condition */
+ struct list_head if_uses;
+
+ uint8_t num_components;
+ } nir_ssa_def;
+
+ struct nir_src;
+
+ typedef struct {
+ nir_register *reg;
+ struct nir_src *indirect; /** < NULL for no indirect offset */
+ unsigned base_offset;
+
+ /* TODO use-def chain goes here */
+ } nir_reg_src;
+
+ typedef struct {
+ nir_instr *parent_instr;
+ struct list_head def_link;
+
+ nir_register *reg;
+ struct nir_src *indirect; /** < NULL for no indirect offset */
+ unsigned base_offset;
+
+ /* TODO def-use chain goes here */
+ } nir_reg_dest;
+
+ struct nir_if;
+
+ typedef struct nir_src {
+ union {
+ nir_instr *parent_instr;
+ struct nir_if *parent_if;
+ };
+
+ struct list_head use_link;
+
+ union {
+ nir_reg_src reg;
+ nir_ssa_def *ssa;
+ };
+
+ bool is_ssa;
+ } nir_src;
+
-#define NIR_DEST_INIT (nir_dest) { { { NULL } } }
++#ifdef __cplusplus
++# define NIR_SRC_INIT nir_src()
++#else
++# define NIR_SRC_INIT (nir_src) { { NULL } }
++#endif
+
+ #define nir_foreach_use(reg_or_ssa_def, src) \
+ list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
+
+ #define nir_foreach_use_safe(reg_or_ssa_def, src) \
+ list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
+
+ #define nir_foreach_if_use(reg_or_ssa_def, src) \
+ list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)
+
+ #define nir_foreach_if_use_safe(reg_or_ssa_def, src) \
+ list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)
+
+ typedef struct {
+ union {
+ nir_reg_dest reg;
+ nir_ssa_def ssa;
+ };
+
+ bool is_ssa;
+ } nir_dest;
+
- /** The size of the sampler array or 0 if it's not an array */
- unsigned sampler_array_size;
-
- nir_deref_var *sampler; /* if this is NULL, use sampler_index instead */
++#ifdef __cplusplus
++# define NIR_DEST_INIT nir_dest()
++#else
++# define NIR_DEST_INIT (nir_dest) { { { NULL } } }
++#endif
+
+ #define nir_foreach_def(reg, dest) \
+ list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)
+
+ #define nir_foreach_def_safe(reg, dest) \
+ list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link)
+
+ static inline nir_src
+ nir_src_for_ssa(nir_ssa_def *def)
+ {
+ nir_src src = NIR_SRC_INIT;
+
+ src.is_ssa = true;
+ src.ssa = def;
+
+ return src;
+ }
+
+ static inline nir_src
+ nir_src_for_reg(nir_register *reg)
+ {
+ nir_src src = NIR_SRC_INIT;
+
+ src.is_ssa = false;
+ src.reg.reg = reg;
+ src.reg.indirect = NULL;
+ src.reg.base_offset = 0;
+
+ return src;
+ }
+
+ static inline nir_dest
+ nir_dest_for_reg(nir_register *reg)
+ {
+ nir_dest dest = NIR_DEST_INIT;
+
+ dest.reg.reg = reg;
+
+ return dest;
+ }
+
+ void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
+ void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
+
+ typedef struct {
+ nir_src src;
+
+ /**
+ * \name input modifiers
+ */
+ /*@{*/
+ /**
+ * For inputs interpreted as floating point, flips the sign bit. For
+ * inputs interpreted as integers, performs the two's complement negation.
+ */
+ bool negate;
+
+ /**
+ * Clears the sign bit for floating point values, and computes the integer
+ * absolute value for integers. Note that the negate modifier acts after
+ * the absolute value modifier, therefore if both are set then all inputs
+ * will become negative.
+ */
+ bool abs;
+ /*@}*/
+
+ /**
+ * For each input component, says which component of the register it is
+ * chosen from. Note that which elements of the swizzle are used and which
+ * are ignored are based on the write mask for most opcodes - for example,
+ * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and
+ * a swizzle of {2, x, 1, 0} where x means "don't care."
+ */
+ uint8_t swizzle[4];
+ } nir_alu_src;
+
+ typedef struct {
+ nir_dest dest;
+
+ /**
+ * \name saturate output modifier
+ *
+ * Only valid for opcodes that output floating-point numbers. Clamps the
+ * output to between 0.0 and 1.0 inclusive.
+ */
+
+ bool saturate;
+
+ unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
+ } nir_alu_dest;
+
+ typedef enum {
+ nir_type_invalid = 0, /* Not a valid type */
+ nir_type_float,
+ nir_type_int,
+ nir_type_uint,
+ nir_type_bool
+ } nir_alu_type;
+
+ typedef enum {
+ NIR_OP_IS_COMMUTATIVE = (1 << 0),
+ NIR_OP_IS_ASSOCIATIVE = (1 << 1),
+ } nir_op_algebraic_property;
+
+ typedef struct {
+ const char *name;
+
+ unsigned num_inputs;
+
+ /**
+ * The number of components in the output
+ *
+ * If non-zero, this is the size of the output and input sizes are
+ * explicitly given; swizzle and writemask are still in effect, but if
+ * the output component is masked out, then the input component may
+ * still be in use.
+ *
+ * If zero, the opcode acts in the standard, per-component manner; the
+ * operation is performed on each component (except the ones that are
+ * masked out) with the input being taken from the input swizzle for
+ * that component.
+ *
+ * The size of some of the inputs may be given (i.e. non-zero) even
+ * though output_size is zero; in that case, the inputs with a zero
+ * size act per-component, while the inputs with non-zero size don't.
+ */
+ unsigned output_size;
+
+ /**
+ * The type of vector that the instruction outputs. Note that the
+ * saturate modifier is only allowed on outputs with the float type.
+ */
+
+ nir_alu_type output_type;
+
+ /**
+ * The number of components in each input
+ */
+ unsigned input_sizes[4];
+
+ /**
+ * The type of vector that each input takes. Note that negate and
+ * absolute value are only allowed on inputs with int or float type and
+ * behave differently on the two.
+ */
+ nir_alu_type input_types[4];
+
+ nir_op_algebraic_property algebraic_properties;
+ } nir_op_info;
+
+ extern const nir_op_info nir_op_infos[nir_num_opcodes];
+
+ typedef struct nir_alu_instr {
+ nir_instr instr;
+ nir_op op;
+ nir_alu_dest dest;
+ nir_alu_src src[];
+ } nir_alu_instr;
+
+ void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+ nir_alu_instr *instr);
+ void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+ nir_alu_instr *instr);
+
+ /* is this source channel used? */
+ static inline bool
+ nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel)
+ {
+ if (nir_op_infos[instr->op].input_sizes[src] > 0)
+ return channel < nir_op_infos[instr->op].input_sizes[src];
+
+ return (instr->dest.write_mask >> channel) & 1;
+ }
+
+ /*
+ * For instructions whose destinations are SSA, get the number of channels
+ * used for a source
+ */
+ static inline unsigned
+ nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
+ {
+ assert(instr->dest.dest.is_ssa);
+
+ if (nir_op_infos[instr->op].input_sizes[src] > 0)
+ return nir_op_infos[instr->op].input_sizes[src];
+
+ return instr->dest.dest.ssa.num_components;
+ }
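+
+ /* A typical per-channel walk over ALU source `s` of an SSA-destination ALU
+ * instruction `alu` (both names are placeholders) looks like this sketch:
+ *
+ * for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu, s); i++) {
+ * if (!nir_alu_instr_channel_used(alu, s, i))
+ * continue;
+ * // channel i reads component alu->src[s].swizzle[i] of the source value
+ * }
+ */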
+
+ typedef enum {
+ nir_deref_type_var,
+ nir_deref_type_array,
+ nir_deref_type_struct
+ } nir_deref_type;
+
+ typedef struct nir_deref {
+ nir_deref_type deref_type;
+ struct nir_deref *child;
+ const struct glsl_type *type;
+ } nir_deref;
+
+ typedef struct {
+ nir_deref deref;
+
+ nir_variable *var;
+ } nir_deref_var;
+
+ /* This enum describes how the array is referenced. If the deref is
+ * direct then the base_offset is used. If the deref is indirect then the
+ * offset is given by base_offset + indirect. If the deref is a wildcard
+ * then the deref refers to all of the elements of the array at the same
+ * time. Wildcard dereferences are only ever allowed in copy_var
+ * intrinsics and the source and destination derefs must have matching
+ * wildcards.
+ */
+ typedef enum {
+ nir_deref_array_type_direct,
+ nir_deref_array_type_indirect,
+ nir_deref_array_type_wildcard,
+ } nir_deref_array_type;
+
+ typedef struct {
+ nir_deref deref;
+
+ nir_deref_array_type deref_array_type;
+ unsigned base_offset;
+ nir_src indirect;
+ } nir_deref_array;
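+
+ /* For illustration (the variable names are hypothetical): a direct deref of
+ * a[3] would use deref_array_type = nir_deref_array_type_direct with
+ * base_offset = 3, while a[i + 2] would use nir_deref_array_type_indirect
+ * with base_offset = 2 and `indirect` pointing at the SSA value of i.
+ */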
+
+ typedef struct {
+ nir_deref deref;
+
+ unsigned index;
+ } nir_deref_struct;
+
+ NIR_DEFINE_CAST(nir_deref_as_var, nir_deref, nir_deref_var, deref)
+ NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref)
+ NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref)
+
+ /* Returns the last deref in the chain. */
+ static inline nir_deref *
+ nir_deref_tail(nir_deref *deref)
+ {
+ while (deref->child)
+ deref = deref->child;
+ return deref;
+ }
+
+ typedef struct {
+ nir_instr instr;
+
+ unsigned num_params;
+ nir_deref_var **params;
+ nir_deref_var *return_deref;
+
+ struct nir_function *callee;
+ } nir_call_instr;
+
+ #define INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, \
+ num_variables, num_indices, flags) \
+ nir_intrinsic_##name,
+
+ #define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name,
+
+ typedef enum {
+ #include "nir_intrinsics.h"
+ nir_num_intrinsics = nir_last_intrinsic + 1
+ } nir_intrinsic_op;
+
+ #undef INTRINSIC
+ #undef LAST_INTRINSIC
+
+ /** Represents an intrinsic
+ *
+ * An intrinsic is an instruction type for handling things that are
+ * more-or-less regular operations but don't just consume and produce SSA
+ * values like ALU operations do. Intrinsics are not for things that have
+ * special semantic meaning such as phi nodes and parallel copies.
+ * Examples of intrinsics include variable load/store operations, system
+ * value loads, and the like. Even though texturing more-or-less falls
+ * under this category, texturing is its own instruction type because
+ * trying to represent texturing with intrinsics would lead to a
+ * combinatorial explosion of intrinsic opcodes.
+ *
+ * By having a single instruction type for handling a lot of different
+ * cases, optimization passes can look for intrinsics and, for the most
+ * part, completely ignore them. Each intrinsic type also has a few
+ * possible flags that govern whether or not they can be reordered or
+ * eliminated. That way passes like dead code elimination can still work
+ * on intrinsics without understanding the meaning of each.
+ *
+ * Each intrinsic has some number of constant indices, some number of
+ * variables, and some number of sources. What these sources, variables,
+ * and indices mean depends on the intrinsic and is documented with the
+ * intrinsic declaration in nir_intrinsics.h. Intrinsics and texture
+ * instructions are the only types of instruction that can operate on
+ * variables.
+ */
+ typedef struct {
+ nir_instr instr;
+
+ nir_intrinsic_op intrinsic;
+
+ nir_dest dest;
+
+ /** number of components if this is a vectorized intrinsic
+ *
+ * Similarly to ALU operations, some intrinsics are vectorized.
+ * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0.
+ * For vectorized intrinsics, the num_components field specifies the
+ * number of destination components and the number of source components
+ * for all sources with nir_intrinsic_infos.src_components[i] == 0.
+ */
+ uint8_t num_components;
+
+ int const_index[3];
+
+ nir_deref_var *variables[2];
+
+ nir_src src[];
+ } nir_intrinsic_instr;
+
+ /**
+ * \name NIR intrinsics semantic flags
+ *
+ * information about what the compiler can do with the intrinsics.
+ *
+ * \sa nir_intrinsic_info::flags
+ */
+ typedef enum {
+ /**
+ * Whether the intrinsic can be safely eliminated if none of its output
+ * values are being used.
+ */
+ NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0),
+
+ /**
+ * Whether the intrinsic can be reordered with respect to any other
+ * intrinsic, i.e. whether the only reordering dependencies of the
+ * intrinsic are due to the register reads/writes.
+ */
+ NIR_INTRINSIC_CAN_REORDER = (1 << 1),
+ } nir_intrinsic_semantic_flag;
+
+ #define NIR_INTRINSIC_MAX_INPUTS 4
+
+ typedef struct {
+ const char *name;
+
+ unsigned num_srcs; /** < number of register/SSA inputs */
+
+ /** number of components of each input register
+ *
+ * If this value is 0, the number of components is given by the
+ * num_components field of nir_intrinsic_instr.
+ */
+ unsigned src_components[NIR_INTRINSIC_MAX_INPUTS];
+
+ bool has_dest;
+
+ /** number of components of the output register
+ *
+ * If this value is 0, the number of components is given by the
+ * num_components field of nir_intrinsic_instr.
+ */
+ unsigned dest_components;
+
+ /** the number of inputs/outputs that are variables */
+ unsigned num_variables;
+
+ /** the number of constant indices used by the intrinsic */
+ unsigned num_indices;
+
+ /** semantic flags for calls to this intrinsic */
+ nir_intrinsic_semantic_flag flags;
+ } nir_intrinsic_info;
+
+ extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
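+
+ /* A sketch of how a pass would resolve the destination width of an
+ * intrinsic `intrin` (a placeholder name), following the "0 means use
+ * num_components" convention described above:
+ *
+ * const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
+ * unsigned dest_comps = info->dest_components ? info->dest_components
+ * : intrin->num_components;
+ */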
+
+ /**
+ * \group texture information
+ *
+ * This gives semantic information about textures which is useful to the
+ * frontend, the backend, and lowering passes, but not the optimizer.
+ */
+
+ typedef enum {
+ nir_tex_src_coord,
+ nir_tex_src_projector,
+ nir_tex_src_comparitor, /* shadow comparator */
+ nir_tex_src_offset,
+ nir_tex_src_bias,
+ nir_tex_src_lod,
+ nir_tex_src_ms_index, /* MSAA sample index */
+ nir_tex_src_ddx,
+ nir_tex_src_ddy,
++ nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */
+ nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */
+ nir_num_tex_src_types
+ } nir_tex_src_type;
+
+ typedef struct {
+ nir_src src;
+ nir_tex_src_type src_type;
+ } nir_tex_src;
+
+ typedef enum {
+ nir_texop_tex, /**< Regular texture look-up */
+ nir_texop_txb, /**< Texture look-up with LOD bias */
+ nir_texop_txl, /**< Texture look-up with explicit LOD */
+ nir_texop_txd, /**< Texture look-up with partial derivatives */
+ nir_texop_txf, /**< Texel fetch with explicit LOD */
+ nir_texop_txf_ms, /**< Multisample texture fetch */
+ nir_texop_txs, /**< Texture size */
+ nir_texop_lod, /**< Texture lod query */
+ nir_texop_tg4, /**< Texture gather */
+ nir_texop_query_levels, /**< Texture levels query */
+ nir_texop_texture_samples, /**< Texture samples query */
+ nir_texop_samples_identical, /**< Query whether all samples are definitely
+ * identical.
+ */
+ } nir_texop;
+
+ typedef struct {
+ nir_instr instr;
+
+ enum glsl_sampler_dim sampler_dim;
+ nir_alu_type dest_type;
+
+ nir_texop op;
+ nir_dest dest;
+ nir_tex_src *src;
+ unsigned num_srcs, coord_components;
+ bool is_array, is_shadow;
+
+ /**
+ * If is_shadow is true, whether this is the old-style shadow that outputs 4
+ * components or the new-style shadow that outputs 1 component.
+ */
+ bool is_new_style_shadow;
+
+ /* constant offset - must be 0 if the offset source is used */
+ int const_offset[4];
+
+ /* gather component selector */
+ unsigned component : 2;
+
++ /** The texture index
++ *
++ * If this texture instruction has a nir_tex_src_texture_offset source,
++ * then the texture index is given by texture_index + texture_offset.
++ */
++ unsigned texture_index;
++
++ /** The size of the texture array or 0 if it's not an array */
++ unsigned texture_array_size;
++
++ /** The texture deref
++ *
++ * If this and `sampler` are both NULL, use texture_index instead.
++ * If `texture` is NULL, but `sampler` is non-NULL, then the texture is
++ * implied from the sampler.
++ */
++ nir_deref_var *texture;
++
+ /** The sampler index
+ *
+ * If this texture instruction has a nir_tex_src_sampler_offset source,
+ * then the sampler index is given by sampler_index + sampler_offset.
+ */
+ unsigned sampler_index;
+
- unsigned num_inputs, num_uniforms, num_outputs;
++ /** The sampler deref
++ *
++ * If this is null, use sampler_index instead.
++ */
++ nir_deref_var *sampler;
+ } nir_tex_instr;
+
+ static inline unsigned
+ nir_tex_instr_dest_size(nir_tex_instr *instr)
+ {
+ switch (instr->op) {
+ case nir_texop_txs: {
+ unsigned ret;
+ switch (instr->sampler_dim) {
+ case GLSL_SAMPLER_DIM_1D:
+ case GLSL_SAMPLER_DIM_BUF:
+ ret = 1;
+ break;
+ case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_CUBE:
+ case GLSL_SAMPLER_DIM_MS:
+ case GLSL_SAMPLER_DIM_RECT:
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ ret = 2;
+ break;
+ case GLSL_SAMPLER_DIM_3D:
+ ret = 3;
+ break;
+ default:
+ unreachable("not reached");
+ }
+ if (instr->is_array)
+ ret++;
+ return ret;
+ }
+
+ case nir_texop_lod:
+ return 2;
+
+ case nir_texop_texture_samples:
+ case nir_texop_query_levels:
+ case nir_texop_samples_identical:
+ return 1;
+
+ default:
+ if (instr->is_shadow && instr->is_new_style_shadow)
+ return 1;
+
+ return 4;
+ }
+ }
+
+ /* Returns true if this texture operation queries something about the texture
+ * rather than actually sampling it.
+ */
+ static inline bool
+ nir_tex_instr_is_query(nir_tex_instr *instr)
+ {
+ switch (instr->op) {
+ case nir_texop_txs:
+ case nir_texop_lod:
+ case nir_texop_texture_samples:
+ case nir_texop_query_levels:
+ return true;
+ case nir_texop_tex:
+ case nir_texop_txb:
+ case nir_texop_txl:
+ case nir_texop_txd:
+ case nir_texop_txf:
+ case nir_texop_txf_ms:
+ case nir_texop_tg4:
+ return false;
+ default:
+ unreachable("Invalid texture opcode");
+ }
+ }
+
+ static inline unsigned
+ nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src)
+ {
+ if (instr->src[src].src_type == nir_tex_src_coord)
+ return instr->coord_components;
+
+ if (instr->src[src].src_type == nir_tex_src_offset ||
+ instr->src[src].src_type == nir_tex_src_ddx ||
+ instr->src[src].src_type == nir_tex_src_ddy) {
+ if (instr->is_array)
+ return instr->coord_components - 1;
+ else
+ return instr->coord_components;
+ }
+
+ return 1;
+ }
+
+ static inline int
+ nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type)
+ {
+ for (unsigned i = 0; i < instr->num_srcs; i++)
+ if (instr->src[i].src_type == type)
+ return (int) i;
+
+ return -1;
+ }
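+
+ /* For example (a sketch; `tex` is a placeholder nir_tex_instr pointer), a
+ * lowering pass can look up the explicit LOD source with:
+ *
+ * int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
+ * if (lod_idx >= 0) {
+ * // tex->src[lod_idx].src holds the LOD value
+ * }
+ */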
+
+ typedef struct {
+ union {
+ float f[4];
+ int32_t i[4];
+ uint32_t u[4];
+ };
+ } nir_const_value;
+
+ typedef struct {
+ nir_instr instr;
+
+ nir_const_value value;
+
+ nir_ssa_def def;
+ } nir_load_const_instr;
+
+ typedef enum {
+ nir_jump_return,
+ nir_jump_break,
+ nir_jump_continue,
+ } nir_jump_type;
+
+ typedef struct {
+ nir_instr instr;
+ nir_jump_type type;
+ } nir_jump_instr;
+
+ /* creates a new SSA variable in an undefined state */
+
+ typedef struct {
+ nir_instr instr;
+ nir_ssa_def def;
+ } nir_ssa_undef_instr;
+
+ typedef struct {
+ struct exec_node node;
+
+ /* The predecessor block corresponding to this source */
+ struct nir_block *pred;
+
+ nir_src src;
+ } nir_phi_src;
+
+ #define nir_foreach_phi_src(phi, entry) \
+ foreach_list_typed(nir_phi_src, entry, node, &(phi)->srcs)
+ #define nir_foreach_phi_src_safe(phi, entry) \
+ foreach_list_typed_safe(nir_phi_src, entry, node, &(phi)->srcs)
+
+ typedef struct {
+ nir_instr instr;
+
+ struct exec_list srcs; /** < list of nir_phi_src */
+
+ nir_dest dest;
+ } nir_phi_instr;
+
+ typedef struct {
+ struct exec_node node;
+ nir_src src;
+ nir_dest dest;
+ } nir_parallel_copy_entry;
+
+ #define nir_foreach_parallel_copy_entry(pcopy, entry) \
+ foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries)
+
+ typedef struct {
+ nir_instr instr;
+
+ /* A list of nir_parallel_copy_entry's. The sources of all of the
+ * entries are copied to the corresponding destinations "in parallel".
+ * In other words, if we have two entries: a -> b and b -> a, the values
+ * get swapped.
+ */
+ struct exec_list entries;
+ } nir_parallel_copy_instr;
+
+ NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
+ nir_parallel_copy_instr, instr)
+
+ /*
+ * Control flow
+ *
+ * Control flow consists of a tree of control flow nodes, which include
+ * if-statements and loops. The leaves of the tree are basic blocks, lists of
+ * instructions that always run start-to-finish. Each basic block also keeps
+ * track of its successors (blocks which may run immediately after the current
+ * block) and predecessors (blocks which could have run immediately before the
+ * current block). Each function also has a start block and an end block which
+ * all return statements point to (which is always empty). Together, all the
+ * blocks with their predecessors and successors make up the control flow
+ * graph (CFG) of the function. There are helpers that modify the tree of
+ * control flow nodes while modifying the CFG appropriately; these should be
+ * used instead of modifying the tree directly.
+ */
+
+ typedef enum {
+ nir_cf_node_block,
+ nir_cf_node_if,
+ nir_cf_node_loop,
+ nir_cf_node_function
+ } nir_cf_node_type;
+
+ typedef struct nir_cf_node {
+ struct exec_node node;
+ nir_cf_node_type type;
+ struct nir_cf_node *parent;
+ } nir_cf_node;
+
+ typedef struct nir_block {
+ nir_cf_node cf_node;
+
+ struct exec_list instr_list; /** < list of nir_instr */
+
+ /** generic block index; generated by nir_index_blocks */
+ unsigned index;
+
+ /*
+ * Each block can only have up to 2 successors, so we put them in a simple
+ * array - no need for anything more complicated.
+ */
+ struct nir_block *successors[2];
+
+ /* Set of nir_block predecessors in the CFG */
+ struct set *predecessors;
+
+ /*
+ * this node's immediate dominator in the dominance tree - set to NULL for
+ * the start block.
+ */
+ struct nir_block *imm_dom;
+
+ /* This node's children in the dominance tree */
+ unsigned num_dom_children;
+ struct nir_block **dom_children;
+
+ /* Set of nir_block's on the dominance frontier of this block */
+ struct set *dom_frontier;
+
+ /*
+ * These two indices have the property that dom_{pre,post}_index for each
+ * child of this block in the dominance tree will always be between
+ * dom_pre_index and dom_post_index for this block, which makes testing if
+ * a given block is dominated by another block an O(1) operation.
+ */
+ unsigned dom_pre_index, dom_post_index;
+
+ /* live in and out for this block; used for liveness analysis */
+ BITSET_WORD *live_in;
+ BITSET_WORD *live_out;
+ } nir_block;
+
+ static inline nir_instr *
+ nir_block_first_instr(nir_block *block)
+ {
+ struct exec_node *head = exec_list_get_head(&block->instr_list);
+ return exec_node_data(nir_instr, head, node);
+ }
+
+ static inline nir_instr *
+ nir_block_last_instr(nir_block *block)
+ {
+ struct exec_node *tail = exec_list_get_tail(&block->instr_list);
+ return exec_node_data(nir_instr, tail, node);
+ }
+
+ #define nir_foreach_instr(block, instr) \
+ foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)
+ #define nir_foreach_instr_reverse(block, instr) \
+ foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
+ #define nir_foreach_instr_safe(block, instr) \
+ foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
+ #define nir_foreach_instr_reverse_safe(block, instr) \
+ foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list)
+
+ typedef struct nir_if {
+ nir_cf_node cf_node;
+ nir_src condition;
+
+ struct exec_list then_list; /** < list of nir_cf_node */
+ struct exec_list else_list; /** < list of nir_cf_node */
+ } nir_if;
+
+ static inline nir_cf_node *
+ nir_if_first_then_node(nir_if *if_stmt)
+ {
+ struct exec_node *head = exec_list_get_head(&if_stmt->then_list);
+ return exec_node_data(nir_cf_node, head, node);
+ }
+
+ static inline nir_cf_node *
+ nir_if_last_then_node(nir_if *if_stmt)
+ {
+ struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list);
+ return exec_node_data(nir_cf_node, tail, node);
+ }
+
+ static inline nir_cf_node *
+ nir_if_first_else_node(nir_if *if_stmt)
+ {
+ struct exec_node *head = exec_list_get_head(&if_stmt->else_list);
+ return exec_node_data(nir_cf_node, head, node);
+ }
+
+ static inline nir_cf_node *
+ nir_if_last_else_node(nir_if *if_stmt)
+ {
+ struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list);
+ return exec_node_data(nir_cf_node, tail, node);
+ }
+
+ typedef struct {
+ nir_cf_node cf_node;
+
+ struct exec_list body; /** < list of nir_cf_node */
+ } nir_loop;
+
+ static inline nir_cf_node *
+ nir_loop_first_cf_node(nir_loop *loop)
+ {
+ return exec_node_data(nir_cf_node, exec_list_get_head(&loop->body), node);
+ }
+
+ static inline nir_cf_node *
+ nir_loop_last_cf_node(nir_loop *loop)
+ {
+ return exec_node_data(nir_cf_node, exec_list_get_tail(&loop->body), node);
+ }
+
+ /**
+ * Various bits of metadata that may be created or required by
+ * optimization and analysis passes
+ */
+ typedef enum {
+ nir_metadata_none = 0x0,
+ nir_metadata_block_index = 0x1,
+ nir_metadata_dominance = 0x2,
+ nir_metadata_live_ssa_defs = 0x4,
+ nir_metadata_not_properly_reset = 0x8,
+ } nir_metadata;
+
+ typedef struct {
+ nir_cf_node cf_node;
+
+ /** pointer to the function of which this is an implementation */
+ struct nir_function *function;
+
+ struct exec_list body; /** < list of nir_cf_node */
+
+ nir_block *end_block;
+
+ /** list for all local variables in the function */
+ struct exec_list locals;
+
+ /** array of variables used as parameters */
+ unsigned num_params;
+ nir_variable **params;
+
+ /** variable used to hold the result of the function */
+ nir_variable *return_var;
+
+ /** list of local registers in the function */
+ struct exec_list registers;
+
+ /** next available local register index */
+ unsigned reg_alloc;
+
+ /** next available SSA value index */
+ unsigned ssa_alloc;
+
+ /* total number of basic blocks, only valid when the block_index metadata is valid */
+ unsigned num_blocks;
+
+ nir_metadata valid_metadata;
+ } nir_function_impl;
+
+ static inline nir_block *
+ nir_start_block(nir_function_impl *impl)
+ {
+ return (nir_block *) exec_list_get_head(&impl->body);
+ }
+
+ static inline nir_cf_node *
+ nir_cf_node_next(nir_cf_node *node)
+ {
+ struct exec_node *next = exec_node_get_next(&node->node);
+ if (exec_node_is_tail_sentinel(next))
+ return NULL;
+ else
+ return exec_node_data(nir_cf_node, next, node);
+ }
+
+ static inline nir_cf_node *
+ nir_cf_node_prev(nir_cf_node *node)
+ {
+ struct exec_node *prev = exec_node_get_prev(&node->node);
+ if (exec_node_is_head_sentinel(prev))
+ return NULL;
+ else
+ return exec_node_data(nir_cf_node, prev, node);
+ }
+
+ static inline bool
+ nir_cf_node_is_first(const nir_cf_node *node)
+ {
+ return exec_node_is_head_sentinel(node->node.prev);
+ }
+
+ static inline bool
+ nir_cf_node_is_last(const nir_cf_node *node)
+ {
+ return exec_node_is_tail_sentinel(node->node.next);
+ }
+
+ NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node)
+ NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node)
+ NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node)
+ NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node, nir_function_impl, cf_node)
+
+ typedef enum {
+ nir_parameter_in,
+ nir_parameter_out,
+ nir_parameter_inout,
+ } nir_parameter_type;
+
+ typedef struct {
+ nir_parameter_type param_type;
+ const struct glsl_type *type;
+ } nir_parameter;
+
+ typedef struct nir_function {
+ struct exec_node node;
+
+ const char *name;
+ struct nir_shader *shader;
+
+ unsigned num_params;
+ nir_parameter *params;
+ const struct glsl_type *return_type;
+
+ /** The implementation of this function.
+ *
+ * If the function is only declared and not implemented, this is NULL.
+ */
+ nir_function_impl *impl;
+ } nir_function;
+
+ typedef struct nir_shader_compiler_options {
+ bool lower_fdiv;
+ bool lower_ffma;
+ bool lower_flrp;
+ bool lower_fpow;
+ bool lower_fsat;
+ bool lower_fsqrt;
+ bool lower_fmod;
+ bool lower_bitfield_extract;
+ bool lower_bitfield_insert;
+ bool lower_uadd_carry;
+ bool lower_usub_borrow;
+ /** lowers fneg and ineg to fsub and isub. */
+ bool lower_negate;
+ /** lowers fsub and isub to fadd+fneg and iadd+ineg. */
+ bool lower_sub;
+
+ /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
+ bool lower_scmp;
+
+ /* Does the native fdot instruction replicate its result for four
+ * components? If so, then opt_algebraic_late will turn all fdotN
+ * instructions into fdot_replicatedN instructions.
+ */
+ bool fdot_replicates;
+
+ /** lowers ffract to fsub+ffloor: */
+ bool lower_ffract;
+
++ bool lower_pack_half_2x16;
++ bool lower_pack_unorm_2x16;
++ bool lower_pack_snorm_2x16;
++ bool lower_pack_unorm_4x8;
++ bool lower_pack_snorm_4x8;
++ bool lower_unpack_half_2x16;
++ bool lower_unpack_unorm_2x16;
++ bool lower_unpack_snorm_2x16;
++ bool lower_unpack_unorm_4x8;
++ bool lower_unpack_snorm_4x8;
++
++ bool lower_extract_byte;
++ bool lower_extract_word;
++
+ /**
+ * Does the driver support real 32-bit integers? (Otherwise, integers
+ * are simulated by floats.)
+ */
+ bool native_integers;
++
++ /* Indicates that the driver only has zero-based vertex id */
++ bool vertex_id_zero_based;
+ } nir_shader_compiler_options;
+
+ typedef struct nir_shader_info {
+ const char *name;
+
+ /* Descriptive name provided by the client; may be NULL */
+ const char *label;
+
+ /* Number of textures used by this shader */
+ unsigned num_textures;
+ /* Number of uniform buffers used by this shader */
+ unsigned num_ubos;
+ /* Number of atomic buffers used by this shader */
+ unsigned num_abos;
+ /* Number of shader storage buffers used by this shader */
+ unsigned num_ssbos;
+ /* Number of images used by this shader */
+ unsigned num_images;
+
+ /* Which inputs are actually read */
+ uint64_t inputs_read;
+ /* Which outputs are actually written */
+ uint64_t outputs_written;
+ /* Which system values are actually read */
+ uint64_t system_values_read;
+
+ /* Which patch inputs are actually read */
+ uint32_t patch_inputs_read;
+ /* Which patch outputs are actually written */
+ uint32_t patch_outputs_written;
+
+ /* Whether or not this shader ever uses textureGather() */
+ bool uses_texture_gather;
+
+ /* Whether or not this shader uses the gl_ClipDistance output */
+ bool uses_clip_distance_out;
+
+ /* Whether or not separate shader objects were used */
+ bool separate_shader;
+
+ /** Was this shader linked with any transform feedback varyings? */
+ bool has_transform_feedback_varyings;
+
+ union {
+ struct {
+ /** The number of vertices received per input primitive */
+ unsigned vertices_in;
+
+ /** The output primitive type (GL enum value) */
+ unsigned output_primitive;
+
+ /** The maximum number of vertices the geometry shader might write. */
+ unsigned vertices_out;
+
+ /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
+ unsigned invocations;
+
+ /** Whether or not this shader uses EndPrimitive */
+ bool uses_end_primitive;
+
+ /** Whether or not this shader uses non-zero streams */
+ bool uses_streams;
+ } gs;
+
+ struct {
+ bool uses_discard;
+
+ /**
+ * Whether early fragment tests are enabled as defined by
+ * ARB_shader_image_load_store.
+ */
+ bool early_fragment_tests;
+
+ /** gl_FragDepth layout for ARB_conservative_depth. */
+ enum gl_frag_depth_layout depth_layout;
+ } fs;
+
+ struct {
+ unsigned local_size[3];
+ } cs;
+
+ struct {
+ /** The number of vertices in the TCS output patch. */
+ unsigned vertices_out;
+ } tcs;
+ };
+ } nir_shader_info;
+
+ typedef struct nir_shader {
+ /** list of uniforms (nir_variable) */
+ struct exec_list uniforms;
+
+ /** list of inputs (nir_variable) */
+ struct exec_list inputs;
+
+ /** list of outputs (nir_variable) */
+ struct exec_list outputs;
+
++ /** list of shared compute variables (nir_variable) */
++ struct exec_list shared;
++
+ /** Set of driver-specific options for the shader.
+ *
+ * The memory for the options is expected to be kept in a single static
+ * copy by the driver.
+ */
+ const struct nir_shader_compiler_options *options;
+
+ /** Various bits of compile-time information about a given shader */
+ struct nir_shader_info info;
+
+ /** list of global variables in the shader (nir_variable) */
+ struct exec_list globals;
+
+ /** list of system value variables in the shader (nir_variable) */
+ struct exec_list system_values;
+
+ struct exec_list functions; /** < list of nir_function */
+
+ /** list of global registers in the shader */
+ struct exec_list registers;
+
+ /** next available global register index */
+ unsigned reg_alloc;
+
+ /**
+ * the highest index a load_input_*, load_uniform_*, etc. intrinsic can
+ * access plus one
+ */
-nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s);
++ unsigned num_inputs, num_uniforms, num_outputs, num_shared;
+
+ /** The shader stage, such as MESA_SHADER_VERTEX. */
+ gl_shader_stage stage;
+ } nir_shader;
+
+ #define nir_foreach_function(shader, func) \
+ foreach_list_typed(nir_function, func, node, &(shader)->functions)
+
+ nir_shader *nir_shader_create(void *mem_ctx,
+ gl_shader_stage stage,
+ const nir_shader_compiler_options *options);
+
+ /** creates a register, including assigning it an index and adding it to the list */
+ nir_register *nir_global_reg_create(nir_shader *shader);
+
+ nir_register *nir_local_reg_create(nir_function_impl *impl);
+
+ void nir_reg_remove(nir_register *reg);
+
+ /** Adds a variable to the appropriate list in nir_shader */
+ void nir_shader_add_variable(nir_shader *shader, nir_variable *var);
+
+ static inline void
+ nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var)
+ {
+ assert(var->data.mode == nir_var_local);
+ exec_list_push_tail(&impl->locals, &var->node);
+ }
+
+ /** creates a variable, sets a few defaults, and adds it to the list */
+ nir_variable *nir_variable_create(nir_shader *shader,
+ nir_variable_mode mode,
+ const struct glsl_type *type,
+ const char *name);
+ /** creates a local variable and adds it to the list */
+ nir_variable *nir_local_variable_create(nir_function_impl *impl,
+ const struct glsl_type *type,
+ const char *name);
+
+ /** creates a function and adds it to the shader's list of functions */
+ nir_function *nir_function_create(nir_shader *shader, const char *name);
+
+ nir_function_impl *nir_function_impl_create(nir_function *func);
++/** creates a function_impl that isn't tied to any particular function */
++nir_function_impl *nir_function_impl_create_bare(nir_shader *shader);
+
+ nir_block *nir_block_create(nir_shader *shader);
+ nir_if *nir_if_create(nir_shader *shader);
+ nir_loop *nir_loop_create(nir_shader *shader);
+
+ nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);
+
+ /** requests that the given pieces of metadata be generated */
+ void nir_metadata_require(nir_function_impl *impl, nir_metadata required);
+ /** dirties all but the preserved metadata */
+ void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
+
+ /** creates an instruction with default swizzle/writemask/etc. with NULL registers */
+ nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
+
+ nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
+
+ nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
+ unsigned num_components);
+
+ nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
+ nir_intrinsic_op op);
+
+ nir_call_instr *nir_call_instr_create(nir_shader *shader,
+ nir_function *callee);
+
+ nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);
+
+ nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
+
+ nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
+
+ nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
+ unsigned num_components);
+
+ nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var);
+ nir_deref_array *nir_deref_array_create(void *mem_ctx);
+ nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index);
+
+ nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref);
+
+ nir_load_const_instr *
+ nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref);
+
+ /**
+ * NIR Cursors and Instruction Insertion API
+ * @{
+ *
+ * A tiny struct representing a point to insert/extract instructions or
+ * control flow nodes. Helps reduce the combinatorial explosion of possible
+ * points to insert/extract.
+ *
+ * \sa nir_control_flow.h
+ */
+ typedef enum {
+ nir_cursor_before_block,
+ nir_cursor_after_block,
+ nir_cursor_before_instr,
+ nir_cursor_after_instr,
+ } nir_cursor_option;
+
+ typedef struct {
+ nir_cursor_option option;
+ union {
+ nir_block *block;
+ nir_instr *instr;
+ };
+ } nir_cursor;
+
++static inline nir_block *
++nir_cursor_current_block(nir_cursor cursor)
++{
++ if (cursor.option == nir_cursor_before_instr ||
++ cursor.option == nir_cursor_after_instr) {
++ return cursor.instr->block;
++ } else {
++ return cursor.block;
++ }
++}
++
++bool nir_cursors_equal(nir_cursor a, nir_cursor b);
++
+ static inline nir_cursor
+ nir_before_block(nir_block *block)
+ {
+ nir_cursor cursor;
+ cursor.option = nir_cursor_before_block;
+ cursor.block = block;
+ return cursor;
+ }
+
+ static inline nir_cursor
+ nir_after_block(nir_block *block)
+ {
+ nir_cursor cursor;
+ cursor.option = nir_cursor_after_block;
+ cursor.block = block;
+ return cursor;
+ }
+
+ static inline nir_cursor
+ nir_before_instr(nir_instr *instr)
+ {
+ nir_cursor cursor;
+ cursor.option = nir_cursor_before_instr;
+ cursor.instr = instr;
+ return cursor;
+ }
+
+ static inline nir_cursor
+ nir_after_instr(nir_instr *instr)
+ {
+ nir_cursor cursor;
+ cursor.option = nir_cursor_after_instr;
+ cursor.instr = instr;
+ return cursor;
+ }
+
+ static inline nir_cursor
+ nir_after_block_before_jump(nir_block *block)
+ {
+ nir_instr *last_instr = nir_block_last_instr(block);
+ if (last_instr && last_instr->type == nir_instr_type_jump) {
+ return nir_before_instr(last_instr);
+ } else {
+ return nir_after_block(block);
+ }
+ }
+
+ static inline nir_cursor
+ nir_before_cf_node(nir_cf_node *node)
+ {
+ if (node->type == nir_cf_node_block)
+ return nir_before_block(nir_cf_node_as_block(node));
+
+ return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node)));
+ }
+
+ static inline nir_cursor
+ nir_after_cf_node(nir_cf_node *node)
+ {
+ if (node->type == nir_cf_node_block)
+ return nir_after_block(nir_cf_node_as_block(node));
+
+ return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node)));
+ }
+
++static inline nir_cursor
++nir_after_cf_node_and_phis(nir_cf_node *node)
++{
++ if (node->type == nir_cf_node_block)
++ return nir_after_block(nir_cf_node_as_block(node));
++
++ nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node));
++ assert(block->cf_node.type == nir_cf_node_block);
++
++ nir_foreach_instr(block, instr) {
++ if (instr->type != nir_instr_type_phi)
++ return nir_before_instr(instr);
++ }
++ return nir_after_block(block);
++}
++
+ static inline nir_cursor
+ nir_before_cf_list(struct exec_list *cf_list)
+ {
+ nir_cf_node *first_node = exec_node_data(nir_cf_node,
+ exec_list_get_head(cf_list), node);
+ return nir_before_cf_node(first_node);
+ }
+
+ static inline nir_cursor
+ nir_after_cf_list(struct exec_list *cf_list)
+ {
+ nir_cf_node *last_node = exec_node_data(nir_cf_node,
+ exec_list_get_tail(cf_list), node);
+ return nir_after_cf_node(last_node);
+ }
+
+ /**
+ * Insert a NIR instruction at the given cursor.
+ *
+ * Note: This does not update the cursor.
+ */
+ void nir_instr_insert(nir_cursor cursor, nir_instr *instr);
+
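+ /* Insertion sketch (illustrative; `instr` and `new_instr` stand for existing
+ * instructions): build a cursor, then insert at it.
+ *
+ *    nir_instr_insert(nir_after_instr(instr), new_instr);
+ *
+ * Since the cursor is not updated, inserting another instruction after
+ * `new_instr` needs a fresh cursor, e.g. nir_after_instr(new_instr).
+ */
+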
+ static inline void
+ nir_instr_insert_before(nir_instr *instr, nir_instr *before)
+ {
+ nir_instr_insert(nir_before_instr(instr), before);
+ }
+
+ static inline void
+ nir_instr_insert_after(nir_instr *instr, nir_instr *after)
+ {
+ nir_instr_insert(nir_after_instr(instr), after);
+ }
+
+ static inline void
+ nir_instr_insert_before_block(nir_block *block, nir_instr *before)
+ {
+ nir_instr_insert(nir_before_block(block), before);
+ }
+
+ static inline void
+ nir_instr_insert_after_block(nir_block *block, nir_instr *after)
+ {
+ nir_instr_insert(nir_after_block(block), after);
+ }
+
+ static inline void
+ nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
+ {
+ nir_instr_insert(nir_before_cf_node(node), before);
+ }
+
+ static inline void
+ nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
+ {
+ nir_instr_insert(nir_after_cf_node(node), after);
+ }
+
+ static inline void
+ nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
+ {
+ nir_instr_insert(nir_before_cf_list(list), before);
+ }
+
+ static inline void
+ nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
+ {
+ nir_instr_insert(nir_after_cf_list(list), after);
+ }
+
+ void nir_instr_remove(nir_instr *instr);
+
+ /** @} */
+
+ typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
+ typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
+ typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
+ bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,
+ void *state);
+ bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state);
+ bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
+
+ nir_const_value *nir_src_as_const_value(nir_src src);
+ bool nir_src_is_dynamically_uniform(nir_src src);
+ bool nir_srcs_equal(nir_src src1, nir_src src2);
+ void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
+ void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
+ void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
+ void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
+ nir_dest new_dest);
+
+ void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
+ unsigned num_components, const char *name);
+ void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
+ unsigned num_components, const char *name);
+ void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
+ void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
+ nir_instr *after_me);
+
+ /* visits basic blocks in source-code order */
+ typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state);
+ bool nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb,
+ void *state);
+ bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
+ void *state);
+ bool nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+ void *state);
+
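+ /* Sketch of a block-walk callback (illustrative; count_blocks is a
+ * hypothetical helper, mirroring the callbacks emitted by the generated
+ * algebraic passes). Returning true keeps the walk going:
+ *
+ *    static bool
+ *    count_blocks(nir_block *block, void *state)
+ *    {
+ *       (*(unsigned *)state)++;
+ *       return true;
+ *    }
+ *
+ *    unsigned n = 0;
+ *    nir_foreach_block(impl, count_blocks, &n);
+ */
+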
+ /* If the following CF node is an if, this function returns that if.
+ * Otherwise, it returns NULL.
+ */
+ nir_if *nir_block_get_following_if(nir_block *block);
+
+ nir_loop *nir_block_get_following_loop(nir_block *block);
+
+ void nir_index_local_regs(nir_function_impl *impl);
+ void nir_index_global_regs(nir_shader *shader);
+ void nir_index_ssa_defs(nir_function_impl *impl);
+ unsigned nir_index_instrs(nir_function_impl *impl);
+
+ void nir_index_blocks(nir_function_impl *impl);
+
+ void nir_print_shader(nir_shader *shader, FILE *fp);
+ void nir_print_instr(const nir_instr *instr, FILE *fp);
+
-void nir_lower_outputs_to_temporaries(nir_shader *shader);
++nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s);
++nir_function_impl *nir_function_impl_clone(const nir_function_impl *impl);
++nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var);
+
+ #ifdef DEBUG
+ void nir_validate_shader(nir_shader *shader);
+ void nir_metadata_set_validation_flag(nir_shader *shader);
+ void nir_metadata_check_validation_flag(nir_shader *shader);
+
+ #include "util/debug.h"
+ static inline bool
+ should_clone_nir(void)
+ {
+ static int should_clone = -1;
+ if (should_clone < 0)
+ should_clone = env_var_as_boolean("NIR_TEST_CLONE", false);
+
+ return should_clone;
+ }
+ #else
+ static inline void nir_validate_shader(nir_shader *shader) { (void) shader; }
+ static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; }
+ static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; }
+ static inline bool should_clone_nir(void) { return false; }
+ #endif /* DEBUG */
+
+ #define _PASS(nir, do_pass) do { \
+ do_pass \
+ nir_validate_shader(nir); \
+ if (should_clone_nir()) { \
+ nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
+ ralloc_free(nir); \
+ nir = clone; \
+ } \
+ } while (0)
+
+ #define NIR_PASS(progress, nir, pass, ...) _PASS(nir, \
+ nir_metadata_set_validation_flag(nir); \
+ if (pass(nir, ##__VA_ARGS__)) { \
+ progress = true; \
+ nir_metadata_check_validation_flag(nir); \
+ } \
+ )
+
+ #define NIR_PASS_V(nir, pass, ...) _PASS(nir, \
+ pass(nir, ##__VA_ARGS__); \
+ )
+
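+ /* Driver-side sketch (the pass selection is illustrative): NIR_PASS
+ * accumulates progress and validates after each pass; with NIR_TEST_CLONE=true
+ * in the environment (debug builds) each pass is also followed by a clone
+ * round-trip. `s` is a hypothetical nir_shader pointer.
+ *
+ *    bool progress = true;
+ *    while (progress) {
+ *       progress = false;
+ *       NIR_PASS(progress, s, nir_copy_prop);
+ *       NIR_PASS(progress, s, nir_opt_dce);
+ *    }
+ *    NIR_PASS_V(s, nir_convert_from_ssa, false);
+ */
+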
+ void nir_calc_dominance_impl(nir_function_impl *impl);
+ void nir_calc_dominance(nir_shader *shader);
+
+ nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
+ bool nir_block_dominates(nir_block *parent, nir_block *child);
+
+ void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
+ void nir_dump_dom_tree(nir_shader *shader, FILE *fp);
+
+ void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp);
+ void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
+
+ void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
+ void nir_dump_cfg(nir_shader *shader, FILE *fp);
+
+ int nir_gs_count_vertices(const nir_shader *shader);
+
+ bool nir_split_var_copies(nir_shader *shader);
+
++bool nir_lower_returns_impl(nir_function_impl *impl);
++bool nir_lower_returns(nir_shader *shader);
++
++bool nir_inline_functions(nir_shader *shader);
++
+ void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
+ void nir_lower_var_copies(nir_shader *shader);
+
+ bool nir_lower_global_vars_to_local(nir_shader *shader);
+
++bool nir_lower_indirect_derefs(nir_shader *shader, uint32_t mode_mask);
++
+ bool nir_lower_locals_to_regs(nir_shader *shader);
+
-bool nir_remove_dead_variables(nir_shader *shader);
++void nir_lower_outputs_to_temporaries(nir_shader *shader,
++ nir_function *entrypoint);
++
++void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
+
+ void nir_assign_var_locations(struct exec_list *var_list,
+ unsigned *size,
+ int (*type_size)(const struct glsl_type *));
+
+ void nir_lower_io(nir_shader *shader,
+ nir_variable_mode mode,
+ int (*type_size)(const struct glsl_type *));
+ nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
+ nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);
+
+ void nir_lower_vars_to_ssa(nir_shader *shader);
+
++bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode mode);
+
+ void nir_move_vec_src_uses_to_dest(nir_shader *shader);
+ bool nir_lower_vec_to_movs(nir_shader *shader);
+ void nir_lower_alu_to_scalar(nir_shader *shader);
+ void nir_lower_load_const_to_scalar(nir_shader *shader);
+
+ void nir_lower_phis_to_scalar(nir_shader *shader);
+
+ void nir_lower_samplers(nir_shader *shader,
+ const struct gl_shader_program *shader_program);
+
+ bool nir_lower_system_values(nir_shader *shader);
+
+ typedef struct nir_lower_tex_options {
+ /**
+ * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
+ * sampler types a texture projector is lowered.
+ */
+ unsigned lower_txp;
+
+ /**
+ * If true, lower rect textures to 2D, using txs to fetch the
+ * texture dimensions and dividing the texture coords by the
+ * texture dims to normalize.
+ */
+ bool lower_rect;
+
+ /**
+ * To emulate certain texture wrap modes, this can be used
+ * to saturate the specified tex coord to [0.0, 1.0]. The
+ * bits are indexed by sampler #, i.e. if, for example:
+ *
+ * (conf->saturate_s & (1 << n))
+ *
+ * is true, then the s coord for sampler n is saturated.
+ *
+ * Note that clamping must happen *after* projector lowering
+ * so any projected texture sample instruction with a clamped
+ * coordinate gets automatically lowered, regardless of the
+ * 'lower_txp' setting.
+ */
+ unsigned saturate_s;
+ unsigned saturate_t;
+ unsigned saturate_r;
+
+ /* Bitmask of samplers that need swizzling.
+ *
+ * If (swizzle_result & (1 << sampler_index)), then the swizzle in
+ * swizzles[sampler_index] is applied to the result of the texturing
+ * operation.
+ */
+ unsigned swizzle_result;
+
+ /* A swizzle for each sampler. Values 0-3 represent x, y, z, or w swizzles
+ * while 4 and 5 represent 0 and 1 respectively.
+ */
+ uint8_t swizzles[32][4];
+ } nir_lower_tex_options;
+
+ bool nir_lower_tex(nir_shader *shader,
+ const nir_lower_tex_options *options);
+
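+ /* Configuration sketch (illustrative): lower texture projectors for RECT
+ * samplers and clamp the s/t coordinates of sampler #0. `shader` is assumed
+ * to exist; GLSL_SAMPLER_DIM_RECT is the rectangle-sampler enum value.
+ *
+ *    nir_lower_tex_options opts = {0};
+ *    opts.lower_txp = 1 << GLSL_SAMPLER_DIM_RECT;
+ *    opts.saturate_s = 1 << 0;
+ *    opts.saturate_t = 1 << 0;
+ *    nir_lower_tex(shader, &opts);
+ */
+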
+ void nir_lower_idiv(nir_shader *shader);
+
+ void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables);
+ void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
+
+ void nir_lower_two_sided_color(nir_shader *shader);
+
+ void nir_lower_atomics(nir_shader *shader,
+ const struct gl_shader_program *shader_program);
+ void nir_lower_to_source_mods(nir_shader *shader);
+
+ bool nir_lower_gs_intrinsics(nir_shader *shader);
+
+ bool nir_normalize_cubemap_coords(nir_shader *shader);
+
+ void nir_live_ssa_defs_impl(nir_function_impl *impl);
+ bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
+
+ void nir_convert_to_ssa_impl(nir_function_impl *impl);
+ void nir_convert_to_ssa(nir_shader *shader);
++
++bool nir_repair_ssa_impl(nir_function_impl *impl);
++bool nir_repair_ssa(nir_shader *shader);
+
+ /* If phi_webs_only is true, only convert SSA values involved in phi nodes to
+ * registers. If false, convert all values (even those not involved in a phi
+ * node) to registers.
+ */
+ void nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);
+
+ bool nir_opt_algebraic(nir_shader *shader);
+ bool nir_opt_algebraic_late(nir_shader *shader);
+ bool nir_opt_constant_folding(nir_shader *shader);
+
+ bool nir_opt_global_to_local(nir_shader *shader);
+
+ bool nir_copy_prop(nir_shader *shader);
+
+ bool nir_opt_cse(nir_shader *shader);
+
+ bool nir_opt_dce(nir_shader *shader);
+
+ bool nir_opt_dead_cf(nir_shader *shader);
+
+ void nir_opt_gcm(nir_shader *shader);
+
+ bool nir_opt_peephole_select(nir_shader *shader);
+
+ bool nir_opt_remove_phis(nir_shader *shader);
+
+ bool nir_opt_undef(nir_shader *shader);
+
+ void nir_sweep(nir_shader *shader);
+
+ nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
+ gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
+
+ #ifdef __cplusplus
+ } /* extern "C" */
+ #endif
--- /dev/null
- return hex(struct.unpack('I', struct.pack('i', self.value))[0])
+ #! /usr/bin/env python
+ #
+ # Copyright (C) 2014 Intel Corporation
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a
+ # copy of this software and associated documentation files (the "Software"),
+ # to deal in the Software without restriction, including without limitation
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ # and/or sell copies of the Software, and to permit persons to whom the
+ # Software is furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice (including the next
+ # paragraph) shall be included in all copies or substantial portions of the
+ # Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ # IN THE SOFTWARE.
+ #
+ # Authors:
+ # Jason Ekstrand (jason@jlekstrand.net)
+
+ import itertools
+ import struct
+ import sys
+ import mako.template
+ import re
+
+ # Represents a set of variables, each with a unique id
+ class VarSet(object):
+ def __init__(self):
+ self.names = {}
+ self.ids = itertools.count()
+ self.immutable = False;
+
+ def __getitem__(self, name):
+ if name not in self.names:
+ assert not self.immutable, "Unknown replacement variable: " + name
+ self.names[name] = self.ids.next()
+
+ return self.names[name]
+
+ def lock(self):
+ self.immutable = True
+
+ class Value(object):
+ @staticmethod
+ def create(val, name_base, varset):
+ if isinstance(val, tuple):
+ return Expression(val, name_base, varset)
+ elif isinstance(val, Expression):
+ return val
+ elif isinstance(val, (str, unicode)):
+ return Variable(val, name_base, varset)
+ elif isinstance(val, (bool, int, long, float)):
+ return Constant(val, name_base)
+
+ __template = mako.template.Template("""
+ static const ${val.c_type} ${val.name} = {
+ { ${val.type_enum} },
+ % if isinstance(val, Constant):
+ { ${hex(val)} /* ${val.value} */ },
+ % elif isinstance(val, Variable):
+ ${val.index}, /* ${val.var_name} */
+ ${'true' if val.is_constant else 'false'},
+ nir_type_${ val.required_type or 'invalid' },
+ % elif isinstance(val, Expression):
+ nir_op_${val.opcode},
+ { ${', '.join(src.c_ptr for src in val.sources)} },
+ % endif
+ };""")
+
+ def __init__(self, name, type_str):
+ self.name = name
+ self.type_str = type_str
+
+ @property
+ def type_enum(self):
+ return "nir_search_value_" + self.type_str
+
+ @property
+ def c_type(self):
+ return "nir_search_" + self.type_str
+
+ @property
+ def c_ptr(self):
+ return "&{0}.value".format(self.name)
+
+ def render(self):
+ return self.__template.render(val=self,
+ Constant=Constant,
+ Variable=Variable,
+ Expression=Expression)
+
+ class Constant(Value):
+ def __init__(self, val, name):
+ Value.__init__(self, name, "constant")
+ self.value = val
+
+ def __hex__(self):
+ # Even if it's an integer, we still need to unpack as an unsigned
+ # int. This is because, without C99, we can only assign to the first
+ # element of a union in an initializer.
+ if isinstance(self.value, (bool)):
+ return 'NIR_TRUE' if self.value else 'NIR_FALSE'
+ if isinstance(self.value, (int, long)):
++ return hex(struct.unpack('I', struct.pack('i' if self.value < 0 else 'I', self.value))[0])
+ elif isinstance(self.value, float):
+ return hex(struct.unpack('I', struct.pack('f', self.value))[0])
+ else:
+ assert False
+
+ _var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)(?:@(?P<type>\w+))?")
+
+ class Variable(Value):
+ def __init__(self, val, name, varset):
+ Value.__init__(self, name, "variable")
+
+ m = _var_name_re.match(val)
+ assert m and m.group('name') is not None
+
+ self.var_name = m.group('name')
+ self.is_constant = m.group('const') is not None
+ self.required_type = m.group('type')
+
+ if self.required_type is not None:
+ assert self.required_type in ('float', 'bool', 'int', 'unsigned')
+
+ self.index = varset[self.var_name]
+
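+ # Illustrative transforms, in the form SearchAndReplace below consumes
+ # (a sketch; the '#' and '@' markers follow _var_name_re above):
+ #
+ #    (('fadd', 'a', 0.0), 'a')           # drop an add of zero
+ #    (('ine', 'a@bool', False), 'a')     # '@bool' constrains the variable type
+ #
+ # A '#'-prefixed variable (e.g. '#b') only matches constant operands, and an
+ # optional third tuple element supplies a C condition expression, which ends
+ # up in condition_list and is evaluated against the shader's compiler options.
+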
+ class Expression(Value):
+ def __init__(self, expr, name_base, varset):
+ Value.__init__(self, name_base, "expression")
+ assert isinstance(expr, tuple)
+
+ self.opcode = expr[0]
+ self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset)
+ for (i, src) in enumerate(expr[1:]) ]
+
+ def render(self):
+ srcs = "\n".join(src.render() for src in self.sources)
+ return srcs + super(Expression, self).render()
+
+ _optimization_ids = itertools.count()
+
+ condition_list = ['true']
+
+ class SearchAndReplace(object):
+ def __init__(self, transform):
+ self.id = _optimization_ids.next()
+
+ search = transform[0]
+ replace = transform[1]
+ if len(transform) > 2:
+ self.condition = transform[2]
+ else:
+ self.condition = 'true'
+
+ if self.condition not in condition_list:
+ condition_list.append(self.condition)
+ self.condition_index = condition_list.index(self.condition)
+
+ varset = VarSet()
+ if isinstance(search, Expression):
+ self.search = search
+ else:
+ self.search = Expression(search, "search{0}".format(self.id), varset)
+
+ varset.lock()
+
+ if isinstance(replace, Value):
+ self.replace = replace
+ else:
+ self.replace = Value.create(replace, "replace{0}".format(self.id), varset)
+
+ _algebraic_pass_template = mako.template.Template("""
+ #include "nir.h"
+ #include "nir_search.h"
+
+ #ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+ #define NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+
+ struct transform {
+ const nir_search_expression *search;
+ const nir_search_value *replace;
+ unsigned condition_offset;
+ };
+
+ struct opt_state {
+ void *mem_ctx;
+ bool progress;
+ const bool *condition_flags;
+ };
+
+ #endif
+
+ % for (opcode, xform_list) in xform_dict.iteritems():
+ % for xform in xform_list:
+ ${xform.search.render()}
+ ${xform.replace.render()}
+ % endfor
+
+ static const struct transform ${pass_name}_${opcode}_xforms[] = {
+ % for xform in xform_list:
+ { &${xform.search.name}, ${xform.replace.c_ptr}, ${xform.condition_index} },
+ % endfor
+ };
+ % endfor
+
+ static bool
+ ${pass_name}_block(nir_block *block, void *void_state)
+ {
+ struct opt_state *state = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_alu)
+ continue;
+
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ if (!alu->dest.dest.is_ssa)
+ continue;
+
+ switch (alu->op) {
+ % for opcode in xform_dict.keys():
+ case nir_op_${opcode}:
+ for (unsigned i = 0; i < ARRAY_SIZE(${pass_name}_${opcode}_xforms); i++) {
+ const struct transform *xform = &${pass_name}_${opcode}_xforms[i];
+ if (state->condition_flags[xform->condition_offset] &&
+ nir_replace_instr(alu, xform->search, xform->replace,
+ state->mem_ctx)) {
+ state->progress = true;
+ break;
+ }
+ }
+ break;
+ % endfor
+ default:
+ break;
+ }
+ }
+
+ return true;
+ }
+
+ static bool
+ ${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
+ {
+ struct opt_state state;
+
+ state.mem_ctx = ralloc_parent(impl);
+ state.progress = false;
+ state.condition_flags = condition_flags;
+
+ nir_foreach_block(impl, ${pass_name}_block, &state);
+
+ if (state.progress)
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return state.progress;
+ }
+
+
+ bool
+ ${pass_name}(nir_shader *shader)
+ {
+ bool progress = false;
+ bool condition_flags[${len(condition_list)}];
+ const nir_shader_compiler_options *options = shader->options;
+
+ % for index, condition in enumerate(condition_list):
+ condition_flags[${index}] = ${condition};
+ % endfor
+
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ progress |= ${pass_name}_impl(function->impl, condition_flags);
+ }
+
+ return progress;
+ }
+ """)
+
+ class AlgebraicPass(object):
+ def __init__(self, pass_name, transforms):
+ self.xform_dict = {}
+ self.pass_name = pass_name
+
+ for xform in transforms:
+ if not isinstance(xform, SearchAndReplace):
+ xform = SearchAndReplace(xform)
+
+ if xform.search.opcode not in self.xform_dict:
+ self.xform_dict[xform.search.opcode] = []
+
+ self.xform_dict[xform.search.opcode].append(xform)
+
+ def render(self):
+ return _algebraic_pass_template.render(pass_name=self.pass_name,
+ xform_dict=self.xform_dict,
+ condition_list=condition_list)
--- /dev/null
+ /*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+ #ifndef NIR_BUILDER_H
+ #define NIR_BUILDER_H
+
+ #include "nir_control_flow.h"
+
+ struct exec_list;
+
+ typedef struct nir_builder {
+ nir_cursor cursor;
+
+ nir_shader *shader;
+ nir_function_impl *impl;
+ } nir_builder;
+
+ static inline void
+ nir_builder_init(nir_builder *build, nir_function_impl *impl)
+ {
+ memset(build, 0, sizeof(*build));
+ build->impl = impl;
+ build->shader = impl->function->shader;
+ }
+
+ static inline void
+ nir_builder_init_simple_shader(nir_builder *build, void *mem_ctx,
+ gl_shader_stage stage,
+ const nir_shader_compiler_options *options)
+ {
+ build->shader = nir_shader_create(mem_ctx, stage, options);
+ nir_function *func = nir_function_create(build->shader, "main");
+ build->impl = nir_function_impl_create(func);
+ build->cursor = nir_after_cf_list(&build->impl->body);
+ }
+
+ static inline void
+ nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
+ {
+ nir_instr_insert(build->cursor, instr);
+
+ /* Move the cursor forward. */
+ build->cursor = nir_after_instr(instr);
+ }
+
+ static inline void
+ nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf)
+ {
+ nir_cf_node_insert(build->cursor, cf);
+ }
+
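+ /* Builder sketch (illustrative; `mem_ctx` and `options` are assumed to come
+ * from the driver): create a trivial shader and emit an immediate through the
+ * cursor kept in the builder.
+ *
+ *    nir_builder b;
+ *    nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, options);
+ *    nir_ssa_def *half = nir_imm_float(&b, 0.5f);
+ *
+ * nir_imm_float() (below) wraps nir_build_imm(), which inserts through
+ * nir_builder_instr_insert() and therefore advances the cursor past each new
+ * instruction.
+ */
+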
++static inline nir_ssa_def *
++nir_ssa_undef(nir_builder *build, unsigned num_components)
++{
++ nir_ssa_undef_instr *undef =
++ nir_ssa_undef_instr_create(build->shader, num_components);
++ if (!undef)
++ return NULL;
++
++ nir_instr_insert(nir_before_block(nir_start_block(build->impl)),
++ &undef->instr);
++
++ return &undef->def;
++}
++
+ static inline nir_ssa_def *
+ nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value)
+ {
+ nir_load_const_instr *load_const =
+ nir_load_const_instr_create(build->shader, num_components);
+ if (!load_const)
+ return NULL;
+
+ load_const->value = value;
+
+ nir_builder_instr_insert(build, &load_const->instr);
+
+ return &load_const->def;
+ }
+
+ static inline nir_ssa_def *
+ nir_imm_float(nir_builder *build, float x)
+ {
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+ v.f[0] = x;
+
+ return nir_build_imm(build, 1, v);
+ }
+
+ static inline nir_ssa_def *
+ nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
+ {
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+ v.f[0] = x;
+ v.f[1] = y;
+ v.f[2] = z;
+ v.f[3] = w;
+
+ return nir_build_imm(build, 4, v);
+ }
+
+ static inline nir_ssa_def *
+ nir_imm_int(nir_builder *build, int x)
+ {
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+ v.i[0] = x;
+
+ return nir_build_imm(build, 1, v);
+ }
+
+ static inline nir_ssa_def *
+ nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w)
+ {
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+ v.i[0] = x;
+ v.i[1] = y;
+ v.i[2] = z;
+ v.i[3] = w;
+
+ return nir_build_imm(build, 4, v);
+ }
+
+ static inline nir_ssa_def *
+ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
+ nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3)
+ {
+ const nir_op_info *op_info = &nir_op_infos[op];
+ nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
+ if (!instr)
+ return NULL;
+
+ instr->src[0].src = nir_src_for_ssa(src0);
+ if (src1)
+ instr->src[1].src = nir_src_for_ssa(src1);
+ if (src2)
+ instr->src[2].src = nir_src_for_ssa(src2);
+ if (src3)
+ instr->src[3].src = nir_src_for_ssa(src3);
+
+ /* Guess the number of components the destination temporary should have
+ * based on our input sizes, if it's not fixed for the op.
+ */
+ unsigned num_components = op_info->output_size;
+ if (num_components == 0) {
+ for (unsigned i = 0; i < op_info->num_inputs; i++) {
+ if (op_info->input_sizes[i] == 0)
+ num_components = MAX2(num_components,
+ instr->src[i].src.ssa->num_components);
+ }
+ }
+ assert(num_components != 0);
+
+ /* Make sure we don't swizzle from outside of our source vector (like if a
+ * scalar value was passed into a multiply with a vector).
+ */
+ for (unsigned i = 0; i < op_info->num_inputs; i++) {
+ for (unsigned j = instr->src[i].src.ssa->num_components; j < 4; j++) {
+ instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1;
+ }
+ }
+
+ nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
+ instr->dest.write_mask = (1 << num_components) - 1;
+
+ nir_builder_instr_insert(build, &instr->instr);
+
+ return &instr->dest.dest.ssa;
+ }
+
+ #define ALU1(op) \
+ static inline nir_ssa_def * \
+ nir_##op(nir_builder *build, nir_ssa_def *src0) \
+ { \
+ return nir_build_alu(build, nir_op_##op, src0, NULL, NULL, NULL); \
+ }
+
+ #define ALU2(op) \
+ static inline nir_ssa_def * \
+ nir_##op(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) \
+ { \
+ return nir_build_alu(build, nir_op_##op, src0, src1, NULL, NULL); \
+ }
+
+ #define ALU3(op) \
+ static inline nir_ssa_def * \
+ nir_##op(nir_builder *build, nir_ssa_def *src0, \
+ nir_ssa_def *src1, nir_ssa_def *src2) \
+ { \
+ return nir_build_alu(build, nir_op_##op, src0, src1, src2, NULL); \
+ }
+
+ #define ALU4(op) \
+ static inline nir_ssa_def * \
+ nir_##op(nir_builder *build, nir_ssa_def *src0, \
+ nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3) \
+ { \
+ return nir_build_alu(build, nir_op_##op, src0, src1, src2, src3); \
+ }
+
+ #include "nir_builder_opcodes.h"
+
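+ /* The generated header provides one builder per ALU opcode through the
+ * ALU1..ALU4 macros above, e.g. (sketch, assuming a ready builder `b` and
+ * nir_ssa_def's `x` and `y`):
+ *
+ *    nir_ssa_def *sum    = nir_fadd(&b, x, y);
+ *    nir_ssa_def *scaled = nir_fmul(&b, sum, nir_imm_float(&b, 0.5f));
+ */
+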
+ static inline nir_ssa_def *
+ nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components)
+ {
+ switch (num_components) {
+ case 4:
+ return nir_vec4(build, comp[0], comp[1], comp[2], comp[3]);
+ case 3:
+ return nir_vec3(build, comp[0], comp[1], comp[2]);
+ case 2:
+ return nir_vec2(build, comp[0], comp[1]);
+ case 1:
+ return comp[0];
+ default:
+ unreachable("bad component count");
+ return NULL;
+ }
+ }
+
+ /**
+ * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def.
+ */
+ static inline nir_ssa_def *
+ nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+ {
+ nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_fmov);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+ mov->dest.write_mask = (1 << num_components) - 1;
+ mov->src[0] = src;
+ nir_builder_instr_insert(build, &mov->instr);
+
+ return &mov->dest.dest.ssa;
+ }
+
+ static inline nir_ssa_def *
+ nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+ {
+ nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_imov);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+ mov->dest.write_mask = (1 << num_components) - 1;
+ mov->src[0] = src;
+ nir_builder_instr_insert(build, &mov->instr);
+
+ return &mov->dest.dest.ssa;
+ }
+
+ /**
+ * Construct an fmov or imov that reswizzles the source's components.
+ */
+ static inline nir_ssa_def *
+ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
+ unsigned num_components, bool use_fmov)
+ {
+ nir_alu_src alu_src = { NIR_SRC_INIT };
+ alu_src.src = nir_src_for_ssa(src);
+ for (unsigned i = 0; i < num_components; i++)
+ alu_src.swizzle[i] = swiz[i];
+
+ return use_fmov ? nir_fmov_alu(build, alu_src, num_components) :
+ nir_imov_alu(build, alu_src, num_components);
+ }
+
++/* Selects the right fdot given the number of components in each source. */
++static inline nir_ssa_def *
++nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)
++{
++ assert(src0->num_components == src1->num_components);
++ switch (src0->num_components) {
++ case 1: return nir_fmul(build, src0, src1);
++ case 2: return nir_fdot2(build, src0, src1);
++ case 3: return nir_fdot3(build, src0, src1);
++ case 4: return nir_fdot4(build, src0, src1);
++ default:
++ unreachable("bad component size");
++ }
++
++ return NULL;
++}
++
+ static inline nir_ssa_def *
+ nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
+ {
+ unsigned swizzle[4] = {c, c, c, c};
+ return nir_swizzle(b, def, swizzle, 1, false);
+ }
+
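+ /* Sketch: nir_channel() above is the usual way to extract a single component,
+ * e.g. (assuming a builder `b` and a vector nir_ssa_def `vec`):
+ *
+ *    nir_ssa_def *x = nir_channel(&b, vec, 0);
+ */
+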
+ /**
+ * Turns a nir_src into a nir_ssa_def * so it can be passed to
+ * nir_build_alu()-based builder calls.
+ *
+ * See nir_ssa_for_alu_src() for alu instructions.
+ */
+ static inline nir_ssa_def *
+ nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
+ {
+ if (src.is_ssa && src.ssa->num_components == num_components)
+ return src.ssa;
+
+ nir_alu_src alu = { NIR_SRC_INIT };
+ alu.src = src;
+ for (int j = 0; j < 4; j++)
+ alu.swizzle[j] = j;
+
+ return nir_imov_alu(build, alu, num_components);
+ }
+
+ /**
+ * Similar to nir_ssa_for_src(), but for alu src's, respecting the
+ * nir_alu_src's swizzle.
+ */
+ static inline nir_ssa_def *
+ nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn)
+ {
+ static uint8_t trivial_swizzle[4] = { 0, 1, 2, 3 };
+ nir_alu_src *src = &instr->src[srcn];
+ unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn);
+
+ if (src->src.is_ssa && (src->src.ssa->num_components == num_components) &&
+ !src->abs && !src->negate &&
+ (memcmp(src->swizzle, trivial_swizzle, num_components) == 0))
+ return src->src.ssa;
+
+ return nir_imov_alu(build, *src, num_components);
+ }
+
+ static inline nir_ssa_def *
+ nir_load_var(nir_builder *build, nir_variable *var)
+ {
+ const unsigned num_components = glsl_get_vector_elements(var->type);
+
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var);
+ load->num_components = num_components;
+ load->variables[0] = nir_deref_var_create(load, var);
+ nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+ nir_builder_instr_insert(build, &load->instr);
+ return &load->dest.ssa;
+ }
+
+ static inline void
+ nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value,
+ unsigned writemask)
+ {
+ const unsigned num_components = glsl_get_vector_elements(var->type);
+
+ nir_intrinsic_instr *store =
+ nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
+ store->num_components = num_components;
+ store->const_index[0] = writemask;
+ store->variables[0] = nir_deref_var_create(store, var);
+ store->src[0] = nir_src_for_ssa(value);
+ nir_builder_instr_insert(build, &store->instr);
+ }
+
++static inline void
++nir_store_deref_var(nir_builder *build, nir_deref_var *deref,
++ nir_ssa_def *value, unsigned writemask)
++{
++ const unsigned num_components =
++ glsl_get_vector_elements(nir_deref_tail(&deref->deref)->type);
++
++ nir_intrinsic_instr *store =
++ nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
++ store->num_components = num_components;
++ store->const_index[0] = writemask & ((1 << num_components) - 1);
++ store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &deref->deref));
++ store->src[0] = nir_src_for_ssa(value);
++ nir_builder_instr_insert(build, &store->instr);
++}
++
++static inline void
++nir_copy_deref_var(nir_builder *build, nir_deref_var *dest, nir_deref_var *src)
++{
++ assert(nir_deref_tail(&dest->deref)->type ==
++ nir_deref_tail(&src->deref)->type);
++
++ nir_intrinsic_instr *copy =
++ nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_var);
++ copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref));
++ copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref));
++ nir_builder_instr_insert(build, &copy->instr);
++}
++
++static inline void
++nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src)
++{
++ nir_intrinsic_instr *copy =
++ nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_var);
++ copy->variables[0] = nir_deref_var_create(copy, dest);
++ copy->variables[1] = nir_deref_var_create(copy, src);
++ nir_builder_instr_insert(build, &copy->instr);
++}
++
+ static inline nir_ssa_def *
+ nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index)
+ {
+ nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, op);
+ load->num_components = nir_intrinsic_infos[op].dest_components;
+ load->const_index[0] = index;
+ nir_ssa_dest_init(&load->instr, &load->dest,
+ nir_intrinsic_infos[op].dest_components, NULL);
+ nir_builder_instr_insert(build, &load->instr);
+ return &load->dest.ssa;
+ }
+
++static inline void
++nir_jump(nir_builder *build, nir_jump_type jump_type)
++{
++ nir_jump_instr *jump = nir_jump_instr_create(build->shader, jump_type);
++ nir_builder_instr_insert(build, &jump->instr);
++}
++
+ #endif /* NIR_BUILDER_H */
--- /dev/null
- struct hash_table *ptr_table;
+ /*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+ #include "nir.h"
+ #include "nir_control_flow_private.h"
+
+ /* Secret Decoder Ring:
+ * clone_foo():
+ * Allocate and clone a foo.
+ * __clone_foo():
+ * Clone body of foo (i.e. parent class, embedded struct, etc.)
+ */
+
+ typedef struct {
++ /* True if we are cloning an entire shader. */
++ bool global_clone;
++
+ /* maps orig ptr -> cloned ptr: */
-init_clone_state(clone_state *state)
++ struct hash_table *remap_table;
+
+ /* List of phi sources. */
+ struct list_head phi_srcs;
+
+ /* new shader object, used as memctx for just about everything else: */
+ nir_shader *ns;
+ } clone_state;
+
+ static void
- state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
++init_clone_state(clone_state *state, bool global)
+ {
- _mesa_hash_table_destroy(state->ptr_table, NULL);
++ state->global_clone = global;
++ state->remap_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
++ _mesa_key_pointer_equal);
+ list_inithead(&state->phi_srcs);
+ }
+
+ static void
+ free_clone_state(clone_state *state)
+ {
-static void *
-lookup_ptr(clone_state *state, const void *ptr)
++ _mesa_hash_table_destroy(state->remap_table, NULL);
+ }
+
- entry = _mesa_hash_table_search(state->ptr_table, ptr);
++static inline void *
++_lookup_ptr(clone_state *state, const void *ptr, bool global)
+ {
+ struct hash_entry *entry;
+
+ if (!ptr)
+ return NULL;
+
-store_ptr(clone_state *state, void *nptr, const void *ptr)
++ if (!state->global_clone && global)
++ return (void *)ptr;
++
++ entry = _mesa_hash_table_search(state->remap_table, ptr);
+ assert(entry && "Failed to find pointer!");
+ if (!entry)
+ return NULL;
+
+ return entry->data;
+ }
+
+ static void
- _mesa_hash_table_insert(state->ptr_table, ptr, nptr);
++add_remap(clone_state *state, void *nptr, const void *ptr)
++{
++ _mesa_hash_table_insert(state->remap_table, ptr, nptr);
++}
++
++static void *
++remap_local(clone_state *state, const void *ptr)
+ {
-static nir_constant *
-clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar)
++ return _lookup_ptr(state, ptr, false);
+ }
+
- nc->elements[i] = clone_constant(state, c->elements[i], nvar);
++static void *
++remap_global(clone_state *state, const void *ptr)
++{
++ return _lookup_ptr(state, ptr, true);
++}
++
++static nir_register *
++remap_reg(clone_state *state, const nir_register *reg)
++{
++ return _lookup_ptr(state, reg, reg->is_global);
++}
++
++static nir_variable *
++remap_var(clone_state *state, const nir_variable *var)
++{
++ return _lookup_ptr(state, var, var->data.mode != nir_var_local);
++}
++
++nir_constant *
++nir_constant_clone(const nir_constant *c, nir_variable *nvar)
+ {
+ nir_constant *nc = ralloc(nvar, nir_constant);
+
+ nc->value = c->value;
+ nc->num_elements = c->num_elements;
+ nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
+ for (unsigned i = 0; i < c->num_elements; i++) {
- store_ptr(state, nvar, var);
++ nc->elements[i] = nir_constant_clone(c->elements[i], nvar);
+ }
+
+ return nc;
+ }
+
+ /* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid
+ * having to deal with locals and globals separately:
+ */
+ static nir_variable *
+ clone_variable(clone_state *state, const nir_variable *var)
+ {
+ nir_variable *nvar = rzalloc(state->ns, nir_variable);
- clone_constant(state, var->constant_initializer, nvar);
++ add_remap(state, nvar, var);
+
+ nvar->type = var->type;
+ nvar->name = ralloc_strdup(nvar, var->name);
+ nvar->data = var->data;
+ nvar->num_state_slots = var->num_state_slots;
+ nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots);
+ memcpy(nvar->state_slots, var->state_slots,
+ var->num_state_slots * sizeof(nir_state_slot));
+ if (var->constant_initializer) {
+ nvar->constant_initializer =
- store_ptr(state, nreg, reg);
++ nir_constant_clone(var->constant_initializer, nvar);
+ }
+ nvar->interface_type = var->interface_type;
+
+ return nvar;
+ }
+
+ /* clone list of nir_variable: */
+ static void
+ clone_var_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list)
+ {
+ exec_list_make_empty(dst);
+ foreach_list_typed(nir_variable, var, node, list) {
+ nir_variable *nvar = clone_variable(state, var);
+ exec_list_push_tail(dst, &nvar->node);
+ }
+ }
+
+ /* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create()
+ * to avoid having to deal with locals and globals separately:
+ */
+ static nir_register *
+ clone_register(clone_state *state, const nir_register *reg)
+ {
+ nir_register *nreg = rzalloc(state->ns, nir_register);
- nsrc->ssa = lookup_ptr(state, src->ssa);
++ add_remap(state, nreg, reg);
+
+ nreg->num_components = reg->num_components;
+ nreg->num_array_elems = reg->num_array_elems;
+ nreg->index = reg->index;
+ nreg->name = ralloc_strdup(nreg, reg->name);
+ nreg->is_global = reg->is_global;
+ nreg->is_packed = reg->is_packed;
+
+ /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */
+ list_inithead(&nreg->uses);
+ list_inithead(&nreg->defs);
+ list_inithead(&nreg->if_uses);
+
+ return nreg;
+ }
+
+ /* clone list of nir_register: */
+ static void
+ clone_reg_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list)
+ {
+ exec_list_make_empty(dst);
+ foreach_list_typed(nir_register, reg, node, list) {
+ nir_register *nreg = clone_register(state, reg);
+ exec_list_push_tail(dst, &nreg->node);
+ }
+ }
+
+ static void
+ __clone_src(clone_state *state, void *ninstr_or_if,
+ nir_src *nsrc, const nir_src *src)
+ {
+ nsrc->is_ssa = src->is_ssa;
+ if (src->is_ssa) {
- nsrc->reg.reg = lookup_ptr(state, src->reg.reg);
++ nsrc->ssa = remap_local(state, src->ssa);
+ } else {
- store_ptr(state, &ndst->ssa, &dst->ssa);
++ nsrc->reg.reg = remap_reg(state, src->reg.reg);
+ if (src->reg.indirect) {
+ nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src);
+ __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect);
+ }
+ nsrc->reg.base_offset = src->reg.base_offset;
+ }
+ }
+
+ static void
+ __clone_dst(clone_state *state, nir_instr *ninstr,
+ nir_dest *ndst, const nir_dest *dst)
+ {
+ ndst->is_ssa = dst->is_ssa;
+ if (dst->is_ssa) {
+ nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name);
- ndst->reg.reg = lookup_ptr(state, dst->reg.reg);
++ add_remap(state, &ndst->ssa, &dst->ssa);
+ } else {
- nir_variable *nvar = lookup_ptr(state, dvar->var);
++ ndst->reg.reg = remap_reg(state, dst->reg.reg);
+ if (dst->reg.indirect) {
+ ndst->reg.indirect = ralloc(ninstr, nir_src);
+ __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect);
+ }
+ ndst->reg.base_offset = dst->reg.base_offset;
+ }
+ }
+
+ static nir_deref *clone_deref(clone_state *state, const nir_deref *deref,
+ nir_instr *ninstr, nir_deref *parent);
+
+ static nir_deref_var *
+ clone_deref_var(clone_state *state, const nir_deref_var *dvar,
+ nir_instr *ninstr)
+ {
- store_ptr(state, &nlc->def, &lc->def);
++ nir_variable *nvar = remap_var(state, dvar->var);
+ nir_deref_var *ndvar = nir_deref_var_create(ninstr, nvar);
+
+ if (dvar->deref.child)
+ ndvar->deref.child = clone_deref(state, dvar->deref.child,
+ ninstr, &ndvar->deref);
+
+ return ndvar;
+ }
+
+ static nir_deref_array *
+ clone_deref_array(clone_state *state, const nir_deref_array *darr,
+ nir_instr *ninstr, nir_deref *parent)
+ {
+ nir_deref_array *ndarr = nir_deref_array_create(parent);
+
+ ndarr->deref.type = darr->deref.type;
+ if (darr->deref.child)
+ ndarr->deref.child = clone_deref(state, darr->deref.child,
+ ninstr, &ndarr->deref);
+
+ ndarr->deref_array_type = darr->deref_array_type;
+ ndarr->base_offset = darr->base_offset;
+ if (ndarr->deref_array_type == nir_deref_array_type_indirect)
+ __clone_src(state, ninstr, &ndarr->indirect, &darr->indirect);
+
+ return ndarr;
+ }
+
+ static nir_deref_struct *
+ clone_deref_struct(clone_state *state, const nir_deref_struct *dstr,
+ nir_instr *ninstr, nir_deref *parent)
+ {
+ nir_deref_struct *ndstr = nir_deref_struct_create(parent, dstr->index);
+
+ ndstr->deref.type = dstr->deref.type;
+ if (dstr->deref.child)
+ ndstr->deref.child = clone_deref(state, dstr->deref.child,
+ ninstr, &ndstr->deref);
+
+ return ndstr;
+ }
+
+ static nir_deref *
+ clone_deref(clone_state *state, const nir_deref *dref,
+ nir_instr *ninstr, nir_deref *parent)
+ {
+ switch (dref->deref_type) {
+ case nir_deref_type_array:
+ return &clone_deref_array(state, nir_deref_as_array(dref),
+ ninstr, parent)->deref;
+ case nir_deref_type_struct:
+ return &clone_deref_struct(state, nir_deref_as_struct(dref),
+ ninstr, parent)->deref;
+ default:
+ unreachable("bad deref type");
+ return NULL;
+ }
+ }
+
+ static nir_alu_instr *
+ clone_alu(clone_state *state, const nir_alu_instr *alu)
+ {
+ nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op);
+
+ __clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest);
+ nalu->dest.saturate = alu->dest.saturate;
+ nalu->dest.write_mask = alu->dest.write_mask;
+
+ for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+ __clone_src(state, &nalu->instr, &nalu->src[i].src, &alu->src[i].src);
+ nalu->src[i].negate = alu->src[i].negate;
+ nalu->src[i].abs = alu->src[i].abs;
+ memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
+ sizeof(nalu->src[i].swizzle));
+ }
+
+ return nalu;
+ }
+
+ static nir_intrinsic_instr *
+ clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr)
+ {
+ nir_intrinsic_instr *nitr =
+ nir_intrinsic_instr_create(state->ns, itr->intrinsic);
+
+ unsigned num_variables = nir_intrinsic_infos[itr->intrinsic].num_variables;
+ unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs;
+
+ if (nir_intrinsic_infos[itr->intrinsic].has_dest)
+ __clone_dst(state, &nitr->instr, &nitr->dest, &itr->dest);
+
+ nitr->num_components = itr->num_components;
+ memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index));
+
+ for (unsigned i = 0; i < num_variables; i++) {
+ nitr->variables[i] = clone_deref_var(state, itr->variables[i],
+ &nitr->instr);
+ }
+
+ for (unsigned i = 0; i < num_srcs; i++)
+ __clone_src(state, &nitr->instr, &nitr->src[i], &itr->src[i]);
+
+ return nitr;
+ }
+
+ static nir_load_const_instr *
+ clone_load_const(clone_state *state, const nir_load_const_instr *lc)
+ {
+ nir_load_const_instr *nlc =
+ nir_load_const_instr_create(state->ns, lc->def.num_components);
+
+ memcpy(&nlc->value, &lc->value, sizeof(nlc->value));
+
- store_ptr(state, &nsa->def, &sa->def);
++ add_remap(state, &nlc->def, &lc->def);
+
+ return nlc;
+ }
+
+ static nir_ssa_undef_instr *
+ clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa)
+ {
+ nir_ssa_undef_instr *nsa =
+ nir_ssa_undef_instr_create(state->ns, sa->def.num_components);
+
- ntex->sampler_array_size = tex->sampler_array_size;
++ add_remap(state, &nsa->def, &sa->def);
+
+ return nsa;
+ }
+
+ static nir_tex_instr *
+ clone_tex(clone_state *state, const nir_tex_instr *tex)
+ {
+ nir_tex_instr *ntex = nir_tex_instr_create(state->ns, tex->num_srcs);
+
+ ntex->sampler_dim = tex->sampler_dim;
+ ntex->dest_type = tex->dest_type;
+ ntex->op = tex->op;
+ __clone_dst(state, &ntex->instr, &ntex->dest, &tex->dest);
+ for (unsigned i = 0; i < ntex->num_srcs; i++) {
+ ntex->src[i].src_type = tex->src[i].src_type;
+ __clone_src(state, &ntex->instr, &ntex->src[i].src, &tex->src[i].src);
+ }
+ ntex->coord_components = tex->coord_components;
+ ntex->is_array = tex->is_array;
+ ntex->is_shadow = tex->is_shadow;
+ ntex->is_new_style_shadow = tex->is_new_style_shadow;
+ memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset));
+ ntex->component = tex->component;
++ ntex->texture_index = tex->texture_index;
++ ntex->texture_array_size = tex->texture_array_size;
++ if (tex->texture)
++ ntex->texture = clone_deref_var(state, tex->texture, &ntex->instr);
+ ntex->sampler_index = tex->sampler_index;
- nir_function *ncallee = lookup_ptr(state, call->callee);
+ if (tex->sampler)
+ ntex->sampler = clone_deref_var(state, tex->sampler, &ntex->instr);
+
+ return ntex;
+ }
+
+ static nir_phi_instr *
+ clone_phi(clone_state *state, const nir_phi_instr *phi, nir_block *nblk)
+ {
+ nir_phi_instr *nphi = nir_phi_instr_create(state->ns);
+
+ __clone_dst(state, &nphi->instr, &nphi->dest, &phi->dest);
+
+ /* Cloning a phi node is a bit different from other instructions. The
+ * sources of phi instructions are the only time where we can use an SSA
+ * def before it is defined. In order to handle this, we just copy over
+ * the sources from the old phi instruction directly and then fix them up
+ * in a second pass once all the instructions in the function have been
+ * properly cloned.
+ *
+ * In order to ensure that the copied sources (which are the same as the
+ * old phi instruction's sources for now) don't get inserted into the old
+ * shader's use-def lists, we have to add the phi instruction *before* we
+ * set up its sources.
+ */
+ nir_instr_insert_after_block(nblk, &nphi->instr);
+
+ foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
+ nir_phi_src *nsrc = ralloc(nphi, nir_phi_src);
+
+ /* Just copy the old source for now. */
+ memcpy(nsrc, src, sizeof(*src));
+
+ /* Since we're not letting nir_insert_instr handle use/def stuff for us,
+ * we have to set the parent_instr manually. It doesn't really matter
+ * when we do it, so we might as well do it here.
+ */
+ nsrc->src.parent_instr = &nphi->instr;
+
+ /* Stash it in the list of phi sources. We'll walk this list and fix up
+ * sources at the very end of clone_function_impl.
+ */
+ list_add(&nsrc->src.use_link, &state->phi_srcs);
+
+ exec_list_push_tail(&nphi->srcs, &nsrc->node);
+ }
+
+ return nphi;
+ }
+
+ static nir_jump_instr *
+ clone_jump(clone_state *state, const nir_jump_instr *jmp)
+ {
+ nir_jump_instr *njmp = nir_jump_instr_create(state->ns, jmp->type);
+
+ return njmp;
+ }
+
+ static nir_call_instr *
+ clone_call(clone_state *state, const nir_call_instr *call)
+ {
- store_ptr(state, nblk, blk);
++ nir_function *ncallee = remap_global(state, call->callee);
+ nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee);
+
+ for (unsigned i = 0; i < ncall->num_params; i++)
+ ncall->params[i] = clone_deref_var(state, call->params[i], &ncall->instr);
+
+ ncall->return_deref = clone_deref_var(state, call->return_deref,
+ &ncall->instr);
+
+ return ncall;
+ }
+
+ static nir_instr *
+ clone_instr(clone_state *state, const nir_instr *instr)
+ {
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ return &clone_alu(state, nir_instr_as_alu(instr))->instr;
+ case nir_instr_type_intrinsic:
+ return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr;
+ case nir_instr_type_load_const:
+ return &clone_load_const(state, nir_instr_as_load_const(instr))->instr;
+ case nir_instr_type_ssa_undef:
+ return &clone_ssa_undef(state, nir_instr_as_ssa_undef(instr))->instr;
+ case nir_instr_type_tex:
+ return &clone_tex(state, nir_instr_as_tex(instr))->instr;
+ case nir_instr_type_phi:
+ unreachable("Cannot clone phis with clone_instr");
+ case nir_instr_type_jump:
+ return &clone_jump(state, nir_instr_as_jump(instr))->instr;
+ case nir_instr_type_call:
+ return &clone_call(state, nir_instr_as_call(instr))->instr;
+ case nir_instr_type_parallel_copy:
+ unreachable("Cannot clone parallel copies");
+ default:
+ unreachable("bad instr type");
+ return NULL;
+ }
+ }
+
+ static nir_block *
+ clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk)
+ {
+ /* Don't actually create a new block. Just use the one from the tail of
+ * the list. NIR guarantees that the tail of the list is a block and that
+ * no two blocks are side-by-side in the IR; it should be empty.
+ */
+ nir_block *nblk =
+ exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
+ assert(nblk->cf_node.type == nir_cf_node_block);
+ assert(exec_list_is_empty(&nblk->instr_list));
+
+ /* We need this for phi sources */
-clone_function_impl(clone_state *state, const nir_function_impl *fi,
- nir_function *nfxn)
++ add_remap(state, nblk, blk);
+
+ nir_foreach_instr(blk, instr) {
+ if (instr->type == nir_instr_type_phi) {
+ /* Phi instructions are a bit of a special case when cloning because
+ * we don't want inserting the instruction to automatically handle
+ * use/defs for us. Instead, we need to wait until all the
+ * blocks/instructions are in so that we can set their sources up.
+ */
+ clone_phi(state, nir_instr_as_phi(instr), nblk);
+ } else {
+ nir_instr *ninstr = clone_instr(state, instr);
+ nir_instr_insert_after_block(nblk, ninstr);
+ }
+ }
+
+ return nblk;
+ }
+
+ static void
+ clone_cf_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list);
+
+ static nir_if *
+ clone_if(clone_state *state, struct exec_list *cf_list, const nir_if *i)
+ {
+ nir_if *ni = nir_if_create(state->ns);
+
+ __clone_src(state, ni, &ni->condition, &i->condition);
+
+ nir_cf_node_insert_end(cf_list, &ni->cf_node);
+
+ clone_cf_list(state, &ni->then_list, &i->then_list);
+ clone_cf_list(state, &ni->else_list, &i->else_list);
+
+ return ni;
+ }
+
+ static nir_loop *
+ clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop)
+ {
+ nir_loop *nloop = nir_loop_create(state->ns);
+
+ nir_cf_node_insert_end(cf_list, &nloop->cf_node);
+
+ clone_cf_list(state, &nloop->body, &loop->body);
+
+ return nloop;
+ }
+
+ /* clone list of nir_cf_node: */
+ static void
+ clone_cf_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list)
+ {
+ foreach_list_typed(nir_cf_node, cf, node, list) {
+ switch (cf->type) {
+ case nir_cf_node_block:
+ clone_block(state, dst, nir_cf_node_as_block(cf));
+ break;
+ case nir_cf_node_if:
+ clone_if(state, dst, nir_cf_node_as_if(cf));
+ break;
+ case nir_cf_node_loop:
+ clone_loop(state, dst, nir_cf_node_as_loop(cf));
+ break;
+ default:
+ unreachable("bad cf type");
+ }
+ }
+ }
+
+ static nir_function_impl *
- nir_function_impl *nfi = nir_function_impl_create(nfxn);
++clone_function_impl(clone_state *state, const nir_function_impl *fi)
+ {
- nfi->params[i] = lookup_ptr(state, fi->params[i]);
++ nir_function_impl *nfi = nir_function_impl_create_bare(state->ns);
+
+ clone_var_list(state, &nfi->locals, &fi->locals);
+ clone_reg_list(state, &nfi->registers, &fi->registers);
+ nfi->reg_alloc = fi->reg_alloc;
+
+ nfi->num_params = fi->num_params;
+ nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params);
+ for (unsigned i = 0; i < fi->num_params; i++) {
- nfi->return_var = lookup_ptr(state, fi->return_var);
++ nfi->params[i] = remap_local(state, fi->params[i]);
+ }
- src->pred = lookup_ptr(state, src->pred);
++ nfi->return_var = remap_local(state, fi->return_var);
+
+ assert(list_empty(&state->phi_srcs));
+
+ clone_cf_list(state, &nfi->body, &fi->body);
+
+ /* After we've cloned almost everything, we have to walk the list of phi
+ * sources and fix them up. Thanks to loops, the block and SSA value for a
+ * phi source may not be defined when we first encounter it. Instead, we
+ * add it to the phi_srcs list and we fix it up here.
+ */
+ list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) {
- src->src.ssa = lookup_ptr(state, src->src.ssa);
++ src->pred = remap_local(state, src->pred);
+ assert(src->src.is_ssa);
- store_ptr(state, nfxn, fxn);
++ src->src.ssa = remap_local(state, src->src.ssa);
+
+ /* Remove from this list and place in the uses of the SSA def */
+ list_del(&src->src.use_link);
+ list_addtail(&src->src.use_link, &src->src.ssa->uses);
+ }
+ assert(list_empty(&state->phi_srcs));
+
+ /* All metadata is invalidated in the cloning process */
+ nfi->valid_metadata = 0;
+
+ return nfi;
+ }
+
++nir_function_impl *
++nir_function_impl_clone(const nir_function_impl *fi)
++{
++ clone_state state;
++ init_clone_state(&state, false);
++
++ /* We use the same shader */
++ state.ns = fi->function->shader;
++
++ nir_function_impl *nfi = clone_function_impl(&state, fi);
++
++ free_clone_state(&state);
++
++ return nfi;
++}
++
+ static nir_function *
+ clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns)
+ {
+ assert(ns == state->ns);
+ nir_function *nfxn = nir_function_create(ns, fxn->name);
+
+ /* Needed for call instructions */
- init_clone_state(&state);
++ add_remap(state, nfxn, fxn);
+
+ nfxn->num_params = fxn->num_params;
+ nfxn->params = ralloc_array(state->ns, nir_parameter, fxn->num_params);
+ memcpy(nfxn->params, fxn->params, sizeof(nir_parameter) * fxn->num_params);
+
+ nfxn->return_type = fxn->return_type;
+
+ /* At first glance, it looks like we should clone the function_impl here.
+ * However, call instructions need to be able to reference at least the
+ * function and those will get processed as we clone the function_impls.
+ * We stop here and do function_impls as a second pass.
+ */
+
+ return nfxn;
+ }
+
+ nir_shader *
+ nir_shader_clone(void *mem_ctx, const nir_shader *s)
+ {
+ clone_state state;
- nir_function *nfxn = lookup_ptr(&state, fxn);
- clone_function_impl(&state, fxn->impl, nfxn);
++ init_clone_state(&state, true);
+
+ nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options);
+ state.ns = ns;
+
+ clone_var_list(&state, &ns->uniforms, &s->uniforms);
+ clone_var_list(&state, &ns->inputs, &s->inputs);
+ clone_var_list(&state, &ns->outputs, &s->outputs);
++ clone_var_list(&state, &ns->shared, &s->shared);
+ clone_var_list(&state, &ns->globals, &s->globals);
+ clone_var_list(&state, &ns->system_values, &s->system_values);
+
+ /* Go through and clone functions */
+ foreach_list_typed(nir_function, fxn, node, &s->functions)
+ clone_function(&state, fxn, ns);
+
+ /* Only after all functions are cloned can we clone the actual function
+ * implementations. This is because nir_call_instrs need to be able to
+ * reference other functions, and we don't know what order the functions
+ * will have in the list.
+ */
+ nir_foreach_function(s, fxn) {
++ nir_function *nfxn = remap_global(&state, fxn);
++ nfxn->impl = clone_function_impl(&state, fxn->impl);
++ nfxn->impl->function = nfxn;
+ }
+
+ clone_reg_list(&state, &ns->registers, &s->registers);
+ ns->reg_alloc = s->reg_alloc;
+
+ ns->info = s->info;
+ ns->info.name = ralloc_strdup(ns, ns->info.name);
+ if (ns->info.label)
+ ns->info.label = ralloc_strdup(ns, ns->info.label);
+
+ ns->num_inputs = s->num_inputs;
+ ns->num_uniforms = s->num_uniforms;
+ ns->num_outputs = s->num_outputs;
++ ns->num_shared = s->num_shared;
+
+ free_clone_state(&state);
+
+ return ns;
+ }
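+
+ /* A minimal usage sketch of the entry point above, assuming the caller
+ * already has a valid nir_shader *shader; the helper name and the separate
+ * ralloc context are only illustrations of how the clone's lifetime can be
+ * managed independently of the original.
+ */
+ static nir_shader *
+ clone_into_new_context(const nir_shader *shader)
+ {
+ void *clone_ctx = ralloc_context(NULL);
+ return nir_shader_clone(clone_ctx, shader);
+ }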
--- /dev/null
- } else {
- assert(parent->type == nir_cf_node_loop);
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir_control_flow_private.h"
+
+ /**
+ * \name Control flow modification
+ *
+ * These functions modify the control flow tree while keeping the control flow
+ * graph up-to-date. The invariants respected are:
+ * 1. Each then statement, else statement, or loop body must have at least one
+ * control flow node.
+ * 2. Each if-statement and loop must have one basic block before it and one
+ * after.
+ * 3. Two basic blocks cannot be directly next to each other.
+ * 4. If a basic block has a jump instruction, there must be only one and it
+ * must be at the end of the block.
+ * 5. The CFG must always be connected - this means that we must insert a fake
+ * CFG edge for loops with no break statement.
+ *
+ * The purpose of the second invariant is to give us places to insert code
+ * during GCM, as well as to eliminate the possibility of critical edges.
+ */
+ /*@{*/
+
+ static bool
+ block_ends_in_jump(nir_block *block)
+ {
+ return !exec_list_is_empty(&block->instr_list) &&
+ nir_block_last_instr(block)->type == nir_instr_type_jump;
+ }
+
+ static inline void
+ block_add_pred(nir_block *block, nir_block *pred)
+ {
+ _mesa_set_add(block->predecessors, pred);
+ }
+
+ static inline void
+ block_remove_pred(nir_block *block, nir_block *pred)
+ {
+ struct set_entry *entry = _mesa_set_search(block->predecessors, pred);
+
+ assert(entry);
+
+ _mesa_set_remove(block->predecessors, entry);
+ }
+
+ static void
+ link_blocks(nir_block *pred, nir_block *succ1, nir_block *succ2)
+ {
+ pred->successors[0] = succ1;
+ if (succ1 != NULL)
+ block_add_pred(succ1, pred);
+
+ pred->successors[1] = succ2;
+ if (succ2 != NULL)
+ block_add_pred(succ2, pred);
+ }
+
+ static void
+ unlink_blocks(nir_block *pred, nir_block *succ)
+ {
+ if (pred->successors[0] == succ) {
+ pred->successors[0] = pred->successors[1];
+ pred->successors[1] = NULL;
+ } else {
+ assert(pred->successors[1] == succ);
+ pred->successors[1] = NULL;
+ }
+
+ block_remove_pred(succ, pred);
+ }
+
+ static void
+ unlink_block_successors(nir_block *block)
+ {
+ if (block->successors[1] != NULL)
+ unlink_blocks(block, block->successors[1]);
+ if (block->successors[0] != NULL)
+ unlink_blocks(block, block->successors[0]);
+ }
+
+ static void
+ link_non_block_to_block(nir_cf_node *node, nir_block *block)
+ {
+ if (node->type == nir_cf_node_if) {
+ /*
+ * We're trying to link an if to a block after it; this just means linking
+ * the last block of the then and else branches.
+ */
+
+ nir_if *if_stmt = nir_cf_node_as_if(node);
+
+ nir_cf_node *last_then = nir_if_last_then_node(if_stmt);
+ assert(last_then->type == nir_cf_node_block);
+ nir_block *last_then_block = nir_cf_node_as_block(last_then);
+
+ nir_cf_node *last_else = nir_if_last_else_node(if_stmt);
+ assert(last_else->type == nir_cf_node_block);
+ nir_block *last_else_block = nir_cf_node_as_block(last_else);
+
+ if (!block_ends_in_jump(last_then_block)) {
+ unlink_block_successors(last_then_block);
+ link_blocks(last_then_block, block, NULL);
+ }
+
+ if (!block_ends_in_jump(last_else_block)) {
+ unlink_block_successors(last_else_block);
+ link_blocks(last_else_block, block, NULL);
+ }
+ } else {
+ assert(node->type == nir_cf_node_loop);
+
+ /*
+ * We can only get to this codepath if we're inserting a new loop, or
+ * at least a loop with no break statements; we can't insert break
+ * statements into a loop when we haven't inserted it into the CFG
+ * because we wouldn't know which block comes after the loop
+ * and therefore which block should be the successor of the block with
+ * the break. Therefore, we need to insert a fake edge (see invariant
+ * #5).
+ */
+
+ nir_loop *loop = nir_cf_node_as_loop(node);
+
+ nir_cf_node *last = nir_loop_last_cf_node(loop);
+ assert(last->type == nir_cf_node_block);
+ nir_block *last_block = nir_cf_node_as_block(last);
+
+ last_block->successors[1] = block;
+ block_add_pred(block, last_block);
+ }
+ }
+
+ static void
+ link_block_to_non_block(nir_block *block, nir_cf_node *node)
+ {
+ if (node->type == nir_cf_node_if) {
+ /*
+ * We're trying to link a block to an if after it; this just means linking
+ * the block to the first block of the then and else branches.
+ */
+
+ nir_if *if_stmt = nir_cf_node_as_if(node);
+
+ nir_cf_node *first_then = nir_if_first_then_node(if_stmt);
+ assert(first_then->type == nir_cf_node_block);
+ nir_block *first_then_block = nir_cf_node_as_block(first_then);
+
+ nir_cf_node *first_else = nir_if_first_else_node(if_stmt);
+ assert(first_else->type == nir_cf_node_block);
+ nir_block *first_else_block = nir_cf_node_as_block(first_else);
+
+ unlink_block_successors(block);
+ link_blocks(block, first_then_block, first_else_block);
+ } else {
+ /*
+ * For similar reasons as the corresponding case in
+ * link_non_block_to_block(), don't worry about whether the loop header has
+ * any predecessors that need to be unlinked.
+ */
+
+ assert(node->type == nir_cf_node_loop);
+
+ nir_loop *loop = nir_cf_node_as_loop(node);
+
+ nir_cf_node *loop_header = nir_loop_first_cf_node(loop);
+ assert(loop_header->type == nir_cf_node_block);
+ nir_block *loop_header_block = nir_cf_node_as_block(loop_header);
+
+ unlink_block_successors(block);
+ link_blocks(block, loop_header_block, NULL);
+ }
+
+ }
+
+ /**
+ * Replace a block's successor with a different one.
+ */
+ static void
+ replace_successor(nir_block *block, nir_block *old_succ, nir_block *new_succ)
+ {
+ if (block->successors[0] == old_succ) {
+ block->successors[0] = new_succ;
+ } else {
+ assert(block->successors[1] == old_succ);
+ block->successors[1] = new_succ;
+ }
+
+ block_remove_pred(old_succ, block);
+ block_add_pred(new_succ, block);
+ }
+
+ /**
+ * Takes a basic block and inserts a new empty basic block before it, making its
+ * predecessors point to the new block. This essentially splits the block into
+ * an empty header and a body so that another non-block CF node can be inserted
+ * between the two. Note that this does *not* link the two basic blocks, so
+ * some kind of cleanup *must* be performed after this call.
+ */
+
+ static nir_block *
+ split_block_beginning(nir_block *block)
+ {
+ nir_block *new_block = nir_block_create(ralloc_parent(block));
+ new_block->cf_node.parent = block->cf_node.parent;
+ exec_node_insert_node_before(&block->cf_node.node, &new_block->cf_node.node);
+
+ struct set_entry *entry;
+ set_foreach(block->predecessors, entry) {
+ nir_block *pred = (nir_block *) entry->key;
+ replace_successor(pred, block, new_block);
+ }
+
+ /* Any phi nodes must stay part of the new block, or else their
+ * sources will be messed up. This will reverse the order of the phis, but
+ * order shouldn't matter.
+ */
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ exec_node_remove(&instr->node);
+ instr->block = new_block;
+ exec_list_push_head(&new_block->instr_list, &instr->node);
+ }
+
+ return new_block;
+ }
+
+ static void
+ rewrite_phi_preds(nir_block *block, nir_block *old_pred, nir_block *new_pred)
+ {
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ nir_foreach_phi_src(phi, src) {
+ if (src->pred == old_pred) {
+ src->pred = new_pred;
+ break;
+ }
+ }
+ }
+ }
+
+ static void
+ insert_phi_undef(nir_block *block, nir_block *pred)
+ {
+ nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ nir_ssa_undef_instr *undef =
+ nir_ssa_undef_instr_create(ralloc_parent(phi),
+ phi->dest.ssa.num_components);
+ nir_instr_insert_before_cf_list(&impl->body, &undef->instr);
+ nir_phi_src *src = ralloc(phi, nir_phi_src);
+ src->pred = pred;
+ src->src.parent_instr = &phi->instr;
+ src->src.is_ssa = true;
+ src->src.ssa = &undef->def;
+
+ list_addtail(&src->src.use_link, &undef->def.uses);
+
+ exec_list_push_tail(&phi->srcs, &src->node);
+ }
+ }
+
+ /**
+ * Moves the successors of source to the successors of dest, leaving both
+ * successors of source NULL.
+ */
+
+ static void
+ move_successors(nir_block *source, nir_block *dest)
+ {
+ nir_block *succ1 = source->successors[0];
+ nir_block *succ2 = source->successors[1];
+
+ if (succ1) {
+ unlink_blocks(source, succ1);
+ rewrite_phi_preds(succ1, source, dest);
+ }
+
+ if (succ2) {
+ unlink_blocks(source, succ2);
+ rewrite_phi_preds(succ2, source, dest);
+ }
+
+ unlink_block_successors(dest);
+ link_blocks(dest, succ1, succ2);
+ }
+
+ /* Given a basic block with no successors that has been inserted into the
+ * control flow tree, gives it the successors it would normally have assuming
+ * it doesn't end in a jump instruction. Also inserts phi sources with undefs
+ * if necessary.
+ */
+ static void
+ block_add_normal_succs(nir_block *block)
+ {
+ if (exec_node_is_tail_sentinel(block->cf_node.node.next)) {
+ nir_cf_node *parent = block->cf_node.parent;
+ if (parent->type == nir_cf_node_if) {
+ nir_cf_node *next = nir_cf_node_next(parent);
+ assert(next->type == nir_cf_node_block);
+ nir_block *next_block = nir_cf_node_as_block(next);
+
+ link_blocks(block, next_block, NULL);
++ } else if (parent->type == nir_cf_node_loop) {
+ nir_loop *loop = nir_cf_node_as_loop(parent);
+
+ nir_cf_node *head = nir_loop_first_cf_node(loop);
+ assert(head->type == nir_cf_node_block);
+ nir_block *head_block = nir_cf_node_as_block(head);
+
+ link_blocks(block, head_block, NULL);
+ insert_phi_undef(head_block, block);
++ } else {
++ assert(parent->type == nir_cf_node_function);
++ nir_function_impl *impl = nir_cf_node_as_function(parent);
++ link_blocks(block, impl->end_block, NULL);
+ }
+ } else {
+ nir_cf_node *next = nir_cf_node_next(&block->cf_node);
+ if (next->type == nir_cf_node_if) {
+ nir_if *next_if = nir_cf_node_as_if(next);
+
+ nir_cf_node *first_then = nir_if_first_then_node(next_if);
+ assert(first_then->type == nir_cf_node_block);
+ nir_block *first_then_block = nir_cf_node_as_block(first_then);
+
+ nir_cf_node *first_else = nir_if_first_else_node(next_if);
+ assert(first_else->type == nir_cf_node_block);
+ nir_block *first_else_block = nir_cf_node_as_block(first_else);
+
+ link_blocks(block, first_then_block, first_else_block);
+ } else {
+ assert(next->type == nir_cf_node_loop);
+ nir_loop *next_loop = nir_cf_node_as_loop(next);
+
+ nir_cf_node *first = nir_loop_first_cf_node(next_loop);
+ assert(first->type == nir_cf_node_block);
+ nir_block *first_block = nir_cf_node_as_block(first);
+
+ link_blocks(block, first_block, NULL);
+ insert_phi_undef(first_block, block);
+ }
+ }
+ }
+
+ static nir_block *
+ split_block_end(nir_block *block)
+ {
+ nir_block *new_block = nir_block_create(ralloc_parent(block));
+ new_block->cf_node.parent = block->cf_node.parent;
+ exec_node_insert_after(&block->cf_node.node, &new_block->cf_node.node);
+
+ if (block_ends_in_jump(block)) {
+ /* Figure out what successor the block would've had if it didn't end in a
+ * jump instruction, and give new_block that successor.
+ */
+ block_add_normal_succs(new_block);
+ } else {
+ move_successors(block, new_block);
+ }
+
+ return new_block;
+ }
+
+ static nir_block *
+ split_block_before_instr(nir_instr *instr)
+ {
+ assert(instr->type != nir_instr_type_phi);
+ nir_block *new_block = split_block_beginning(instr->block);
+
+ nir_foreach_instr_safe(instr->block, cur_instr) {
+ if (cur_instr == instr)
+ break;
+
+ exec_node_remove(&cur_instr->node);
+ cur_instr->block = new_block;
+ exec_list_push_tail(&new_block->instr_list, &cur_instr->node);
+ }
+
+ return new_block;
+ }
+
+ /* Splits a basic block at the point specified by the cursor. The "before" and
+ * "after" arguments are filled out with the blocks resulting from the split
+ * if non-NULL. Note that the "beginning" of the block is actually interpreted
+ * as before the first non-phi instruction, and it's illegal to split a block
+ * before a phi instruction.
+ */
+
+ static void
+ split_block_cursor(nir_cursor cursor,
+ nir_block **_before, nir_block **_after)
+ {
+ nir_block *before, *after;
+ switch (cursor.option) {
+ case nir_cursor_before_block:
+ after = cursor.block;
+ before = split_block_beginning(cursor.block);
+ break;
+
+ case nir_cursor_after_block:
+ before = cursor.block;
+ after = split_block_end(cursor.block);
+ break;
+
+ case nir_cursor_before_instr:
+ after = cursor.instr->block;
+ before = split_block_before_instr(cursor.instr);
+ break;
+
+ case nir_cursor_after_instr:
+ /* We lower this to split_block_before_instr() so that we can keep the
+ * after-a-jump-instr case contained to split_block_end().
+ */
+ if (nir_instr_is_last(cursor.instr)) {
+ before = cursor.instr->block;
+ after = split_block_end(cursor.instr->block);
+ } else {
+ after = cursor.instr->block;
+ before = split_block_before_instr(nir_instr_next(cursor.instr));
+ }
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+
+ if (_before)
+ *_before = before;
+ if (_after)
+ *_after = after;
+ }
+
+ /**
+ * Inserts a non-basic block between two basic blocks and links them together.
+ */
+
+ static void
+ insert_non_block(nir_block *before, nir_cf_node *node, nir_block *after)
+ {
+ node->parent = before->cf_node.parent;
+ exec_node_insert_after(&before->cf_node.node, &node->node);
+ link_block_to_non_block(before, node);
+ link_non_block_to_block(node, after);
+ }
+
+ /* walk up the control flow tree to find the innermost enclosed loop */
+ static nir_loop *
+ nearest_loop(nir_cf_node *node)
+ {
+ while (node->type != nir_cf_node_loop) {
+ node = node->parent;
+ }
+
+ return nir_cf_node_as_loop(node);
+ }
+
+ /*
+ * update the CFG after a jump instruction has been added to the end of a block
+ */
+
+ void
+ nir_handle_add_jump(nir_block *block)
+ {
+ nir_instr *instr = nir_block_last_instr(block);
+ nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
+
+ unlink_block_successors(block);
+
+ nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
+ nir_metadata_preserve(impl, nir_metadata_none);
+
+ if (jump_instr->type == nir_jump_break ||
+ jump_instr->type == nir_jump_continue) {
+ nir_loop *loop = nearest_loop(&block->cf_node);
+
+ if (jump_instr->type == nir_jump_continue) {
+ nir_cf_node *first_node = nir_loop_first_cf_node(loop);
+ assert(first_node->type == nir_cf_node_block);
+ nir_block *first_block = nir_cf_node_as_block(first_node);
+ link_blocks(block, first_block, NULL);
+ } else {
+ nir_cf_node *after = nir_cf_node_next(&loop->cf_node);
+ assert(after->type == nir_cf_node_block);
+ nir_block *after_block = nir_cf_node_as_block(after);
+ link_blocks(block, after_block, NULL);
+
+ /* If we inserted a fake link, remove it */
+ nir_cf_node *last = nir_loop_last_cf_node(loop);
+ assert(last->type == nir_cf_node_block);
+ nir_block *last_block = nir_cf_node_as_block(last);
+ if (last_block->successors[1] != NULL)
+ unlink_blocks(last_block, after_block);
+ }
+ } else {
+ assert(jump_instr->type == nir_jump_return);
+ link_blocks(block, impl->end_block, NULL);
+ }
+ }
+
+ static void
+ remove_phi_src(nir_block *block, nir_block *pred)
+ {
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ nir_foreach_phi_src_safe(phi, src) {
+ if (src->pred == pred) {
+ list_del(&src->src.use_link);
+ exec_node_remove(&src->node);
+ }
+ }
+ }
+ }
+
+ /* Removes the successor of a block with a jump, and inserts a fake edge for
+ * infinite loops. Note that the jump to be eliminated may be free-floating.
+ */
+
+ static void
+ unlink_jump(nir_block *block, nir_jump_type type, bool add_normal_successors)
+ {
+ nir_block *next = block->successors[0];
+
+ if (block->successors[0])
+ remove_phi_src(block->successors[0], block);
+ if (block->successors[1])
+ remove_phi_src(block->successors[1], block);
+
+ unlink_block_successors(block);
+ if (add_normal_successors)
+ block_add_normal_succs(block);
+
+ /* If we've just removed a break, and the block we were jumping to (after
+ * the loop) now has zero predecessors, we've created a new infinite loop.
+ *
+ * NIR doesn't allow blocks (other than the start block) to have zero
+ * predecessors. In particular, dominance assumes all blocks are reachable.
+ * So, we insert a "fake link" by making successors[1] point after the loop.
+ *
+ * Note that we have to do this after unlinking/recreating the block's
+ * successors. If we removed a "break" at the end of the loop, then
+ * block == last_block, so block->successors[0] would already be "next",
+ * and adding a fake link would create two identical successors. Doing
+ * this afterward works, as we'll have changed block->successors[0] to
+ * be the top of the loop.
+ */
+ if (type == nir_jump_break && next->predecessors->entries == 0) {
+ nir_loop *loop =
+ nir_cf_node_as_loop(nir_cf_node_prev(&next->cf_node));
+
+ /* insert fake link */
+ nir_cf_node *last = nir_loop_last_cf_node(loop);
+ assert(last->type == nir_cf_node_block);
+ nir_block *last_block = nir_cf_node_as_block(last);
+
+ last_block->successors[1] = next;
+ block_add_pred(next, last_block);
+ }
+ }
+
+ void
+ nir_handle_remove_jump(nir_block *block, nir_jump_type type)
+ {
+ unlink_jump(block, type, true);
+
+ nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
+ nir_metadata_preserve(impl, nir_metadata_none);
+ }
+
+ static void
+ update_if_uses(nir_cf_node *node)
+ {
+ if (node->type != nir_cf_node_if)
+ return;
+
+ nir_if *if_stmt = nir_cf_node_as_if(node);
+
+ if_stmt->condition.parent_if = if_stmt;
+ if (if_stmt->condition.is_ssa) {
+ list_addtail(&if_stmt->condition.use_link,
+ &if_stmt->condition.ssa->if_uses);
+ } else {
+ list_addtail(&if_stmt->condition.use_link,
+ &if_stmt->condition.reg.reg->if_uses);
+ }
+ }
+
+ /**
+ * Stitch two basic blocks together into one. The aggregate must have the same
+ * predecessors as the first and the same successors as the second.
+ */
+
+ static void
+ stitch_blocks(nir_block *before, nir_block *after)
+ {
+ /*
+ * We move after into before, so we have to deal with up to 2 successors vs.
+ * possibly a large number of predecessors.
+ *
+ * TODO: special case when before is empty and after isn't?
+ */
+
+ if (block_ends_in_jump(before)) {
+ assert(exec_list_is_empty(&after->instr_list));
+ if (after->successors[0])
+ remove_phi_src(after->successors[0], after);
+ if (after->successors[1])
+ remove_phi_src(after->successors[1], after);
+ unlink_block_successors(after);
+ exec_node_remove(&after->cf_node.node);
+ } else {
+ move_successors(after, before);
+
+ foreach_list_typed(nir_instr, instr, node, &after->instr_list) {
+ instr->block = before;
+ }
+
+ exec_list_append(&before->instr_list, &after->instr_list);
+ exec_node_remove(&after->cf_node.node);
+ }
+ }
+
+ void
+ nir_cf_node_insert(nir_cursor cursor, nir_cf_node *node)
+ {
+ nir_block *before, *after;
+
+ split_block_cursor(cursor, &before, &after);
+
+ if (node->type == nir_cf_node_block) {
+ nir_block *block = nir_cf_node_as_block(node);
+ exec_node_insert_after(&before->cf_node.node, &block->cf_node.node);
+ block->cf_node.parent = before->cf_node.parent;
+ /* stitch_blocks() assumes that any block that ends with a jump has
+ * already been set up with the correct successors, so we need to set
+ * up jumps here as the block is being inserted.
+ */
+ if (block_ends_in_jump(block))
+ nir_handle_add_jump(block);
+
+ stitch_blocks(block, after);
+ stitch_blocks(before, block);
+ } else {
+ update_if_uses(node);
+ insert_non_block(before, node, after);
+ }
+ }
+
+ static bool
+ replace_ssa_def_uses(nir_ssa_def *def, void *void_impl)
+ {
+ nir_function_impl *impl = void_impl;
+ void *mem_ctx = ralloc_parent(impl);
+
+ nir_ssa_undef_instr *undef =
+ nir_ssa_undef_instr_create(mem_ctx, def->num_components);
+ nir_instr_insert_before_cf_list(&impl->body, &undef->instr);
+ nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&undef->def));
+ return true;
+ }
+
+ static void
+ cleanup_cf_node(nir_cf_node *node, nir_function_impl *impl)
+ {
+ switch (node->type) {
+ case nir_cf_node_block: {
+ nir_block *block = nir_cf_node_as_block(node);
+ /* We need to walk the instructions and clean up defs/uses */
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type == nir_instr_type_jump) {
+ nir_jump_type jump_type = nir_instr_as_jump(instr)->type;
+ unlink_jump(block, jump_type, false);
+ } else {
+ nir_foreach_ssa_def(instr, replace_ssa_def_uses, impl);
+ nir_instr_remove(instr);
+ }
+ }
+ break;
+ }
+
+ case nir_cf_node_if: {
+ nir_if *if_stmt = nir_cf_node_as_if(node);
+ foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list)
+ cleanup_cf_node(child, impl);
+ foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list)
+ cleanup_cf_node(child, impl);
+
+ list_del(&if_stmt->condition.use_link);
+ break;
+ }
+
+ case nir_cf_node_loop: {
+ nir_loop *loop = nir_cf_node_as_loop(node);
+ foreach_list_typed(nir_cf_node, child, node, &loop->body)
+ cleanup_cf_node(child, impl);
+ break;
+ }
+ case nir_cf_node_function: {
+ nir_function_impl *impl = nir_cf_node_as_function(node);
+ foreach_list_typed(nir_cf_node, child, node, &impl->body)
+ cleanup_cf_node(child, impl);
+ break;
+ }
+ default:
+ unreachable("Invalid CF node type");
+ }
+ }
+
+ void
+ nir_cf_extract(nir_cf_list *extracted, nir_cursor begin, nir_cursor end)
+ {
+ nir_block *block_begin, *block_end, *block_before, *block_after;
+
++ if (nir_cursors_equal(begin, end)) {
++ exec_list_make_empty(&extracted->list);
++ extracted->impl = NULL; /* we shouldn't need this */
++ return;
++ }
++
+ /* In the case where begin points to an instruction in some basic block and
+ * end points to the end of the same basic block, we rely on the fact that
+ * splitting on an instruction moves earlier instructions into a new basic
+ * block. If the later instructions were moved instead, then the end cursor
+ * would be pointing to the same place that begin used to point to, which
+ * is obviously not what we want.
+ */
+ split_block_cursor(begin, &block_before, &block_begin);
+ split_block_cursor(end, &block_end, &block_after);
+
+ extracted->impl = nir_cf_node_get_function(&block_begin->cf_node);
+ exec_list_make_empty(&extracted->list);
+
+ /* Dominance and other block-related information is toast. */
+ nir_metadata_preserve(extracted->impl, nir_metadata_none);
+
+ nir_cf_node *cf_node = &block_begin->cf_node;
+ nir_cf_node *cf_node_end = &block_end->cf_node;
+ while (true) {
+ nir_cf_node *next = nir_cf_node_next(cf_node);
+
+ exec_node_remove(&cf_node->node);
+ cf_node->parent = NULL;
+ exec_list_push_tail(&extracted->list, &cf_node->node);
+
+ if (cf_node == cf_node_end)
+ break;
+
+ cf_node = next;
+ }
+
+ stitch_blocks(block_before, block_after);
+ }
+
+ void
+ nir_cf_reinsert(nir_cf_list *cf_list, nir_cursor cursor)
+ {
+ nir_block *before, *after;
+
++ if (exec_list_is_empty(&cf_list->list))
++ return;
++
+ split_block_cursor(cursor, &before, &after);
+
+ foreach_list_typed_safe(nir_cf_node, node, node, &cf_list->list) {
+ exec_node_remove(&node->node);
+ node->parent = before->cf_node.parent;
+ exec_node_insert_node_before(&after->cf_node.node, &node->node);
+ }
+
+ stitch_blocks(before,
+ nir_cf_node_as_block(nir_cf_node_next(&before->cf_node)));
+ stitch_blocks(nir_cf_node_as_block(nir_cf_node_prev(&after->cf_node)),
+ after);
+ }
+
+ void
+ nir_cf_delete(nir_cf_list *cf_list)
+ {
+ foreach_list_typed(nir_cf_node, node, node, &cf_list->list) {
+ cleanup_cf_node(node, cf_list->impl);
+ }
+ }
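+
+ /* A minimal sketch of how the extract/reinsert pair above is typically
+ * used, assuming a caller that wants to move a loop's body to some other
+ * cursor; the helper name and "target" are placeholders for illustration.
+ */
+ static void
+ move_loop_body(nir_loop *loop, nir_cursor target)
+ {
+ nir_cf_list body;
+ nir_cf_list_extract(&body, &loop->body);
+ nir_cf_reinsert(&body, target);
+ }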
--- /dev/null
- assert(new_idom);
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir.h"
+
+ /*
+ * Implements the algorithms for computing the dominance tree and the
+ * dominance frontier from "A Simple, Fast Dominance Algorithm" by Cooper,
+ * Harvey, and Kennedy.
+ */
+
+ typedef struct {
+ nir_function_impl *impl;
+ bool progress;
+ } dom_state;
+
+ static bool
+ init_block_cb(nir_block *block, void *_state)
+ {
+ dom_state *state = (dom_state *) _state;
+ if (block == nir_start_block(state->impl))
+ block->imm_dom = block;
+ else
+ block->imm_dom = NULL;
+ block->num_dom_children = 0;
+
+ struct set_entry *entry;
+ set_foreach(block->dom_frontier, entry) {
+ _mesa_set_remove(block->dom_frontier, entry);
+ }
+
+ return true;
+ }
+
+ static nir_block *
+ intersect(nir_block *b1, nir_block *b2)
+ {
+ while (b1 != b2) {
+ /*
+ * Note, the comparisons here are the opposite of what the paper says
+ * because we index blocks from beginning -> end (i.e. reverse
+ * post-order) instead of post-order like they assume.
+ */
+ while (b1->index > b2->index)
+ b1 = b1->imm_dom;
+ while (b2->index > b1->index)
+ b2 = b2->imm_dom;
+ }
+
+ return b1;
+ }
+
+ static bool
+ calc_dominance_cb(nir_block *block, void *_state)
+ {
+ dom_state *state = (dom_state *) _state;
+ if (block == nir_start_block(state->impl))
+ return true;
+
+ nir_block *new_idom = NULL;
+ struct set_entry *entry;
+ set_foreach(block->predecessors, entry) {
+ nir_block *pred = (nir_block *) entry->key;
+
+ if (pred->imm_dom) {
+ if (new_idom)
+ new_idom = intersect(pred, new_idom);
+ else
+ new_idom = pred;
+ }
+ }
+
+ if (block->imm_dom != new_idom) {
+ block->imm_dom = new_idom;
+ state->progress = true;
+ }
+
+ return true;
+ }
+
+ static bool
+ calc_dom_frontier_cb(nir_block *block, void *state)
+ {
+ (void) state;
+
+ if (block->predecessors->entries > 1) {
+ struct set_entry *entry;
+ set_foreach(block->predecessors, entry) {
+ nir_block *runner = (nir_block *) entry->key;
++
++ /* Skip unreachable predecessors */
++ if (runner->imm_dom == NULL)
++ continue;
++
+ while (runner != block->imm_dom) {
+ _mesa_set_add(runner->dom_frontier, block);
+ runner = runner->imm_dom;
+ }
+ }
+ }
+
+ return true;
+ }
+
+ /*
+ * Compute each node's children in the dominance tree from the immediate
+ * dominator information. We do this in three stages:
+ *
+ * 1. Calculate the number of children each node has
+ * 2. Allocate arrays, setting the number of children to 0 again
+ * 3. For each node, add itself to its parent's list of children, using
+ * num_dom_children as an index - at the end of this step, num_dom_children
+ * for each node will be the same as it was at the end of step #1.
+ */
+
+ static bool
+ block_count_children(nir_block *block, void *state)
+ {
+ (void) state;
+
+ if (block->imm_dom)
+ block->imm_dom->num_dom_children++;
+
+ return true;
+ }
+
+ static bool
+ block_alloc_children(nir_block *block, void *state)
+ {
+ void *mem_ctx = state;
+
+ block->dom_children = ralloc_array(mem_ctx, nir_block *,
+ block->num_dom_children);
+ block->num_dom_children = 0;
+
+ return true;
+ }
+
+ static bool
+ block_add_child(nir_block *block, void *state)
+ {
+ (void) state;
+
+ if (block->imm_dom)
+ block->imm_dom->dom_children[block->imm_dom->num_dom_children++] = block;
+
+ return true;
+ }
+
+ static void
+ calc_dom_children(nir_function_impl* impl)
+ {
+ void *mem_ctx = ralloc_parent(impl);
+
+ nir_foreach_block(impl, block_count_children, NULL);
+ nir_foreach_block(impl, block_alloc_children, mem_ctx);
+ nir_foreach_block(impl, block_add_child, NULL);
+ }
+
+ static void
+ calc_dfs_indicies(nir_block *block, unsigned *index)
+ {
+ block->dom_pre_index = (*index)++;
+
+ for (unsigned i = 0; i < block->num_dom_children; i++)
+ calc_dfs_indicies(block->dom_children[i], index);
+
+ block->dom_post_index = (*index)++;
+ }
+
+ void
+ nir_calc_dominance_impl(nir_function_impl *impl)
+ {
+ if (impl->valid_metadata & nir_metadata_dominance)
+ return;
+
+ nir_metadata_require(impl, nir_metadata_block_index);
+
+ dom_state state;
+ state.impl = impl;
+ state.progress = true;
+
+ nir_foreach_block(impl, init_block_cb, &state);
+
+ while (state.progress) {
+ state.progress = false;
+ nir_foreach_block(impl, calc_dominance_cb, &state);
+ }
+
+ nir_foreach_block(impl, calc_dom_frontier_cb, &state);
+
+ nir_block *start_block = nir_start_block(impl);
+ start_block->imm_dom = NULL;
+
+ calc_dom_children(impl);
+
+ unsigned dfs_index = 0;
+ calc_dfs_indicies(start_block, &dfs_index);
+ }
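+
+ /* A minimal sketch of how passes normally pull this analysis in, assuming
+ * an impl they are already operating on; the helper name is a placeholder.
+ * Requesting the metadata only recomputes dominance when it is not already
+ * valid (see the early return above).
+ */
+ static void
+ require_dominance(nir_function_impl *impl)
+ {
+ nir_metadata_require(impl, nir_metadata_dominance);
+ }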
+
+ void
+ nir_calc_dominance(nir_shader *shader)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_calc_dominance_impl(function->impl);
+ }
+ }
+
+ /**
+ * Computes the least common ancestor of two blocks. If one of the blocks
+ * is null, the other block is returned.
+ */
+ nir_block *
+ nir_dominance_lca(nir_block *b1, nir_block *b2)
+ {
+ if (b1 == NULL)
+ return b2;
+
+ if (b2 == NULL)
+ return b1;
+
+ assert(nir_cf_node_get_function(&b1->cf_node) ==
+ nir_cf_node_get_function(&b2->cf_node));
+
+ assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata &
+ nir_metadata_dominance);
+
+ return intersect(b1, b2);
+ }
+
+ /**
+ * Returns true if parent dominates child
+ */
+ bool
+ nir_block_dominates(nir_block *parent, nir_block *child)
+ {
+ assert(nir_cf_node_get_function(&parent->cf_node) ==
+ nir_cf_node_get_function(&child->cf_node));
+
+ assert(nir_cf_node_get_function(&parent->cf_node)->valid_metadata &
+ nir_metadata_dominance);
+
+ return child->dom_pre_index >= parent->dom_pre_index &&
+ child->dom_post_index <= parent->dom_post_index;
+ }
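+
+ /* An illustrative example with made-up DFS numbers: if the parent was
+ * visited with dom_pre_index = 2 and dom_post_index = 9, then a child with
+ * dom_pre_index = 4 and dom_post_index = 5 has its [pre, post] interval
+ * nested inside the parent's, so the test above returns true.
+ */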
+
+ static bool
+ dump_block_dom(nir_block *block, void *state)
+ {
+ FILE *fp = state;
+ if (block->imm_dom)
+ fprintf(fp, "\t%u -> %u\n", block->imm_dom->index, block->index);
+ return true;
+ }
+
+ void
+ nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp)
+ {
+ fprintf(fp, "digraph doms_%s {\n", impl->function->name);
+ nir_foreach_block(impl, dump_block_dom, fp);
+ fprintf(fp, "}\n\n");
+ }
+
+ void
+ nir_dump_dom_tree(nir_shader *shader, FILE *fp)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_dump_dom_tree_impl(function->impl, fp);
+ }
+ }
+
+ static bool
+ dump_block_dom_frontier(nir_block *block, void *state)
+ {
+ FILE *fp = state;
+
+ fprintf(fp, "DF(%u) = {", block->index);
+ struct set_entry *entry;
+ set_foreach(block->dom_frontier, entry) {
+ nir_block *df = (nir_block *) entry->key;
+ fprintf(fp, "%u, ", df->index);
+ }
+ fprintf(fp, "}\n");
+ return true;
+ }
+
+ void
+ nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp)
+ {
+ nir_foreach_block(impl, dump_block_dom_frontier, fp);
+ }
+
+ void
+ nir_dump_dom_frontier(nir_shader *shader, FILE *fp)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_dump_dom_frontier_impl(function->impl, fp);
+ }
+ }
+
+ static bool
+ dump_block_succs(nir_block *block, void *state)
+ {
+ FILE *fp = state;
+ if (block->successors[0])
+ fprintf(fp, "\t%u -> %u\n", block->index, block->successors[0]->index);
+ if (block->successors[1])
+ fprintf(fp, "\t%u -> %u\n", block->index, block->successors[1]->index);
+ return true;
+ }
+
+ void
+ nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp)
+ {
+ fprintf(fp, "digraph cfg_%s {\n", impl->function->name);
+ nir_foreach_block(impl, dump_block_succs, fp);
+ fprintf(fp, "}\n\n");
+ }
+
+ void
+ nir_dump_cfg(nir_shader *shader, FILE *fp)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_dump_cfg_impl(function->impl, fp);
+ }
+ }
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "nir.h"
++
++static void
++gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader)
++{
++ switch (instr->intrinsic) {
++ case nir_intrinsic_discard:
++ assert(shader->stage == MESA_SHADER_FRAGMENT);
++ shader->info.fs.uses_discard = true;
++ break;
++
++ case nir_intrinsic_load_front_face:
++ case nir_intrinsic_load_vertex_id:
++ case nir_intrinsic_load_vertex_id_zero_base:
++ case nir_intrinsic_load_base_vertex:
++ case nir_intrinsic_load_instance_id:
++ case nir_intrinsic_load_sample_id:
++ case nir_intrinsic_load_sample_pos:
++ case nir_intrinsic_load_sample_mask_in:
++ case nir_intrinsic_load_primitive_id:
++ case nir_intrinsic_load_invocation_id:
++ case nir_intrinsic_load_local_invocation_id:
++ case nir_intrinsic_load_work_group_id:
++ case nir_intrinsic_load_num_work_groups:
++ shader->info.system_values_read |=
++ (1 << nir_system_value_from_intrinsic(instr->intrinsic));
++ break;
++
++ case nir_intrinsic_end_primitive:
++ case nir_intrinsic_end_primitive_with_counter:
++ assert(shader->stage == MESA_SHADER_GEOMETRY);
++ shader->info.gs.uses_end_primitive = 1;
++ break;
++
++ default:
++ break;
++ }
++}
++
++static void
++gather_tex_info(nir_tex_instr *instr, nir_shader *shader)
++{
++ if (instr->op == nir_texop_tg4)
++ shader->info.uses_texture_gather = true;
++}
++
++static bool
++gather_info_block(nir_block *block, void *shader)
++{
++ nir_foreach_instr(block, instr) {
++ switch (instr->type) {
++ case nir_instr_type_intrinsic:
++ gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader);
++ break;
++ case nir_instr_type_tex:
++ gather_tex_info(nir_instr_as_tex(instr), shader);
++ break;
++ case nir_instr_type_call:
++ assert(!"nir_shader_gather_info only works if functions are inlined");
++ break;
++ default:
++ break;
++ }
++ }
++
++ return true;
++}
++
++void
++nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint)
++{
++ shader->info.inputs_read = 0;
++ foreach_list_typed(nir_variable, var, node, &shader->inputs)
++ shader->info.inputs_read |= nir_variable_get_io_mask(var, shader->stage);
++
++ /* TODO: Some day we may need to add stream support to NIR */
++ shader->info.outputs_written = 0;
++ foreach_list_typed(nir_variable, var, node, &shader->outputs)
++ shader->info.outputs_written |= nir_variable_get_io_mask(var, shader->stage);
++
++ shader->info.system_values_read = 0;
++ foreach_list_typed(nir_variable, var, node, &shader->system_values)
++ shader->info.system_values_read |= nir_variable_get_io_mask(var, shader->stage);
++
++ nir_foreach_block(entrypoint, gather_info_block, shader);
++}
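++
++/* A minimal usage sketch, assuming all functions have already been inlined
++ * and that the entry point is the function named "main"; the helper name and
++ * the lookup by name are only illustrations.
++ */
++static void
++gather_info_for_main(nir_shader *shader)
++{
++ nir_foreach_function(shader, func) {
++ if (func->impl && strcmp(func->name, "main") == 0)
++ nir_shader_gather_info(shader, func->impl);
++ }
++}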
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "nir.h"
++#include "nir_builder.h"
++#include "nir_control_flow.h"
++
++struct inline_functions_state {
++ struct set *inlined;
++ nir_builder builder;
++ bool progress;
++};
++
++static bool inline_function_impl(nir_function_impl *impl, struct set *inlined);
++
++static bool
++inline_functions_block(nir_block *block, void *void_state)
++{
++ struct inline_functions_state *state = void_state;
++
++ nir_builder *b = &state->builder;
++
++ /* This is tricky. We're iterating over instructions in a block but, as
++ * we go, the block and its instruction list are being split into
++ * pieces. However, this *should* be safe since foreach_safe always
++ * stashes the next thing in the iteration. That next thing will
++ * properly get moved to the next block when it gets split, and we
++ * continue iterating there.
++ */
++ nir_foreach_instr_safe(block, instr) {
++ if (instr->type != nir_instr_type_call)
++ continue;
++
++ state->progress = true;
++
++ nir_call_instr *call = nir_instr_as_call(instr);
++ assert(call->callee->impl);
++
++ inline_function_impl(call->callee->impl, state->inlined);
++
++ nir_function_impl *callee_copy =
++ nir_function_impl_clone(call->callee->impl);
++
++ exec_list_append(&b->impl->locals, &callee_copy->locals);
++ exec_list_append(&b->impl->registers, &callee_copy->registers);
++
++ b->cursor = nir_before_instr(&call->instr);
++
++ /* Add copies of all in parameters */
++ assert(call->num_params == callee_copy->num_params);
++ for (unsigned i = 0; i < callee_copy->num_params; i++) {
++ /* Only in or inout parameters */
++ if (call->callee->params[i].param_type == nir_parameter_out)
++ continue;
++
++ nir_copy_deref_var(b, nir_deref_var_create(b->shader,
++ callee_copy->params[i]),
++ call->params[i]);
++ }
++
++ /* Pluck the body out of the function and place it here */
++ nir_cf_list body;
++ nir_cf_list_extract(&body, &callee_copy->body);
++ nir_cf_reinsert(&body, b->cursor);
++
++ b->cursor = nir_before_instr(&call->instr);
++
++ /* Add copies of all out parameters and the return */
++ assert(call->num_params == callee_copy->num_params);
++ for (unsigned i = 0; i < callee_copy->num_params; i++) {
++ /* Only out or inout parameters */
++ if (call->callee->params[i].param_type == nir_parameter_in)
++ continue;
++
++ nir_copy_deref_var(b, call->params[i],
++ nir_deref_var_create(b->shader,
++ callee_copy->params[i]));
++ }
++ if (!glsl_type_is_void(call->callee->return_type)) {
++ nir_copy_deref_var(b, call->return_deref,
++ nir_deref_var_create(b->shader,
++ callee_copy->return_var));
++ }
++
++ nir_instr_remove(&call->instr);
++ }
++
++ return true;
++}
++
++static bool
++inline_function_impl(nir_function_impl *impl, struct set *inlined)
++{
++ if (_mesa_set_search(inlined, impl))
++ return false; /* Already inlined */
++
++ struct inline_functions_state state;
++
++ state.inlined = inlined;
++ state.progress = false;
++ nir_builder_init(&state.builder, impl);
++
++ nir_foreach_block(impl, inline_functions_block, &state);
++
++ if (state.progress) {
++ /* SSA and register indices are completely messed up now */
++ nir_index_ssa_defs(impl);
++ nir_index_local_regs(impl);
++
++ nir_metadata_preserve(impl, nir_metadata_none);
++ }
++
++ _mesa_set_add(inlined, impl);
++
++ return state.progress;
++}
++
++bool
++nir_inline_functions(nir_shader *shader)
++{
++ struct set *inlined = _mesa_set_create(NULL, _mesa_hash_pointer,
++ _mesa_key_pointer_equal);
++ bool progress = false;
++
++ nir_foreach_function(shader, function) {
++ if (function->impl)
++ progress = inline_function_impl(function->impl, inlined) || progress;
++ }
++
++ _mesa_set_destroy(inlined, NULL);
++
++ return progress;
++}
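++
++/* Note on usage: because inline_function_impl() recurses into callees before
++ * inlining them into their callers, a single nir_inline_functions() call is
++ * enough to flatten an acyclic call graph; repeated passes are not needed.
++ */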
--- /dev/null
- hash = HASH(hash, instr->sampler_array_size);
+ /*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+ #include "nir_instr_set.h"
+ #include "nir_vla.h"
+
+ #define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data))
+
+ static uint32_t
+ hash_src(uint32_t hash, const nir_src *src)
+ {
+ assert(src->is_ssa);
+ hash = HASH(hash, src->ssa);
+ return hash;
+ }
+
+ static uint32_t
+ hash_alu_src(uint32_t hash, const nir_alu_src *src, unsigned num_components)
+ {
+ hash = HASH(hash, src->abs);
+ hash = HASH(hash, src->negate);
+
+ for (unsigned i = 0; i < num_components; i++)
+ hash = HASH(hash, src->swizzle[i]);
+
+ hash = hash_src(hash, &src->src);
+ return hash;
+ }
+
+ static uint32_t
+ hash_alu(uint32_t hash, const nir_alu_instr *instr)
+ {
+ hash = HASH(hash, instr->op);
+ hash = HASH(hash, instr->dest.dest.ssa.num_components);
+
+ if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+ assert(nir_op_infos[instr->op].num_inputs == 2);
+ uint32_t hash0 = hash_alu_src(hash, &instr->src[0],
+ nir_ssa_alu_instr_src_components(instr, 0));
+ uint32_t hash1 = hash_alu_src(hash, &instr->src[1],
+ nir_ssa_alu_instr_src_components(instr, 1));
+ /* For commutative operations, we need some commutative way of
+ * combining the hashes. One option would be to XOR them but that
+ * means that anything with two identical sources will hash to 0 and
+ * that's common enough that we probably don't want the guaranteed
+ * collision. Either addition or multiplication will also work.
+ */
+ hash = hash0 * hash1;
+ } else {
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+ hash = hash_alu_src(hash, &instr->src[i],
+ nir_ssa_alu_instr_src_components(instr, i));
+ }
+ }
+
+ return hash;
+ }
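+
+ /* An illustrative example of the combining choice above: if both sources
+ * happen to hash to the same value h, then h ^ h is always 0 (a guaranteed
+ * collision), while h * h and h + h still depend on h.
+ */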
+
+ static uint32_t
+ hash_load_const(uint32_t hash, const nir_load_const_instr *instr)
+ {
+ hash = HASH(hash, instr->def.num_components);
+
+ hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f,
+ instr->def.num_components
+ * sizeof(instr->value.f[0]));
+
+ return hash;
+ }
+
+ static int
+ cmp_phi_src(const void *data1, const void *data2)
+ {
+ nir_phi_src *src1 = *(nir_phi_src **)data1;
+ nir_phi_src *src2 = *(nir_phi_src **)data2;
+ return src1->pred - src2->pred;
+ }
+
+ static uint32_t
+ hash_phi(uint32_t hash, const nir_phi_instr *instr)
+ {
+ hash = HASH(hash, instr->instr.block);
+
+ /* sort sources by predecessor, since the order shouldn't matter */
+ unsigned num_preds = instr->instr.block->predecessors->entries;
+ NIR_VLA(nir_phi_src *, srcs, num_preds);
+ unsigned i = 0;
+ nir_foreach_phi_src(instr, src) {
+ srcs[i++] = src;
+ }
+
+ qsort(srcs, num_preds, sizeof(nir_phi_src *), cmp_phi_src);
+
+ for (i = 0; i < num_preds; i++) {
+ hash = hash_src(hash, &srcs[i]->src);
+ hash = HASH(hash, srcs[i]->pred);
+ }
+
+ return hash;
+ }
+
+ static uint32_t
+ hash_intrinsic(uint32_t hash, const nir_intrinsic_instr *instr)
+ {
+ const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
+ hash = HASH(hash, instr->intrinsic);
+
+ if (info->has_dest)
+ hash = HASH(hash, instr->dest.ssa.num_components);
+
+ assert(info->num_variables == 0);
+
+ hash = _mesa_fnv32_1a_accumulate_block(hash, instr->const_index,
+ info->num_indices
+ * sizeof(instr->const_index[0]));
+ return hash;
+ }
+
+ static uint32_t
+ hash_tex(uint32_t hash, const nir_tex_instr *instr)
+ {
+ hash = HASH(hash, instr->op);
+ hash = HASH(hash, instr->num_srcs);
+
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ hash = HASH(hash, instr->src[i].src_type);
+ hash = hash_src(hash, &instr->src[i].src);
+ }
+
+ hash = HASH(hash, instr->coord_components);
+ hash = HASH(hash, instr->sampler_dim);
+ hash = HASH(hash, instr->is_array);
+ hash = HASH(hash, instr->is_shadow);
+ hash = HASH(hash, instr->is_new_style_shadow);
+ hash = HASH(hash, instr->const_offset);
+ unsigned component = instr->component;
+ hash = HASH(hash, component);
++ hash = HASH(hash, instr->texture_index);
++ hash = HASH(hash, instr->texture_array_size);
+ hash = HASH(hash, instr->sampler_index);
- tex1->sampler_index != tex2->sampler_index ||
- tex1->sampler_array_size != tex2->sampler_array_size) {
+
+ assert(!instr->sampler);
+
+ return hash;
+ }
+
+ /* Computes a hash of an instruction for use in a hash table. Note that this
+ * will only work for instructions where instr_can_rewrite() returns true, and
+ * it should return identical hashes for two instructions that are the same
+ * according to nir_instrs_equal().
+ */
+
+ static uint32_t
+ hash_instr(const void *data)
+ {
+ const nir_instr *instr = data;
+ uint32_t hash = _mesa_fnv32_1a_offset_bias;
+
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ hash = hash_alu(hash, nir_instr_as_alu(instr));
+ break;
+ case nir_instr_type_load_const:
+ hash = hash_load_const(hash, nir_instr_as_load_const(instr));
+ break;
+ case nir_instr_type_phi:
+ hash = hash_phi(hash, nir_instr_as_phi(instr));
+ break;
+ case nir_instr_type_intrinsic:
+ hash = hash_intrinsic(hash, nir_instr_as_intrinsic(instr));
+ break;
+ case nir_instr_type_tex:
+ hash = hash_tex(hash, nir_instr_as_tex(instr));
+ break;
+ default:
+ unreachable("Invalid instruction type");
+ }
+
+ return hash;
+ }
+
+ bool
+ nir_srcs_equal(nir_src src1, nir_src src2)
+ {
+ if (src1.is_ssa) {
+ if (src2.is_ssa) {
+ return src1.ssa == src2.ssa;
+ } else {
+ return false;
+ }
+ } else {
+ if (src2.is_ssa) {
+ return false;
+ } else {
+ if ((src1.reg.indirect == NULL) != (src2.reg.indirect == NULL))
+ return false;
+
+ if (src1.reg.indirect) {
+ if (!nir_srcs_equal(*src1.reg.indirect, *src2.reg.indirect))
+ return false;
+ }
+
+ return src1.reg.reg == src2.reg.reg &&
+ src1.reg.base_offset == src2.reg.base_offset;
+ }
+ }
+ }
+
+ static bool
+ nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
+ unsigned src1, unsigned src2)
+ {
+ if (alu1->src[src1].abs != alu2->src[src2].abs ||
+ alu1->src[src1].negate != alu2->src[src2].negate)
+ return false;
+
+ for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
+ if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i])
+ return false;
+ }
+
+ return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src);
+ }
+
+ /* Returns "true" if two instructions are equal. Note that this will only
+ * work for the subset of instructions defined by instr_can_rewrite(). Also,
+ * it should only return "true" for instructions that hash_instr() will return
+ * the same hash for (ignoring collisions, of course).
+ */
+
+ static bool
+ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
+ {
+ if (instr1->type != instr2->type)
+ return false;
+
+ switch (instr1->type) {
+ case nir_instr_type_alu: {
+ nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
+ nir_alu_instr *alu2 = nir_instr_as_alu(instr2);
+
+ if (alu1->op != alu2->op)
+ return false;
+
+ /* TODO: We can probably actually do something more intelligent such
+ * as allowing different numbers of components and taking the maximum or something
+ * here */
+ if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
+ return false;
+
+ if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+ assert(nir_op_infos[alu1->op].num_inputs == 2);
+ return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
+ nir_alu_srcs_equal(alu1, alu2, 1, 1)) ||
+ (nir_alu_srcs_equal(alu1, alu2, 0, 1) &&
+ nir_alu_srcs_equal(alu1, alu2, 1, 0));
+ } else {
+ for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
+ if (!nir_alu_srcs_equal(alu1, alu2, i, i))
+ return false;
+ }
+ }
+ return true;
+ }
+ case nir_instr_type_tex: {
+ nir_tex_instr *tex1 = nir_instr_as_tex(instr1);
+ nir_tex_instr *tex2 = nir_instr_as_tex(instr2);
+
+ if (tex1->op != tex2->op)
+ return false;
+
+ if (tex1->num_srcs != tex2->num_srcs)
+ return false;
+ for (unsigned i = 0; i < tex1->num_srcs; i++) {
+ if (tex1->src[i].src_type != tex2->src[i].src_type ||
+ !nir_srcs_equal(tex1->src[i].src, tex2->src[i].src)) {
+ return false;
+ }
+ }
+
+ if (tex1->coord_components != tex2->coord_components ||
+ tex1->sampler_dim != tex2->sampler_dim ||
+ tex1->is_array != tex2->is_array ||
+ tex1->is_shadow != tex2->is_shadow ||
+ tex1->is_new_style_shadow != tex2->is_new_style_shadow ||
+ memcmp(tex1->const_offset, tex2->const_offset,
+ sizeof(tex1->const_offset)) != 0 ||
+ tex1->component != tex2->component ||
- assert(!tex1->sampler && !tex2->sampler);
++ tex1->texture_index != tex2->texture_index ||
++ tex1->texture_array_size != tex2->texture_array_size ||
++ tex1->sampler_index != tex2->sampler_index) {
+ return false;
+ }
+
+ /* Don't support un-lowered sampler derefs currently. */
- if (tex->sampler)
++ assert(!tex1->texture && !tex1->sampler &&
++ !tex2->texture && !tex2->sampler);
+
+ return true;
+ }
+ case nir_instr_type_load_const: {
+ nir_load_const_instr *load1 = nir_instr_as_load_const(instr1);
+ nir_load_const_instr *load2 = nir_instr_as_load_const(instr2);
+
+ if (load1->def.num_components != load2->def.num_components)
+ return false;
+
+ return memcmp(load1->value.f, load2->value.f,
+ load1->def.num_components * sizeof(*load2->value.f)) == 0;
+ }
+ case nir_instr_type_phi: {
+ nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
+ nir_phi_instr *phi2 = nir_instr_as_phi(instr2);
+
+ if (phi1->instr.block != phi2->instr.block)
+ return false;
+
+ nir_foreach_phi_src(phi1, src1) {
+ nir_foreach_phi_src(phi2, src2) {
+ if (src1->pred == src2->pred) {
+ if (!nir_srcs_equal(src1->src, src2->src))
+ return false;
+
+ break;
+ }
+ }
+ }
+
+ return true;
+ }
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intrinsic1 = nir_instr_as_intrinsic(instr1);
+ nir_intrinsic_instr *intrinsic2 = nir_instr_as_intrinsic(instr2);
+ const nir_intrinsic_info *info =
+ &nir_intrinsic_infos[intrinsic1->intrinsic];
+
+ if (intrinsic1->intrinsic != intrinsic2->intrinsic ||
+ intrinsic1->num_components != intrinsic2->num_components)
+ return false;
+
+ if (info->has_dest && intrinsic1->dest.ssa.num_components !=
+ intrinsic2->dest.ssa.num_components)
+ return false;
+
+ for (unsigned i = 0; i < info->num_srcs; i++) {
+ if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i]))
+ return false;
+ }
+
+ assert(info->num_variables == 0);
+
+ for (unsigned i = 0; i < info->num_indices; i++) {
+ if (intrinsic1->const_index[i] != intrinsic2->const_index[i])
+ return false;
+ }
+
+ return true;
+ }
+ case nir_instr_type_call:
+ case nir_instr_type_jump:
+ case nir_instr_type_ssa_undef:
+ case nir_instr_type_parallel_copy:
+ default:
+ unreachable("Invalid instruction type");
+ }
+
+ return false;
+ }
+
+ static bool
+ src_is_ssa(nir_src *src, void *data)
+ {
+ (void) data;
+ return src->is_ssa;
+ }
+
+ static bool
+ dest_is_ssa(nir_dest *dest, void *data)
+ {
+ (void) data;
+ return dest->is_ssa;
+ }
+
+ /* This function determines if uses of an instruction can safely be rewritten
+ * to use another identical instruction instead. Note that this function must
+ * be kept in sync with hash_instr() and nir_instrs_equal() -- only
+ * instructions that pass this test will be handed on to those functions, and
+ * conversely they must handle everything that this function returns true for.
+ */
+
+ static bool
+ instr_can_rewrite(nir_instr *instr)
+ {
+ /* We only handle SSA. */
+ if (!nir_foreach_dest(instr, dest_is_ssa, NULL) ||
+ !nir_foreach_src(instr, src_is_ssa, NULL))
+ return false;
+
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ case nir_instr_type_load_const:
+ case nir_instr_type_phi:
+ return true;
+ case nir_instr_type_tex: {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+ /* Don't support un-lowered sampler derefs currently. */
++ if (tex->texture || tex->sampler)
+ return false;
+
+ return true;
+ }
+ case nir_instr_type_intrinsic: {
+ const nir_intrinsic_info *info =
+ &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
+ return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
+ (info->flags & NIR_INTRINSIC_CAN_REORDER) &&
+ info->num_variables == 0; /* not implemented yet */
+ }
+ case nir_instr_type_call:
+ case nir_instr_type_jump:
+ case nir_instr_type_ssa_undef:
+ return false;
+ case nir_instr_type_parallel_copy:
+ default:
+ unreachable("Invalid instruction type");
+ }
+
+ return false;
+ }
+
+ static nir_ssa_def *
+ nir_instr_get_dest_ssa_def(nir_instr *instr)
+ {
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ assert(nir_instr_as_alu(instr)->dest.dest.is_ssa);
+ return &nir_instr_as_alu(instr)->dest.dest.ssa;
+ case nir_instr_type_load_const:
+ return &nir_instr_as_load_const(instr)->def;
+ case nir_instr_type_phi:
+ assert(nir_instr_as_phi(instr)->dest.is_ssa);
+ return &nir_instr_as_phi(instr)->dest.ssa;
+ case nir_instr_type_intrinsic:
+ assert(nir_instr_as_intrinsic(instr)->dest.is_ssa);
+ return &nir_instr_as_intrinsic(instr)->dest.ssa;
+ case nir_instr_type_tex:
+ assert(nir_instr_as_tex(instr)->dest.is_ssa);
+ return &nir_instr_as_tex(instr)->dest.ssa;
+ default:
+ unreachable("We never ask for any of these");
+ }
+ }
+
+ static bool
+ cmp_func(const void *data1, const void *data2)
+ {
+ return nir_instrs_equal(data1, data2);
+ }
+
+ struct set *
+ nir_instr_set_create(void *mem_ctx)
+ {
+ return _mesa_set_create(mem_ctx, hash_instr, cmp_func);
+ }
+
+ void
+ nir_instr_set_destroy(struct set *instr_set)
+ {
+ _mesa_set_destroy(instr_set, NULL);
+ }
+
+ bool
+ nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr)
+ {
+ if (!instr_can_rewrite(instr))
+ return false;
+
+ struct set_entry *entry = _mesa_set_search(instr_set, instr);
+ if (entry) {
+ nir_ssa_def *def = nir_instr_get_dest_ssa_def(instr);
+ nir_ssa_def *new_def =
+ nir_instr_get_dest_ssa_def((nir_instr *) entry->key);
+ nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def));
+ return true;
+ }
+
+ _mesa_set_add(instr_set, instr);
+ return false;
+ }
+
+ void
+ nir_instr_set_remove(struct set *instr_set, nir_instr *instr)
+ {
+ if (!instr_can_rewrite(instr))
+ return;
+
+ struct set_entry *entry = _mesa_set_search(instr_set, instr);
+ if (entry)
+ _mesa_set_remove(instr_set, entry);
+ }
+
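+ /* A minimal usage sketch (not part of this change): a CSE-style walk over a
+  * block could drive the set above as follows, using only the helpers defined
+  * in this file plus nir_foreach_instr_safe()/nir_instr_remove():
+  *
+  *    nir_foreach_instr_safe(block, instr) {
+  *       if (nir_instr_set_add_or_rewrite(instr_set, instr)) {
+  *          // a matching instruction was already in the set and all uses
+  *          // of this one have been rewritten, so it can be dropped
+  *          nir_instr_remove(instr);
+  *       }
+  *    }
+  */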
--- /dev/null
-/* src[] = { offset }. const_index[] = { base } */
-LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ /**
+ * This header file defines all the available intrinsics in one place. It
+ * expands to a list of macros of the form:
+ *
+ * INTRINSIC(name, num_srcs, src_components, has_dest, dest_components,
+ * num_variables, num_indices, flags)
+ *
+ * Which should correspond one-to-one with the nir_intrinsic_info structure. It
+ * is included in both ir.h to create the nir_intrinsic enum (with members of
+ * the form nir_intrinsic_(name)) and in opcodes.c to create
+ * nir_intrinsic_infos, which is a const array of nir_intrinsic_info structures
+ * for each intrinsic.
+ */
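+ /* A rough sketch of how a consumer expands this list (the real definitions
+  * live in the headers that include this file; the file and macro spellings
+  * below are only illustrative assumptions):
+  *
+  *    #define INTRINSIC(name, ...) nir_intrinsic_##name,
+  *    #define LAST_INTRINSIC(name)
+  *    typedef enum {
+  *    #include "nir_intrinsics.h"
+  *       nir_num_intrinsics
+  *    } nir_intrinsic_op;
+  *    #undef INTRINSIC
+  *    #undef LAST_INTRINSIC
+  */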
+
+ #define ARR(...) { __VA_ARGS__ }
+
+
+ INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+ INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0)
+ INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
+
+ /*
+ * Interpolation of input. The interp_var_at* intrinsics are similar to the
+ * load_var intrinsic acting on a shader input except that they interpolate
+ * the input differently. The at_sample and at_offset intrinsics take an
+ * additional source that is an integer sample id or a vec2 position offset
+ * respectively.
+ */
+
+ INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+ /*
+ * Ask the driver for the size of a given buffer. It takes the buffer index
+ * as source.
+ */
+ INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+ /*
+ * a barrier is an intrinsic with no inputs/outputs but which can't be moved
+ * around/optimized in general
+ */
+ #define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0)
+
+ BARRIER(barrier)
+ BARRIER(discard)
+
+ /*
+ * Memory barrier with semantics analogous to the memoryBarrier() GLSL
+ * intrinsic.
+ */
+ BARRIER(memory_barrier)
+
+ /*
+ * Shader clock intrinsic with semantics analogous to the clock2x32ARB()
+ * GLSL intrinsic.
+ * The latter can be used as a code motion barrier, which is currently not
+ * feasible with NIR.
+ */
+ INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+
+ /*
+ * Memory barrier with semantics analogous to the compute shader
+ * groupMemoryBarrier(), memoryBarrierAtomicCounter(), memoryBarrierBuffer(),
+ * memoryBarrierImage() and memoryBarrierShared() GLSL intrinsics.
+ */
+ BARRIER(group_memory_barrier)
+ BARRIER(memory_barrier_atomic_counter)
+ BARRIER(memory_barrier_buffer)
+ BARRIER(memory_barrier_image)
+ BARRIER(memory_barrier_shared)
+
+ /** A conditional discard, with a single boolean source. */
+ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
+
+ /**
+ * Basic Geometry Shader intrinsics.
+ *
+ * emit_vertex implements GLSL's EmitStreamVertex() built-in. It takes a single
+ * index, which is the stream ID to write to.
+ *
+ * end_primitive implements GLSL's EndPrimitive() built-in.
+ */
+ INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, 0)
+ INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
+
+ /**
+ * Geometry Shader intrinsics with a vertex count.
+ *
+ * Alternatively, drivers may implement these intrinsics, and use
+ * nir_lower_gs_intrinsics() to convert from the basic intrinsics.
+ *
+ * These maintain a count of the number of vertices emitted, as an additional
+ * unsigned integer source.
+ */
+ INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+ INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
+
+ /*
+ * Atomic counters
+ *
+ * The *_var variants take an atomic_uint nir_variable, while the other,
+ * lowered, variants take a constant buffer index and register offset.
+ */
+
+ #define ATOMIC(name, flags) \
+ INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, flags) \
+ INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, flags)
+
+ ATOMIC(inc, 0)
+ ATOMIC(dec, 0)
+ ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE)
+
+ /*
+ * Image load, store and atomic intrinsics.
+ *
+ * All image intrinsics take an image target passed as a nir_variable. Image
+ * variables contain a number of memory and layout qualifiers that influence
+ * the semantics of the intrinsic.
+ *
+ * All image intrinsics take a four-coordinate vector and a sample index as
+ * first two sources, determining the location within the image that will be
+ * accessed by the intrinsic. Components not applicable to the image target
+ * in use are undefined. Image store takes an additional four-component
+ * argument with the value to be written, and image atomic operations take
+ * either one or two additional scalar arguments with the same meaning as in
+ * the ARB_shader_image_load_store specification.
+ */
+ INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE)
+ INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0)
+ INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
++/*
++ * Vulkan descriptor set intrinsic
++ *
++ * The Vulkan API uses a different binding model from GL. In the Vulkan
++ * API, all external resources are represented by a triple:
++ *
++ * (descriptor set, binding, array index)
++ *
++ * where the array index is the only thing allowed to be indirect. The
++ * vulkan_resource_index intrinsic takes the descriptor set and binding as
++ * its first two indices and the array index as its source. The third
++ * index is a nir_variable_mode in case that's useful to the backend.
++ *
++ * The intended usage is that the shader will call vulkan_resource_index to
++ * get an index and then pass that as the buffer index to ubo/ssbo calls.
++ */
++INTRINSIC(vulkan_resource_index, 1, ARR(1), true, 1, 0, 3,
++ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
++
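++/* A rough, illustrative printed form of the intended usage for a UBO at
++ * (set = 0, binding = 2) indexed by a dynamic value ssa_i:
++ *
++ *    vec1 ssa_idx = intrinsic vulkan_resource_index (ssa_i) (0, 2, mode)
++ *    vec4 ssa_val = intrinsic load_ubo (ssa_idx, ssa_offset)
++ *
++ * The exact printed syntax is approximate and not meant to be authoritative.
++ */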
++/*
++ * variable atomic intrinsics
++ *
++ * All of these variable atomic memory operations read a value from memory,
++ * compute a new value using one of the operations below, write the new value
++ * to memory, and return the original value read.
++ *
++ * All operations take 1 source except CompSwap that takes 2. These sources
++ * represent:
++ *
++ * 0: The data parameter to the atomic function (i.e. the value to add
++ * in shared_atomic_add, etc).
++ * 1: For CompSwap only: the second data parameter.
++ *
++ * All operations take 1 variable deref.
++ */
++INTRINSIC(var_atomic_add, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_imin, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_umin, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_imax, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_umax, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_and, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_or, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_xor, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_exchange, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_comp_swap, 2, ARR(1, 1), true, 1, 1, 0, 0)
++
+ /*
+ * SSBO atomic intrinsics
+ *
+ * All of the SSBO atomic memory operations read a value from memory,
+ * compute a new value using one of the operations below, write the new
+ * value to memory, and return the original value read.
+ *
+ * All operations take 3 sources except CompSwap that takes 4. These
+ * sources represent:
+ *
+ * 0: The SSBO buffer index.
+ * 1: The offset into the SSBO buffer of the variable that the atomic
+ * operation will operate on.
+ * 2: The data parameter to the atomic function (i.e. the value to add
+ * in ssbo_atomic_add, etc).
+ * 3: For CompSwap only: the second data parameter.
+ */
+ INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)
+
+ /*
+ * CS shared variable atomic intrinsics
+ *
+ * All of the shared variable atomic memory operations read a value from
+ * memory, compute a new value using one of the operations below, write the
+ * new value to memory, and return the original value read.
+ *
+ * All operations take 2 sources except CompSwap that takes 3. These
+ * sources represent:
+ *
+ * 0: The offset into the shared variable storage region that the atomic
+ * operation will operate on.
+ * 1: The data parameter to the atomic function (i.e. the value to add
+ * in shared_atomic_add, etc).
+ * 2: For CompSwap only: the second data parameter.
+ */
+ INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+
+ #define SYSTEM_VALUE(name, components, num_indices) \
+ INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+ SYSTEM_VALUE(front_face, 1, 0)
+ SYSTEM_VALUE(vertex_id, 1, 0)
+ SYSTEM_VALUE(vertex_id_zero_base, 1, 0)
+ SYSTEM_VALUE(base_vertex, 1, 0)
+ SYSTEM_VALUE(instance_id, 1, 0)
+ SYSTEM_VALUE(base_instance, 1, 0)
+ SYSTEM_VALUE(draw_id, 1, 0)
+ SYSTEM_VALUE(sample_id, 1, 0)
+ SYSTEM_VALUE(sample_pos, 2, 0)
+ SYSTEM_VALUE(sample_mask_in, 1, 0)
+ SYSTEM_VALUE(primitive_id, 1, 0)
+ SYSTEM_VALUE(invocation_id, 1, 0)
+ SYSTEM_VALUE(tess_coord, 3, 0)
+ SYSTEM_VALUE(tess_level_outer, 4, 0)
+ SYSTEM_VALUE(tess_level_inner, 2, 0)
+ SYSTEM_VALUE(patch_vertices_in, 1, 0)
+ SYSTEM_VALUE(local_invocation_id, 3, 0)
+ SYSTEM_VALUE(work_group_id, 3, 0)
+ SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
+ SYSTEM_VALUE(num_work_groups, 3, 0)
+ SYSTEM_VALUE(helper_invocation, 1, 0)
+
+ /*
+ * Load operations pull data from some piece of GPU memory. All load
+ * operations operate in terms of offsets into some piece of theoretical
+ * memory. Loads from externally visible memory (UBO and SSBO) simply take a
+ * byte offset as a source. Loads from opaque memory (uniforms, inputs, etc.)
+ * take a base+offset pair where the base (const_index[0]) gives the location
+ * of the start of the variable being loaded and the offset source is an
+ * offset into that variable.
+ *
++ * Uniform load operations have a second index that specifies the size of the
++ * variable being loaded. If const_index[1] == 0, then the size is unknown.
++ *
+ * Some load operations such as UBO/SSBO load and per_vertex loads take an
+ * additional source to specify which UBO/SSBO/vertex to load from.
+ *
+ * The exact address type depends on the lowering pass that generates the
+ * load/store intrinsics. Typically, this is vec4 units for things such as
+ * varying slots and float units for fragment shader inputs. UBO and SSBO
+ * offsets are always in bytes.
+ */
+
+ #define LOAD(name, srcs, indices, flags) \
+ INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags)
+
++/* src[] = { offset }. const_index[] = { base, size } */
++LOAD(uniform, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ /* src[] = { buffer_index, offset }. No const_index */
+ LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ /* src[] = { offset }. const_index[] = { base } */
+ LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ /* src[] = { vertex, offset }. const_index[] = { base } */
+ LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ /* src[] = { buffer_index, offset }. No const_index */
+ LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+ /* src[] = { offset }. const_index[] = { base } */
+ LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+ /* src[] = { vertex, offset }. const_index[] = { base } */
+ LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+ /* src[] = { offset }. const_index[] = { base } */
+ LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
++/* src[] = { offset }. const_index[] = { base, size } */
++LOAD(push_constant, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
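+ /* As an illustration (the printed form is approximate), a uniform whose
+  * driver_location is 4, loaded with a dynamic offset ssa_off, becomes:
+  *
+  *    vec1 ssa_val = intrinsic load_uniform (ssa_off) (4, size)
+  *
+  * where "size" is the size of the whole variable being loaded, as described
+  * above.
+  */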
+ /*
+ * Stores work the same way as loads, except now the first source is the value
+ * to store and the second (and possibly third) source specifies where to store
+ * the value. SSBO and shared memory stores also have a write mask as
+ * const_index[0].
+ */
+
+ #define STORE(name, srcs, indices, flags) \
+ INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags)
+
+ /* src[] = { value, offset }. const_index[] = { base, write_mask } */
+ STORE(output, 2, 2, 0)
+ /* src[] = { value, vertex, offset }. const_index[] = { base, write_mask } */
+ STORE(per_vertex_output, 3, 2, 0)
+ /* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
+ STORE(ssbo, 3, 1, 0)
+ /* src[] = { value, offset }. const_index[] = { base, write_mask } */
+ STORE(shared, 2, 2, 0)
+
+ LAST_INTRINSIC(store_shared)
--- /dev/null
- case nir_op_unpack_half_2x16:
- /* We could split this into unpack_half_2x16_split_[xy], but should
- * we?
- */
+ /*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+ #include "nir.h"
+ #include "nir_builder.h"
+
+ /** @file nir_lower_alu_to_scalar.c
+ *
+ * Replaces nir_alu_instr operations with more than one channel used in the
+ * arguments with individual per-channel operations.
+ */
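+ /* For example (approximate printed form), a two-component add
+  *
+  *    vec2 ssa_2 = fadd ssa_0, ssa_1
+  *
+  * is rewritten into per-channel adds whose results are recombined with a
+  * vec2 op:
+  *
+  *    vec1 ssa_3 = fadd ssa_0.x, ssa_1.x
+  *    vec1 ssa_4 = fadd ssa_0.y, ssa_1.y
+  *    vec2 ssa_5 = vec2 ssa_3, ssa_4
+  */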
+
+ static void
+ nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components)
+ {
+ nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
+ instr->dest.write_mask = (1 << num_components) - 1;
+ }
+
+ static void
+ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
+ nir_builder *builder)
+ {
+ unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
+
+ nir_ssa_def *last = NULL;
+ for (unsigned i = 0; i < num_components; i++) {
+ nir_alu_instr *chan = nir_alu_instr_create(builder->shader, chan_op);
+ nir_alu_ssa_dest_init(chan, 1);
+ nir_alu_src_copy(&chan->src[0], &instr->src[0], chan);
+ chan->src[0].swizzle[0] = chan->src[0].swizzle[i];
+ if (nir_op_infos[chan_op].num_inputs > 1) {
+ assert(nir_op_infos[chan_op].num_inputs == 2);
+ nir_alu_src_copy(&chan->src[1], &instr->src[1], chan);
+ chan->src[1].swizzle[0] = chan->src[1].swizzle[i];
+ }
+
+ nir_builder_instr_insert(builder, &chan->instr);
+
+ if (i == 0) {
+ last = &chan->dest.dest.ssa;
+ } else {
+ last = nir_build_alu(builder, merge_op,
+ last, &chan->dest.dest.ssa, NULL, NULL);
+ }
+ }
+
+ assert(instr->dest.write_mask == 1);
+ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last));
+ nir_instr_remove(&instr->instr);
+ }
+
+ static void
+ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
+ {
+ unsigned num_src = nir_op_infos[instr->op].num_inputs;
+ unsigned i, chan;
+
+ assert(instr->dest.dest.is_ssa);
+ assert(instr->dest.write_mask != 0);
+
+ b->cursor = nir_before_instr(&instr->instr);
+
+ #define LOWER_REDUCTION(name, chan, merge) \
+ case name##2: \
+ case name##3: \
+ case name##4: \
+ lower_reduction(instr, chan, merge, b); \
+ return;
+
+ switch (instr->op) {
+ case nir_op_vec4:
+ case nir_op_vec3:
+ case nir_op_vec2:
+ /* We don't need to scalarize these ops, they're the ones generated to
+ * group up outputs into a value that can be SSAed.
+ */
+ return;
+
++ case nir_op_pack_half_2x16:
++ if (!b->shader->options->lower_pack_half_2x16)
++ return;
++
++ nir_ssa_def *val =
++ nir_pack_half_2x16_split(b, nir_channel(b, instr->src[0].src.ssa,
++ instr->src[0].swizzle[0]),
++ nir_channel(b, instr->src[0].src.ssa,
++ instr->src[0].swizzle[1]));
++
++ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
++ nir_instr_remove(&instr->instr);
++ return;
++
+ case nir_op_unpack_unorm_4x8:
+ case nir_op_unpack_snorm_4x8:
+ case nir_op_unpack_unorm_2x16:
+ case nir_op_unpack_snorm_2x16:
+ /* There is no scalar version of these ops, unless we were to break it
+ * down to bitshifts and math (which is definitely not intended).
+ */
+ return;
+
++ case nir_op_unpack_half_2x16: {
++ if (!b->shader->options->lower_unpack_half_2x16)
++ return;
++
++ nir_ssa_def *comps[2];
++ comps[0] = nir_unpack_half_2x16_split_x(b, instr->src[0].src.ssa);
++ comps[1] = nir_unpack_half_2x16_split_y(b, instr->src[0].src.ssa);
++ nir_ssa_def *vec = nir_vec(b, comps, 2);
++
++ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec));
++ nir_instr_remove(&instr->instr);
+ return;
++ }
++
++ case nir_op_pack_uvec2_to_uint: {
++ assert(b->shader->options->lower_pack_snorm_2x16 ||
++ b->shader->options->lower_pack_unorm_2x16);
++
++ nir_ssa_def *word =
++ nir_extract_uword(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
++ nir_ssa_def *val =
++ nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)),
++ nir_channel(b, word, 0));
++
++ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
++ nir_instr_remove(&instr->instr);
++ break;
++ }
++
++ case nir_op_pack_uvec4_to_uint: {
++ assert(b->shader->options->lower_pack_snorm_4x8 ||
++ b->shader->options->lower_pack_unorm_4x8);
++
++ nir_ssa_def *byte =
++ nir_extract_ubyte(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
++ nir_ssa_def *val =
++ nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)),
++ nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))),
++ nir_ior(b, nir_ishl(b, nir_channel(b, byte, 1), nir_imm_int(b, 8)),
++ nir_channel(b, byte, 0)));
++
++ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
++ nir_instr_remove(&instr->instr);
++ break;
++ }
+
+ case nir_op_fdph: {
+ nir_ssa_def *sum[4];
+ for (unsigned i = 0; i < 3; i++) {
+ sum[i] = nir_fmul(b, nir_channel(b, instr->src[0].src.ssa,
+ instr->src[0].swizzle[i]),
+ nir_channel(b, instr->src[1].src.ssa,
+ instr->src[1].swizzle[i]));
+ }
+ sum[3] = nir_channel(b, instr->src[1].src.ssa, instr->src[1].swizzle[3]);
+
+ nir_ssa_def *val = nir_fadd(b, nir_fadd(b, sum[0], sum[1]),
+ nir_fadd(b, sum[2], sum[3]));
+
+ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
+ nir_instr_remove(&instr->instr);
+ return;
+ }
+
+ LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
+ LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
+ LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
+ LOWER_REDUCTION(nir_op_bany_fnequal, nir_op_fne, nir_op_ior);
+ LOWER_REDUCTION(nir_op_bany_inequal, nir_op_ine, nir_op_ior);
+ LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand);
+ LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for);
+
+ default:
+ break;
+ }
+
+ if (instr->dest.dest.ssa.num_components == 1)
+ return;
+
+ unsigned num_components = instr->dest.dest.ssa.num_components;
+ nir_ssa_def *comps[] = { NULL, NULL, NULL, NULL };
+
+ for (chan = 0; chan < 4; chan++) {
+ if (!(instr->dest.write_mask & (1 << chan)))
+ continue;
+
+ nir_alu_instr *lower = nir_alu_instr_create(b->shader, instr->op);
+ for (i = 0; i < num_src; i++) {
+ /* We only handle same-size-as-dest (input_sizes[] == 0) or scalar
+ * args (input_sizes[] == 1).
+ */
+ assert(nir_op_infos[instr->op].input_sizes[i] < 2);
+ unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ?
+ 0 : chan);
+
+ nir_alu_src_copy(&lower->src[i], &instr->src[i], lower);
+ for (int j = 0; j < 4; j++)
+ lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];
+ }
+
+ nir_alu_ssa_dest_init(lower, 1);
+ lower->dest.saturate = instr->dest.saturate;
+ comps[chan] = &lower->dest.dest.ssa;
+
+ nir_builder_instr_insert(b, &lower->instr);
+ }
+
+ nir_ssa_def *vec = nir_vec(b, comps, num_components);
+
+ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec));
+
+ nir_instr_remove(&instr->instr);
+ }
+
+ static bool
+ lower_alu_to_scalar_block(nir_block *block, void *builder)
+ {
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type == nir_instr_type_alu)
+ lower_alu_instr_scalar(nir_instr_as_alu(instr), builder);
+ }
+
+ return true;
+ }
+
+ static void
+ nir_lower_alu_to_scalar_impl(nir_function_impl *impl)
+ {
+ nir_builder builder;
+ nir_builder_init(&builder, impl);
+
+ nir_foreach_block(impl, lower_alu_to_scalar_block, &builder);
+ }
+
+ void
+ nir_lower_alu_to_scalar(nir_shader *shader)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_lower_alu_to_scalar_impl(function->impl);
+ }
+ }
--- /dev/null
- instr->variables[0]->var->data.mode != nir_var_shader_storage)
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "compiler/glsl/ir_uniform.h"
+ #include "nir.h"
+ #include "main/config.h"
+ #include <assert.h>
+
+ typedef struct {
+ const struct gl_shader_program *shader_program;
+ nir_shader *shader;
+ } lower_atomic_state;
+
+ /*
+ * replace atomic counter intrinsics that use a variable with intrinsics
+ * that directly store the buffer index and byte offset
+ */
+
+ static void
+ lower_instr(nir_intrinsic_instr *instr,
+ lower_atomic_state *state)
+ {
+ nir_intrinsic_op op;
+ switch (instr->intrinsic) {
+ case nir_intrinsic_atomic_counter_read_var:
+ op = nir_intrinsic_atomic_counter_read;
+ break;
+
+ case nir_intrinsic_atomic_counter_inc_var:
+ op = nir_intrinsic_atomic_counter_inc;
+ break;
+
+ case nir_intrinsic_atomic_counter_dec_var:
+ op = nir_intrinsic_atomic_counter_dec;
+ break;
+
+ default:
+ return;
+ }
+
+ if (instr->variables[0]->var->data.mode != nir_var_uniform &&
++ instr->variables[0]->var->data.mode != nir_var_shader_storage &&
++ instr->variables[0]->var->data.mode != nir_var_shared)
+ return; /* atomics passed as function arguments can't be lowered */
+
+ void *mem_ctx = ralloc_parent(instr);
+ unsigned uniform_loc = instr->variables[0]->var->data.location;
+
+ nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
+ new_instr->const_index[0] =
+ state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index;
+
+ nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
+ offset_const->value.u[0] = instr->variables[0]->var->data.offset;
+
+ nir_instr_insert_before(&instr->instr, &offset_const->instr);
+
+ nir_ssa_def *offset_def = &offset_const->def;
+
+ nir_deref *tail = &instr->variables[0]->deref;
+ while (tail->child != NULL) {
+ assert(tail->child->deref_type == nir_deref_type_array);
+ nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+ tail = tail->child;
+
+ unsigned child_array_elements = tail->child != NULL ?
+ glsl_get_aoa_size(tail->type) : 1;
+
+ offset_const->value.u[0] += deref_array->base_offset *
+ child_array_elements * ATOMIC_COUNTER_SIZE;
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ nir_load_const_instr *atomic_counter_size =
+ nir_load_const_instr_create(mem_ctx, 1);
+ atomic_counter_size->value.u[0] = child_array_elements * ATOMIC_COUNTER_SIZE;
+ nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);
+
+ nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
+ nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
+ mul->dest.write_mask = 0x1;
+ nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul);
+ mul->src[1].src.is_ssa = true;
+ mul->src[1].src.ssa = &atomic_counter_size->def;
+ nir_instr_insert_before(&instr->instr, &mul->instr);
+
+ nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd);
+ nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
+ add->dest.write_mask = 0x1;
+ add->src[0].src.is_ssa = true;
+ add->src[0].src.ssa = &mul->dest.dest.ssa;
+ add->src[1].src.is_ssa = true;
+ add->src[1].src.ssa = offset_def;
+ nir_instr_insert_before(&instr->instr, &add->instr);
+
+ offset_def = &add->dest.dest.ssa;
+ }
+ }
+
+ new_instr->src[0].is_ssa = true;
+ new_instr->src[0].ssa = offset_def;
+
+ if (instr->dest.is_ssa) {
+ nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
+ instr->dest.ssa.num_components, NULL);
+ nir_ssa_def_rewrite_uses(&instr->dest.ssa,
+ nir_src_for_ssa(&new_instr->dest.ssa));
+ } else {
+ nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
+ }
+
+ nir_instr_insert_before(&instr->instr, &new_instr->instr);
+ nir_instr_remove(&instr->instr);
+ }
+
+ static bool
+ lower_block(nir_block *block, void *state)
+ {
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type == nir_instr_type_intrinsic)
+ lower_instr(nir_instr_as_intrinsic(instr),
+ (lower_atomic_state *) state);
+ }
+
+ return true;
+ }
+
+ void
+ nir_lower_atomics(nir_shader *shader,
+ const struct gl_shader_program *shader_program)
+ {
+ lower_atomic_state state = {
+ .shader = shader,
+ .shader_program = shader_program,
+ };
+
+ nir_foreach_function(shader, function) {
+ if (function->impl) {
+ nir_foreach_block(function->impl, lower_block, (void *) &state);
+ nir_metadata_preserve(function->impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+ }
+ }
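+ /* For instance (illustrative only), with
+  *
+  *    layout(binding = 0, offset = 4) uniform atomic_uint c;
+  *
+  * an atomic_counter_inc_var intrinsic on c is rewritten to roughly
+  *
+  *    vec1 ssa_0 = load_const (0x00000004)
+  *    vec1 ssa_1 = intrinsic atomic_counter_inc (ssa_0) (buf)
+  *
+  * where "buf" is the counter buffer index looked up in UniformStorage and
+  * ssa_0 carries the byte offset, extended with imul/iadd chains when the
+  * deref contains indirect array indexing.
+  */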
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2016 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "nir.h"
++#include "nir_builder.h"
++
++static void
++emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
++ nir_deref_var *deref, nir_deref *tail,
++ nir_ssa_def **dest, nir_ssa_def *src);
++
++static void
++emit_indirect_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
++ nir_deref_var *deref, nir_deref *arr_parent,
++ int start, int end,
++ nir_ssa_def **dest, nir_ssa_def *src)
++{
++ assert(arr_parent->child &&
++ arr_parent->child->deref_type == nir_deref_type_array);
++ nir_deref_array *arr = nir_deref_as_array(arr_parent->child);
++ assert(arr->deref_array_type == nir_deref_array_type_indirect);
++ assert(arr->indirect.is_ssa);
++
++ assert(start < end);
++ if (start == end - 1) {
++ /* Base case. Just emit the load/store op */
++ nir_deref_array direct = *arr;
++ direct.deref_array_type = nir_deref_array_type_direct;
++ direct.base_offset += start;
++ direct.indirect = NIR_SRC_INIT;
++
++ arr_parent->child = &direct.deref;
++ emit_load_store(b, orig_instr, deref, &arr->deref, dest, src);
++ arr_parent->child = &arr->deref;
++ } else {
++ int mid = start + (end - start) / 2;
++
++ nir_ssa_def *then_dest, *else_dest;
++
++ nir_if *if_stmt = nir_if_create(b->shader);
++ if_stmt->condition = nir_src_for_ssa(nir_ilt(b, arr->indirect.ssa,
++ nir_imm_int(b, mid)));
++ nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
++
++ b->cursor = nir_after_cf_list(&if_stmt->then_list);
++ emit_indirect_load_store(b, orig_instr, deref, arr_parent,
++ start, mid, &then_dest, src);
++
++ b->cursor = nir_after_cf_list(&if_stmt->else_list);
++ emit_indirect_load_store(b, orig_instr, deref, arr_parent,
++ mid, end, &else_dest, src);
++
++ b->cursor = nir_after_cf_node(&if_stmt->cf_node);
++
++ if (src == NULL) {
++ /* We're a load. We need to insert a phi node */
++ nir_phi_instr *phi = nir_phi_instr_create(b->shader);
++ nir_ssa_dest_init(&phi->instr, &phi->dest,
++ then_dest->num_components, NULL);
++
++ nir_phi_src *src0 = ralloc(phi, nir_phi_src);
++ src0->pred = nir_cf_node_as_block(nir_if_last_then_node(if_stmt));
++ src0->src = nir_src_for_ssa(then_dest);
++ exec_list_push_tail(&phi->srcs, &src0->node);
++
++ nir_phi_src *src1 = ralloc(phi, nir_phi_src);
++ src1->pred = nir_cf_node_as_block(nir_if_last_else_node(if_stmt));
++ src1->src = nir_src_for_ssa(else_dest);
++ exec_list_push_tail(&phi->srcs, &src1->node);
++
++ nir_builder_instr_insert(b, &phi->instr);
++ *dest = &phi->dest.ssa;
++ }
++ }
++}
++
++static void
++emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
++ nir_deref_var *deref, nir_deref *tail,
++ nir_ssa_def **dest, nir_ssa_def *src)
++{
++ for (; tail->child; tail = tail->child) {
++ if (tail->child->deref_type != nir_deref_type_array)
++ continue;
++
++ nir_deref_array *arr = nir_deref_as_array(tail->child);
++ if (arr->deref_array_type != nir_deref_array_type_indirect)
++ continue;
++
++ int length = glsl_get_length(tail->type);
++
++ emit_indirect_load_store(b, orig_instr, deref, tail, -arr->base_offset,
++ length - arr->base_offset, dest, src);
++ return;
++ }
++
++ assert(tail && tail->child == NULL);
++
++ /* We reached the end of the deref chain. Emit the instruction */
++
++ if (src == NULL) {
++ /* This is a load instruction */
++ nir_intrinsic_instr *load =
++ nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
++ load->num_components = orig_instr->num_components;
++ load->variables[0] =
++ nir_deref_as_var(nir_copy_deref(load, &deref->deref));
++ nir_ssa_dest_init(&load->instr, &load->dest,
++ load->num_components, NULL);
++ nir_builder_instr_insert(b, &load->instr);
++ *dest = &load->dest.ssa;
++ } else {
++ /* This is a store instruction */
++ nir_intrinsic_instr *store =
++ nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
++ store->num_components = orig_instr->num_components;
++ store->const_index[0] = orig_instr->const_index[0]; /* writemask */
++ store->variables[0] =
++ nir_deref_as_var(nir_copy_deref(store, &deref->deref));
++ store->src[0] = nir_src_for_ssa(src);
++ nir_builder_instr_insert(b, &store->instr);
++ }
++}
++
++static bool
++deref_has_indirect(nir_deref_var *deref)
++{
++ for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
++ if (tail->deref_type != nir_deref_type_array)
++ continue;
++
++ nir_deref_array *arr = nir_deref_as_array(tail);
++ if (arr->deref_array_type == nir_deref_array_type_indirect)
++ return true;
++ }
++
++ return false;
++}
++
++struct lower_indirect_state {
++ nir_builder builder;
++ uint32_t mode_mask;
++ bool progress;
++};
++
++static bool
++lower_indirect_block(nir_block *block, void *void_state)
++{
++ struct lower_indirect_state *state = void_state;
++
++ nir_foreach_instr_safe(block, instr) {
++ if (instr->type != nir_instr_type_intrinsic)
++ continue;
++
++ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
++ if (intrin->intrinsic != nir_intrinsic_load_var &&
++ intrin->intrinsic != nir_intrinsic_store_var)
++ continue;
++
++ if (!deref_has_indirect(intrin->variables[0]))
++ continue;
++
++ /* Only lower variables whose mode is in the mask */
++ if (!(state->mode_mask & (1 << intrin->variables[0]->var->data.mode)))
++ continue;
++
++ state->builder.cursor = nir_before_instr(&intrin->instr);
++
++ if (intrin->intrinsic == nir_intrinsic_load_var) {
++ nir_ssa_def *result;
++ emit_load_store(&state->builder, intrin, intrin->variables[0],
++ &intrin->variables[0]->deref, &result, NULL);
++ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(result));
++ } else {
++ assert(intrin->src[0].is_ssa);
++ emit_load_store(&state->builder, intrin, intrin->variables[0],
++ &intrin->variables[0]->deref, NULL, intrin->src[0].ssa);
++ }
++ nir_instr_remove(&intrin->instr);
++ state->progress = true;
++ }
++
++ return true;
++}
++
++static bool
++lower_indirects_impl(nir_function_impl *impl, uint32_t mode_mask)
++{
++ struct lower_indirect_state state;
++
++ state.progress = false;
++ state.mode_mask = mode_mask;
++ nir_builder_init(&state.builder, impl);
++
++ nir_foreach_block(impl, lower_indirect_block, &state);
++
++ if (state.progress)
++ nir_metadata_preserve(impl, nir_metadata_none);
++
++ return state.progress;
++}
++
++/** Lowers indirect variable loads/stores to direct loads/stores.
++ *
++ * The pass works by replacing any indirect load or store with an if-ladder
++ * that does a binary search on the array index.
++ */
++bool
++nir_lower_indirect_derefs(nir_shader *shader, uint32_t mode_mask)
++{
++ bool progress = false;
++
++ nir_foreach_function(shader, function) {
++ if (function->impl)
++ progress = lower_indirects_impl(function->impl, mode_mask) || progress;
++ }
++
++ return progress;
++}
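++/* As a rough example, an indirect load "x = arr[i]" on a four-element array
++ * becomes a binary search over i:
++ *
++ *    if (i < 2) {
++ *       if (i < 1) x = arr[0]; else x = arr[1];
++ *    } else {
++ *       if (i < 3) x = arr[2]; else x = arr[3];
++ *    }
++ *
++ * with phi nodes joining the loaded values after each if.
++ */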
--- /dev/null
- if (intrin->intrinsic != nir_intrinsic_load_var &&
- intrin->intrinsic != nir_intrinsic_store_var)
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ * Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+ /*
+ * This lowering pass converts references to input/output variables with
+ * loads/stores to actual input/output intrinsics.
+ */
+
+ #include "nir.h"
+ #include "nir_builder.h"
+
+ struct lower_io_state {
+ nir_builder builder;
+ void *mem_ctx;
+ int (*type_size)(const struct glsl_type *type);
+ nir_variable_mode mode;
+ };
+
+ void
+ nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
+ int (*type_size)(const struct glsl_type *))
+ {
+ unsigned location = 0;
+
+ nir_foreach_variable(var, var_list) {
+ /*
+ * UBOs have their own address spaces, so don't count them towards the
+ * number of global uniforms
+ */
+ if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) &&
+ var->interface_type != NULL)
+ continue;
+
+ var->data.driver_location = location;
+ location += type_size(var->type);
+ }
+
+ *size = location;
+ }
+
+ /**
+ * Returns true if we're processing a stage whose inputs are arrays indexed
+ * by a vertex number (such as geometry shader inputs).
+ */
+ static bool
+ is_per_vertex_input(struct lower_io_state *state, nir_variable *var)
+ {
+ gl_shader_stage stage = state->builder.shader->stage;
+
+ return var->data.mode == nir_var_shader_in && !var->data.patch &&
+ (stage == MESA_SHADER_TESS_CTRL ||
+ stage == MESA_SHADER_TESS_EVAL ||
+ stage == MESA_SHADER_GEOMETRY);
+ }
+
+ static bool
+ is_per_vertex_output(struct lower_io_state *state, nir_variable *var)
+ {
+ gl_shader_stage stage = state->builder.shader->stage;
+ return var->data.mode == nir_var_shader_out && !var->data.patch &&
+ stage == MESA_SHADER_TESS_CTRL;
+ }
+
+ static nir_ssa_def *
+ get_io_offset(nir_builder *b, nir_deref_var *deref,
+ nir_ssa_def **vertex_index,
+ int (*type_size)(const struct glsl_type *))
+ {
+ nir_deref *tail = &deref->deref;
+
+ /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
+ * outermost array index separate. Process the rest normally.
+ */
+ if (vertex_index != NULL) {
+ tail = tail->child;
+ assert(tail->deref_type == nir_deref_type_array);
+ nir_deref_array *deref_array = nir_deref_as_array(tail);
+
+ nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset);
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1));
+ }
+ *vertex_index = vtx;
+ }
+
+ /* Just emit code and let constant-folding go to town */
+ nir_ssa_def *offset = nir_imm_int(b, 0);
+
+ while (tail->child != NULL) {
+ const struct glsl_type *parent_type = tail->type;
+ tail = tail->child;
+
+ if (tail->deref_type == nir_deref_type_array) {
+ nir_deref_array *deref_array = nir_deref_as_array(tail);
+ unsigned size = type_size(tail->type);
+
+ offset = nir_iadd(b, offset,
+ nir_imm_int(b, size * deref_array->base_offset));
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ nir_ssa_def *mul =
+ nir_imul(b, nir_imm_int(b, size),
+ nir_ssa_for_src(b, deref_array->indirect, 1));
+
+ offset = nir_iadd(b, offset, mul);
+ }
+ } else if (tail->deref_type == nir_deref_type_struct) {
+ nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
+
+ unsigned field_offset = 0;
+ for (unsigned i = 0; i < deref_struct->index; i++) {
+ field_offset += type_size(glsl_get_struct_field(parent_type, i));
+ }
+ offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
+ }
+ }
+
+ return offset;
+ }
+
+ static nir_intrinsic_op
+ load_op(struct lower_io_state *state,
+ nir_variable_mode mode, bool per_vertex)
+ {
+ nir_intrinsic_op op;
+ switch (mode) {
+ case nir_var_shader_in:
+ op = per_vertex ? nir_intrinsic_load_per_vertex_input :
+ nir_intrinsic_load_input;
+ break;
+ case nir_var_shader_out:
+ op = per_vertex ? nir_intrinsic_load_per_vertex_output :
+ nir_intrinsic_load_output;
+ break;
+ case nir_var_uniform:
+ op = nir_intrinsic_load_uniform;
+ break;
++ case nir_var_shared:
++ op = nir_intrinsic_load_shared;
++ break;
+ default:
+ unreachable("Unknown variable mode");
+ }
+ return op;
+ }
+
++static nir_intrinsic_op
++store_op(struct lower_io_state *state,
++ nir_variable_mode mode, bool per_vertex)
++{
++ nir_intrinsic_op op;
++ switch (mode) {
++ case nir_var_shader_in:
++ case nir_var_shader_out:
++ op = per_vertex ? nir_intrinsic_store_per_vertex_output :
++ nir_intrinsic_store_output;
++ break;
++ case nir_var_shared:
++ op = nir_intrinsic_store_shared;
++ break;
++ default:
++ unreachable("Unknown variable mode");
++ }
++ return op;
++}
++
++static nir_intrinsic_op
++atomic_op(nir_intrinsic_op opcode)
++{
++ switch (opcode) {
++#define OP(O) case nir_intrinsic_var_##O: return nir_intrinsic_shared_##O;
++ OP(atomic_exchange)
++ OP(atomic_comp_swap)
++ OP(atomic_add)
++ OP(atomic_imin)
++ OP(atomic_umin)
++ OP(atomic_imax)
++ OP(atomic_umax)
++ OP(atomic_and)
++ OP(atomic_or)
++ OP(atomic_xor)
++#undef OP
++ default:
++ unreachable("Invalid atomic");
++ }
++}
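++/* Illustrative example: a var_atomic_add on a compute-shader shared variable
++ * is turned by nir_lower_io_block() below into roughly
++ *
++ *    vec1 ssa_r = intrinsic shared_atomic_add (ssa_offset, ssa_data)
++ *
++ * where ssa_offset comes from get_io_offset() over the deref chain and the
++ * variable's driver_location is written to const_index[0].
++ */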
++
+ static bool
+ nir_lower_io_block(nir_block *block, void *void_state)
+ {
+ struct lower_io_state *state = void_state;
+
+ nir_builder *b = &state->builder;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
- assert(mode == nir_var_shader_out);
++ switch (intrin->intrinsic) {
++ case nir_intrinsic_load_var:
++ case nir_intrinsic_store_var:
++ case nir_intrinsic_var_atomic_add:
++ case nir_intrinsic_var_atomic_imin:
++ case nir_intrinsic_var_atomic_umin:
++ case nir_intrinsic_var_atomic_imax:
++ case nir_intrinsic_var_atomic_umax:
++ case nir_intrinsic_var_atomic_and:
++ case nir_intrinsic_var_atomic_or:
++ case nir_intrinsic_var_atomic_xor:
++ case nir_intrinsic_var_atomic_exchange:
++ case nir_intrinsic_var_atomic_comp_swap:
++ /* We can lower the io for this nir intrinsic */
++ break;
++ default:
++ /* We can't lower the io for this nir intrinsic, so skip it */
+ continue;
++ }
+
+ nir_variable_mode mode = intrin->variables[0]->var->data.mode;
+
+ if (state->mode != nir_var_all && state->mode != mode)
+ continue;
+
+ if (mode != nir_var_shader_in &&
+ mode != nir_var_shader_out &&
++ mode != nir_var_shared &&
+ mode != nir_var_uniform)
+ continue;
+
+ b->cursor = nir_before_instr(instr);
+
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_var: {
+ bool per_vertex =
+ is_per_vertex_input(state, intrin->variables[0]->var) ||
+ is_per_vertex_output(state, intrin->variables[0]->var);
+
+ nir_ssa_def *offset;
+ nir_ssa_def *vertex_index;
+
+ offset = get_io_offset(b, intrin->variables[0],
+ per_vertex ? &vertex_index : NULL,
+ state->type_size);
+
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(state->mem_ctx,
+ load_op(state, mode, per_vertex));
+ load->num_components = intrin->num_components;
+
+ load->const_index[0] =
+ intrin->variables[0]->var->data.driver_location;
+
++ if (load->intrinsic == nir_intrinsic_load_uniform) {
++ load->const_index[1] =
++ state->type_size(intrin->variables[0]->var->type);
++ }
++
+ if (per_vertex)
+ load->src[0] = nir_src_for_ssa(vertex_index);
+
+ load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(offset);
+
+ if (intrin->dest.is_ssa) {
+ nir_ssa_dest_init(&load->instr, &load->dest,
+ intrin->num_components, NULL);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&load->dest.ssa));
+ } else {
+ nir_dest_copy(&load->dest, &intrin->dest, state->mem_ctx);
+ }
+
+ nir_instr_insert_before(&intrin->instr, &load->instr);
+ nir_instr_remove(&intrin->instr);
+ break;
+ }
+
+ case nir_intrinsic_store_var: {
- nir_intrinsic_op store_op =
- per_vertex ? nir_intrinsic_store_per_vertex_output :
- nir_intrinsic_store_output;
-
- nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
- store_op);
++ assert(mode == nir_var_shader_out || mode == nir_var_shared);
+
+ nir_ssa_def *offset;
+ nir_ssa_def *vertex_index;
+
+ bool per_vertex =
+ is_per_vertex_output(state, intrin->variables[0]->var);
+
+ offset = get_io_offset(b, intrin->variables[0],
+ per_vertex ? &vertex_index : NULL,
+ state->type_size);
+
++ nir_intrinsic_instr *store =
++ nir_intrinsic_instr_create(state->mem_ctx,
++ store_op(state, mode, per_vertex));
+ store->num_components = intrin->num_components;
+
+ nir_src_copy(&store->src[0], &intrin->src[0], store);
+
+ store->const_index[0] =
+ intrin->variables[0]->var->data.driver_location;
+
+ /* Copy the writemask */
+ store->const_index[1] = intrin->const_index[0];
+
+ if (per_vertex)
+ store->src[1] = nir_src_for_ssa(vertex_index);
+
+ store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(offset);
+
+ nir_instr_insert_before(&intrin->instr, &store->instr);
+ nir_instr_remove(&intrin->instr);
+ break;
+ }
+
++ case nir_intrinsic_var_atomic_add:
++ case nir_intrinsic_var_atomic_imin:
++ case nir_intrinsic_var_atomic_umin:
++ case nir_intrinsic_var_atomic_imax:
++ case nir_intrinsic_var_atomic_umax:
++ case nir_intrinsic_var_atomic_and:
++ case nir_intrinsic_var_atomic_or:
++ case nir_intrinsic_var_atomic_xor:
++ case nir_intrinsic_var_atomic_exchange:
++ case nir_intrinsic_var_atomic_comp_swap: {
++ assert(mode == nir_var_shared);
++
++ nir_ssa_def *offset;
++
++ offset = get_io_offset(b, intrin->variables[0],
++ NULL, state->type_size);
++
++ nir_intrinsic_instr *atomic =
++ nir_intrinsic_instr_create(state->mem_ctx,
++ atomic_op(intrin->intrinsic));
++
++ atomic->src[0] = nir_src_for_ssa(offset);
++
++ atomic->const_index[0] =
++ intrin->variables[0]->var->data.driver_location;
++
++ nir_src_copy(&atomic->src[1], &intrin->src[0], atomic);
++
++ if (intrin->intrinsic == nir_intrinsic_var_atomic_comp_swap)
++ nir_src_copy(&atomic->src[2], &intrin->src[1], atomic);
++
++ if (intrin->dest.is_ssa) {
++ nir_ssa_dest_init(&atomic->instr, &atomic->dest,
++ intrin->dest.ssa.num_components, NULL);
++ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
++ nir_src_for_ssa(&atomic->dest.ssa));
++ } else {
++ nir_dest_copy(&atomic->dest, &intrin->dest, state->mem_ctx);
++ }
++
++ nir_instr_insert_before(&intrin->instr, &atomic->instr);
++ nir_instr_remove(&intrin->instr);
++ break;
++ }
++
+ default:
+ break;
+ }
+ }
+
+ return true;
+ }
+
+ static void
+ nir_lower_io_impl(nir_function_impl *impl,
+ nir_variable_mode mode,
+ int (*type_size)(const struct glsl_type *))
+ {
+ struct lower_io_state state;
+
+ nir_builder_init(&state.builder, impl);
+ state.mem_ctx = ralloc_parent(impl);
+ state.mode = mode;
+ state.type_size = type_size;
+
+ nir_foreach_block(impl, nir_lower_io_block, &state);
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+
+ void
+ nir_lower_io(nir_shader *shader, nir_variable_mode mode,
+ int (*type_size)(const struct glsl_type *))
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_lower_io_impl(function->impl, mode, type_size);
+ }
+ }
+
+ /**
+ * Return the offset source for a load/store intrinsic.
+ */
+ nir_src *
+ nir_get_io_offset_src(nir_intrinsic_instr *instr)
+ {
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_output:
+ case nir_intrinsic_load_uniform:
+ return &instr->src[0];
++ case nir_intrinsic_load_ubo:
++ case nir_intrinsic_load_ssbo:
+ case nir_intrinsic_load_per_vertex_input:
+ case nir_intrinsic_load_per_vertex_output:
+ case nir_intrinsic_store_output:
+ return &instr->src[1];
++ case nir_intrinsic_store_ssbo:
+ case nir_intrinsic_store_per_vertex_output:
+ return &instr->src[2];
+ default:
+ return NULL;
+ }
+ }
+
+ /**
+ * Return the vertex index source for a load/store per_vertex intrinsic.
+ */
+ nir_src *
+ nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
+ {
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_per_vertex_input:
+ case nir_intrinsic_load_per_vertex_output:
+ return &instr->src[0];
+ case nir_intrinsic_store_per_vertex_output:
+ return &instr->src[1];
+ default:
+ return NULL;
+ }
+ }
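+
+ /* A minimal usage sketch (illustration only): after nir_lower_io has run, a
+ * backend can recover the relevant sources of a lowered intrinsic without
+ * switching on the opcode itself:
+ *
+ * nir_src *offset = nir_get_io_offset_src(intrin);
+ * nir_src *vertex_index = nir_get_io_vertex_index_src(intrin);
+ *
+ * Either pointer is NULL when the intrinsic has no such source; for the
+ * lowered input/output intrinsics the base driver_location is carried in
+ * const_index[0].
+ */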
--- /dev/null
-nir_lower_outputs_to_temporaries(nir_shader *shader)
+ /*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+ /*
+ * Implements a pass that lowers output variables to a temporary plus an
+ * output variable with a single copy at each exit point of the shader.
+ * This way the output variable is only ever written.
+ *
+ * Because valid NIR requires that output variables are never read, this
+ * pass is more of a helper for NIR producers and must be run before the
+ * shader is ever validated.
+ */
+
+ #include "nir.h"
+
+ struct lower_outputs_state {
+ nir_shader *shader;
+ struct exec_list old_outputs;
+ };
+
+ static void
+ emit_output_copies(nir_cursor cursor, struct lower_outputs_state *state)
+ {
+ assert(exec_list_length(&state->shader->outputs) ==
+ exec_list_length(&state->old_outputs));
+
+ foreach_two_lists(out_node, &state->shader->outputs,
+ temp_node, &state->old_outputs) {
+ nir_variable *output = exec_node_data(nir_variable, out_node, node);
+ nir_variable *temp = exec_node_data(nir_variable, temp_node, node);
+
+ nir_intrinsic_instr *copy =
+ nir_intrinsic_instr_create(state->shader, nir_intrinsic_copy_var);
+ copy->variables[0] = nir_deref_var_create(copy, output);
+ copy->variables[1] = nir_deref_var_create(copy, temp);
+
+ nir_instr_insert(cursor, &copy->instr);
+ }
+ }
+
+ static bool
+ emit_output_copies_block(nir_block *block, void *state)
+ {
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ if (intrin->intrinsic == nir_intrinsic_emit_vertex)
+ emit_output_copies(nir_before_instr(&intrin->instr), state);
+ }
+
+ return true;
+ }
+
+ void
- } else if (strcmp(function->name, "main") == 0) {
++nir_lower_outputs_to_temporaries(nir_shader *shader, nir_function *entrypoint)
+ {
+ struct lower_outputs_state state;
+
+ if (shader->stage == MESA_SHADER_TESS_CTRL)
+ return;
+
+ state.shader = shader;
+ exec_list_move_nodes_to(&shader->outputs, &state.old_outputs);
+
+ /* Walk over all of the outputs, turn each output into a temporary, and
+ * make a new variable for the actual output.
+ */
+ nir_foreach_variable(var, &state.old_outputs) {
+ nir_variable *output = ralloc(shader, nir_variable);
+ memcpy(output, var, sizeof *output);
+
+ /* The original is now the temporary */
+ nir_variable *temp = var;
+
+ /* Reparent the name to the new variable */
+ ralloc_steal(output, output->name);
+
++ /* Reparent the constant initializer (if any) */
++ ralloc_steal(output, output->constant_initializer);
++
+ /* Give the temporary a new name with @out-temp appended */
+ temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
+ temp->data.mode = nir_var_global;
+ temp->constant_initializer = NULL;
+
+ exec_list_push_tail(&shader->outputs, &output->node);
+ }
+
+ nir_foreach_function(shader, function) {
+ if (function->impl == NULL)
+ continue;
+
+ if (shader->stage == MESA_SHADER_GEOMETRY) {
+ /* For geometry shaders, we have to emit the output copies right
+ * before each EmitVertex call.
+ */
+ nir_foreach_block(function->impl, emit_output_copies_block, &state);
++ } else if (function == entrypoint) {
+ /* For all other shader types, we need to do the copies right before
+ * the jumps to the end block.
+ */
+ struct set_entry *block_entry;
+ set_foreach(function->impl->end_block->predecessors, block_entry) {
+ struct nir_block *block = (void *)block_entry->key;
+ emit_output_copies(nir_after_block_before_jump(block), &state);
+ }
+ }
+
+ nir_metadata_preserve(function->impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+
+ exec_list_append(&shader->globals, &state.old_outputs);
+ }
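+
+ /* Usage sketch (illustration): NIR producers would typically run this pass
+ * right after building the shader and before it is first validated, e.g.
+ *
+ * nir_lower_outputs_to_temporaries(shader, main_function);
+ * nir_validate_shader(shader);
+ *
+ * where main_function is the producer's nir_function for the entry point.
+ */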
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "nir.h"
++#include "nir_builder.h"
++#include "nir_control_flow.h"
++
++struct lower_returns_state {
++ nir_builder builder;
++ struct exec_list *cf_list;
++ nir_loop *loop;
++ nir_variable *return_flag;
++};
++
++static bool lower_returns_in_cf_list(struct exec_list *cf_list,
++ struct lower_returns_state *state);
++
++static void
++predicate_following(nir_cf_node *node, struct lower_returns_state *state)
++{
++ nir_builder *b = &state->builder;
++ b->cursor = nir_after_cf_node_and_phis(node);
++
++ if (nir_cursors_equal(b->cursor, nir_after_cf_list(state->cf_list)))
++ return; /* Nothing to predicate */
++
++ assert(state->return_flag);
++
++ nir_if *if_stmt = nir_if_create(b->shader);
++ if_stmt->condition = nir_src_for_ssa(nir_load_var(b, state->return_flag));
++ nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
++
++ if (state->loop) {
++ /* If we're inside of a loop, then all we need to do is insert a
++ * conditional break.
++ */
++ nir_jump_instr *brk =
++ nir_jump_instr_create(state->builder.shader, nir_jump_break);
++ nir_instr_insert(nir_before_cf_list(&if_stmt->then_list), &brk->instr);
++ } else {
++ /* Otherwise, we need to actually move everything into the else case
++ * of the if statement.
++ */
++ nir_cf_list list;
++ nir_cf_extract(&list, nir_after_cf_node(&if_stmt->cf_node),
++ nir_after_cf_list(state->cf_list));
++ assert(!exec_list_is_empty(&list.list));
++ nir_cf_reinsert(&list, nir_before_cf_list(&if_stmt->else_list));
++ }
++}
++
++static bool
++lower_returns_in_loop(nir_loop *loop, struct lower_returns_state *state)
++{
++ nir_loop *parent = state->loop;
++ state->loop = loop;
++ bool progress = lower_returns_in_cf_list(&loop->body, state);
++ state->loop = parent;
++
++ /* If the recursive call made progress, then there were returns inside
++ * of the loop. These would have been lowered to breaks with the return
++ * flag set to true. We need to predicate everything following the loop
++ * on the return flag.
++ */
++ if (progress)
++ predicate_following(&loop->cf_node, state);
++
++ return progress;
++}
++
++static bool
++lower_returns_in_if(nir_if *if_stmt, struct lower_returns_state *state)
++{
++ bool progress;
++
++ progress = lower_returns_in_cf_list(&if_stmt->then_list, state);
++ progress = lower_returns_in_cf_list(&if_stmt->else_list, state) || progress;
++
++ /* If either of the recursive calls made progress, then there were
++ * returns inside of the body of the if. If we're in a loop, then these
++ * were lowered to breaks which automatically skip to the end of the
++ * loop so we don't have to do anything. If we're not in a loop, then
++ * all we know is that the return flag is set appropriately and that the
++ * recursive calls ensured that nothing gets executed *inside* the if
++ * after a return. In order to ensure nothing outside gets executed
++ * after a return, we need to predicate everything following on the
++ * return flag.
++ */
++ if (progress && !state->loop)
++ predicate_following(&if_stmt->cf_node, state);
++
++ return progress;
++}
++
++static bool
++lower_returns_in_block(nir_block *block, struct lower_returns_state *state)
++{
++ if (block->predecessors->entries == 0 &&
++ block != nir_start_block(state->builder.impl)) {
++ /* This block is unreachable. Delete it and everything after it. */
++ nir_cf_list list;
++ nir_cf_extract(&list, nir_before_cf_node(&block->cf_node),
++ nir_after_cf_list(state->cf_list));
++
++ if (exec_list_is_empty(&list.list)) {
++ /* There's nothing here, which also means there's nothing in this
++ * block so we have nothing to do.
++ */
++ return false;
++ } else {
++ nir_cf_delete(&list);
++ return true;
++ }
++ }
++
++ nir_instr *last_instr = nir_block_last_instr(block);
++ if (last_instr == NULL)
++ return false;
++
++ if (last_instr->type != nir_instr_type_jump)
++ return false;
++
++ nir_jump_instr *jump = nir_instr_as_jump(last_instr);
++ if (jump->type != nir_jump_return)
++ return false;
++
++ nir_instr_remove(&jump->instr);
++
++ nir_builder *b = &state->builder;
++ b->cursor = nir_after_block(block);
++
++ /* Set the return flag */
++ if (state->return_flag == NULL) {
++ state->return_flag =
++ nir_local_variable_create(b->impl, glsl_bool_type(), "return");
++
++ /* Set a default value of false */
++ state->return_flag->constant_initializer =
++ rzalloc(state->return_flag, nir_constant);
++ }
++ nir_store_var(b, state->return_flag, nir_imm_int(b, NIR_TRUE), 1);
++
++ if (state->loop) {
++ /* We're in a loop; we need to break out of it. */
++ nir_jump(b, nir_jump_break);
++ } else {
++ /* Not in a loop; we'll deal with predicating later */
++ assert(nir_cf_node_next(&block->cf_node) == NULL);
++ }
++
++ return true;
++}
++
++static bool
++lower_returns_in_cf_list(struct exec_list *cf_list,
++ struct lower_returns_state *state)
++{
++ bool progress = false;
++
++ struct exec_list *parent_list = state->cf_list;
++ state->cf_list = cf_list;
++
++ /* We iterate over the list backwards because any given lower call may
++ * take everything following the given CF node and predicate it. In
++ * order to avoid recursion/iteration problems, we want everything after
++ * a given node to already be lowered before this happens.
++ */
++ foreach_list_typed_reverse_safe(nir_cf_node, node, node, cf_list) {
++ switch (node->type) {
++ case nir_cf_node_block:
++ if (lower_returns_in_block(nir_cf_node_as_block(node), state))
++ progress = true;
++ break;
++
++ case nir_cf_node_if:
++ if (lower_returns_in_if(nir_cf_node_as_if(node), state))
++ progress = true;
++ break;
++
++ case nir_cf_node_loop:
++ if (lower_returns_in_loop(nir_cf_node_as_loop(node), state))
++ progress = true;
++ break;
++
++ default:
++ unreachable("Invalid inner CF node type");
++ }
++ }
++
++ state->cf_list = parent_list;
++
++ return progress;
++}
++
++bool
++nir_lower_returns_impl(nir_function_impl *impl)
++{
++ struct lower_returns_state state;
++
++ state.cf_list = &impl->body;
++ state.loop = NULL;
++ state.return_flag = NULL;
++ nir_builder_init(&state.builder, impl);
++
++ bool progress = lower_returns_in_cf_list(&impl->body, &state);
++
++ if (progress) {
++ nir_metadata_preserve(impl, nir_metadata_none);
++ nir_repair_ssa_impl(impl);
++ }
++
++ return progress;
++}
++
++bool
++nir_lower_returns(nir_shader *shader)
++{
++ bool progress = false;
++
++ nir_foreach_function(shader, function) {
++ if (function->impl)
++ progress = nir_lower_returns_impl(function->impl) || progress;
++ }
++
++ return progress;
++}
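++
++/* Rough before/after sketch of the transformation (pseudocode, not literal
++ * NIR). Outside of any loop, a body such as
++ *
++ * if (cond)
++ * return;
++ * do_stuff();
++ *
++ * becomes
++ *
++ * if (cond)
++ * return_flag = true; (the flag defaults to false via its initializer)
++ * if (return_flag) {
++ * } else {
++ * do_stuff();
++ * }
++ *
++ * Inside a loop, the return instead becomes "return_flag = true; break;" and
++ * the code following the loop is predicated the same way.
++ */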
--- /dev/null
- instr->num_srcs + 1);
+ /*
+ * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
+ * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #include "nir.h"
+ #include "nir_builder.h"
+ #include "program/hash_table.h"
+ #include "compiler/glsl/ir_uniform.h"
+
+ #include "main/compiler.h"
+ #include "main/mtypes.h"
+ #include "program/prog_parameter.h"
+ #include "program/program.h"
+
+ /* Calculate the sampler index based on array indices and also
+ * calculate the base uniform location for struct members.
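+ * For example, with "uniform sampler2D s[2][3]" a direct deref s[a][b]
+ * contributes a*3 + b to sampler_index, while an indirect index i in the
+ * outer dimension contributes i*3 to the dynamic offset instead.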
+ */
+ static void
+ calc_sampler_offsets(nir_deref *tail, nir_tex_instr *instr,
+ unsigned *array_elements, nir_ssa_def **indirect,
+ nir_builder *b, unsigned *location)
+ {
+ if (tail->child == NULL)
+ return;
+
+ switch (tail->child->deref_type) {
+ case nir_deref_type_array: {
+ nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+
+ assert(deref_array->deref_array_type != nir_deref_array_type_wildcard);
+
+ calc_sampler_offsets(tail->child, instr, array_elements,
+ indirect, b, location);
+ instr->sampler_index += deref_array->base_offset * *array_elements;
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ nir_ssa_def *mul =
+ nir_imul(b, nir_imm_int(b, *array_elements),
+ nir_ssa_for_src(b, deref_array->indirect, 1));
+
+ nir_instr_rewrite_src(&instr->instr, &deref_array->indirect,
+ NIR_SRC_INIT);
+
+ if (*indirect) {
+ *indirect = nir_iadd(b, *indirect, mul);
+ } else {
+ *indirect = mul;
+ }
+ }
+
+ *array_elements *= glsl_get_length(tail->type);
+ break;
+ }
+
+ case nir_deref_type_struct: {
+ nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child);
+ *location += glsl_get_record_location_offset(tail->type, deref_struct->index);
+ calc_sampler_offsets(tail->child, instr, array_elements,
+ indirect, b, location);
+ break;
+ }
+
+ default:
+ unreachable("Invalid deref type");
+ break;
+ }
+ }
+
+ static void
+ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
+ gl_shader_stage stage, nir_builder *builder)
+ {
+ if (instr->sampler == NULL)
+ return;
+
++ /* GLSL only has combined textures/samplers */
++ assert(instr->texture == NULL);
++
+ instr->sampler_index = 0;
+ unsigned location = instr->sampler->var->data.location;
+ unsigned array_elements = 1;
+ nir_ssa_def *indirect = NULL;
+
+ builder->cursor = nir_before_instr(&instr->instr);
+ calc_sampler_offsets(&instr->sampler->deref, instr, &array_elements,
+ &indirect, builder, &location);
+
+ if (indirect) {
+ /* First, we have to resize the array of texture sources */
+ nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
- instr->sampler_array_size = array_elements;
++ instr->num_srcs + 2);
+
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ new_srcs[i].src_type = instr->src[i].src_type;
+ nir_instr_move_src(&instr->instr, &new_srcs[i].src,
+ &instr->src[i].src);
+ }
+
+ ralloc_free(instr->src);
+ instr->src = new_srcs;
+
+ /* Now we can go ahead and move the source over to being a
+ * first-class texture source.
+ */
++ instr->src[instr->num_srcs].src_type = nir_tex_src_texture_offset;
++ instr->num_srcs++;
++ nir_instr_rewrite_src(&instr->instr,
++ &instr->src[instr->num_srcs - 1].src,
++ nir_src_for_ssa(indirect));
++
+ instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
+ instr->num_srcs++;
+ nir_instr_rewrite_src(&instr->instr,
+ &instr->src[instr->num_srcs - 1].src,
+ nir_src_for_ssa(indirect));
+
++ instr->texture_array_size = array_elements;
+ }
+
+ if (location > shader_program->NumUniformStorage - 1 ||
+ !shader_program->UniformStorage[location].opaque[stage].active) {
+ assert(!"cannot return a sampler");
+ return;
+ }
+
+ instr->sampler_index +=
+ shader_program->UniformStorage[location].opaque[stage].index;
+
+ instr->sampler = NULL;
++
++ instr->texture_index = instr->sampler_index;
+ }
+
+ typedef struct {
+ nir_builder builder;
+ const struct gl_shader_program *shader_program;
+ gl_shader_stage stage;
+ } lower_state;
+
+ static bool
+ lower_block_cb(nir_block *block, void *_state)
+ {
+ lower_state *state = (lower_state *) _state;
+
+ nir_foreach_instr(block, instr) {
+ if (instr->type == nir_instr_type_tex) {
+ nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
+ lower_sampler(tex_instr, state->shader_program, state->stage,
+ &state->builder);
+ }
+ }
+
+ return true;
+ }
+
+ static void
+ lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program,
+ gl_shader_stage stage)
+ {
+ lower_state state;
+
+ nir_builder_init(&state.builder, impl);
+ state.shader_program = shader_program;
+ state.stage = stage;
+
+ nir_foreach_block(impl, lower_block_cb, &state);
+ }
+
+ void
+ nir_lower_samplers(nir_shader *shader,
+ const struct gl_shader_program *shader_program)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ lower_impl(function->impl, shader_program, shader->stage);
+ }
+ }
--- /dev/null
- nir_intrinsic_op sysval_op =
- nir_intrinsic_from_system_value(var->data.location);
- nir_ssa_def *sysval = nir_load_system_value(b, sysval_op, 0);
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir.h"
+ #include "nir_builder.h"
+
+ struct lower_system_values_state {
+ nir_builder builder;
+ bool progress;
+ };
+
+ static bool
+ convert_block(nir_block *block, void *void_state)
+ {
+ struct lower_system_values_state *state = void_state;
+
+ nir_builder *b = &state->builder;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *load_var = nir_instr_as_intrinsic(instr);
+
+ if (load_var->intrinsic != nir_intrinsic_load_var)
+ continue;
+
+ nir_variable *var = load_var->variables[0]->var;
+ if (var->data.mode != nir_var_system_value)
+ continue;
+
+ b->cursor = nir_after_instr(&load_var->instr);
+
++ nir_ssa_def *sysval;
++ switch (var->data.location) {
++ case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: {
++ /* From the GLSL man page for gl_GlobalInvocationID:
++ *
++ * "The value of gl_GlobalInvocationID is equal to
++ * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
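++ *
++ * For example, with a local size of (8, 4, 2), work group ID (1, 0, 0) and
++ * local invocation ID (3, 2, 1), this gives (1*8 + 3, 0*4 + 2, 0*2 + 1) =
++ * (11, 2, 1).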
++ */
++
++ nir_const_value local_size;
++ local_size.u[0] = b->shader->info.cs.local_size[0];
++ local_size.u[1] = b->shader->info.cs.local_size[1];
++ local_size.u[2] = b->shader->info.cs.local_size[2];
++
++ nir_ssa_def *group_id =
++ nir_load_system_value(b, nir_intrinsic_load_work_group_id, 0);
++ nir_ssa_def *local_id =
++ nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0);
++
++ sysval = nir_iadd(b, nir_imul(b, group_id,
++ nir_build_imm(b, 3, local_size)),
++ local_id);
++ break;
++ }
++
++ case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: {
++ /* From the GLSL man page for gl_LocalInvocationIndex:
++ *
++ * "The value of gl_LocalInvocationIndex is equal to
++ * gl_LocalInvocationID.z * gl_WorkGroupSize.x *
++ * gl_WorkGroupSize.y + gl_LocalInvocationID.y *
++ * gl_WorkGroupSize.x + gl_LocalInvocationID.x"
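++ *
++ * For example, with a local size of (8, 4, 2) and local invocation ID
++ * (3, 2, 1), this gives 1*8*4 + 2*8 + 3 = 51.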
++ */
++ nir_ssa_def *local_id =
++ nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0);
++
++ unsigned stride_y = b->shader->info.cs.local_size[0];
++ unsigned stride_z = b->shader->info.cs.local_size[0] *
++ b->shader->info.cs.local_size[1];
++
++ sysval = nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 2),
++ nir_imm_int(b, stride_z)),
++ nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 1),
++ nir_imm_int(b, stride_y)),
++ nir_channel(b, local_id, 0)));
++ break;
++ }
++
++ case SYSTEM_VALUE_VERTEX_ID:
++ if (b->shader->options->vertex_id_zero_based) {
++ sysval = nir_iadd(b,
++ nir_load_system_value(b, nir_intrinsic_load_vertex_id_zero_base, 0),
++ nir_load_system_value(b, nir_intrinsic_load_base_vertex, 0));
++ } else {
++ sysval = nir_load_system_value(b, nir_intrinsic_load_vertex_id, 0);
++ }
++ break;
++
++ case SYSTEM_VALUE_INSTANCE_INDEX:
++ sysval = nir_iadd(b,
++ nir_load_system_value(b, nir_intrinsic_load_instance_id, 0),
++ nir_load_system_value(b, nir_intrinsic_load_base_instance, 0));
++ break;
++
++ default: {
++ nir_intrinsic_op sysval_op =
++ nir_intrinsic_from_system_value(var->data.location);
++ sysval = nir_load_system_value(b, sysval_op, 0);
++ break;
++ } /* default */
++ }
+
+ nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval));
+ nir_instr_remove(&load_var->instr);
+
+ state->progress = true;
+ }
+
+ return true;
+ }
+
+ static bool
+ convert_impl(nir_function_impl *impl)
+ {
+ struct lower_system_values_state state;
+
+ state.progress = false;
+ nir_builder_init(&state.builder, impl);
+
+ nir_foreach_block(impl, convert_block, &state);
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ return state.progress;
+ }
+
+ bool
+ nir_lower_system_values(nir_shader *shader)
+ {
+ bool progress = false;
+
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ progress = convert_impl(function->impl) || progress;
+ }
+
+ exec_list_make_empty(&shader->system_values);
+
+ return progress;
+ }
--- /dev/null
- nir_ssa_def **def_stack;
- nir_ssa_def **def_stack_tail;
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+ #include "nir.h"
+ #include "nir_builder.h"
++#include "nir_phi_builder.h"
+ #include "nir_vla.h"
+
+
+ struct deref_node {
+ struct deref_node *parent;
+ const struct glsl_type *type;
+
+ bool lower_to_ssa;
+
+ /* Only valid for things that end up in the direct list.
+ * Note that multiple nir_deref_vars may correspond to this node, but they
+ * will all be equivalent, so any is as good as the other.
+ */
+ nir_deref_var *deref;
+ struct exec_node direct_derefs_link;
+
+ struct set *loads;
+ struct set *stores;
+ struct set *copies;
+
- /* A hash table mapping phi nodes to deref_state data */
- struct hash_table *phi_table;
++ struct nir_phi_builder_value *pb_value;
+
+ struct deref_node *wildcard;
+ struct deref_node *indirect;
+ struct deref_node *children[0];
+ };
+
+ struct lower_variables_state {
+ nir_shader *shader;
+ void *dead_ctx;
+ nir_function_impl *impl;
+
+ /* A hash table mapping variables to deref_node data */
+ struct hash_table *deref_var_nodes;
+
+ /* A hash table mapping fully-qualified direct dereferences, i.e.
+ * dereferences with no indirect or wildcard array dereferences, to
+ * deref_node data.
+ *
+ * At the moment, we only lower loads, stores, and copies that can be
+ * trivially lowered to loads and stores, i.e. copies with no indirects
+ * and no wildcards. If a part of a variable that is being loaded from
+ * and/or stored into is also involved in a copy operation with
+ * wildcards, then we lower that copy operation to loads and stores, but
+ * otherwise we leave copies with wildcards alone. Since the only derefs
+ * used in these loads, stores, and trivial copies are ones with no
+ * wildcards and no indirects, these are precisely the derefs that we
+ * can actually consider lowering.
+ */
+ struct exec_list direct_deref_nodes;
+
+ /* Controls whether get_deref_node will add variables to the
+ * direct_deref_nodes table. This is turned on when we are initially
+ * scanning for load/store instructions. It is then turned off so we
+ * don't accidentally change the direct_deref_nodes table while we're
+ * iterating through it.
+ */
+ bool add_to_direct_deref_nodes;
+
-/** Pushes an SSA def onto the def stack for the given node
- *
- * Each node is potentially associated with a stack of SSA definitions.
- * This stack is used for determining what SSA definition reaches a given
- * point in the program for variable renaming. The stack is always kept in
- * dominance-order with at most one SSA def per block. If the SSA
- * definition on the top of the stack is in the same block as the one being
- * pushed, the top element is replaced.
- */
-static void
-def_stack_push(struct deref_node *node, nir_ssa_def *def,
- struct lower_variables_state *state)
-{
- if (node->def_stack == NULL) {
- node->def_stack = ralloc_array(state->dead_ctx, nir_ssa_def *,
- state->impl->num_blocks);
- node->def_stack_tail = node->def_stack - 1;
- }
-
- if (node->def_stack_tail >= node->def_stack) {
- nir_ssa_def *top_def = *node->def_stack_tail;
-
- if (def->parent_instr->block == top_def->parent_instr->block) {
- /* They're in the same block, just replace the top */
- *node->def_stack_tail = def;
- return;
- }
- }
-
- *(++node->def_stack_tail) = def;
-}
-
-/* Pop the top of the def stack if it's in the given block */
-static void
-def_stack_pop_if_in_block(struct deref_node *node, nir_block *block)
-{
- /* If we're popping, then we have presumably pushed at some time in the
- * past so this should exist.
- */
- assert(node->def_stack != NULL);
-
- /* The stack is already empty. Do nothing. */
- if (node->def_stack_tail < node->def_stack)
- return;
-
- nir_ssa_def *def = *node->def_stack_tail;
- if (def->parent_instr->block == block)
- node->def_stack_tail--;
-}
-
-/** Retrieves the SSA definition on the top of the stack for the given
- * node, if one exists. If the stack is empty, then we return the constant
- * initializer (if it exists) or an SSA undef.
- */
-static nir_ssa_def *
-get_ssa_def_for_block(struct deref_node *node, nir_block *block,
- struct lower_variables_state *state)
-{
- /* If we have something on the stack, go ahead and return it. We're
- * assuming that the top of the stack dominates the given block.
- */
- if (node->def_stack && node->def_stack_tail >= node->def_stack)
- return *node->def_stack_tail;
-
- /* If we got here then we don't have a definition that dominates the
- * given block. This means that we need to add an undef and use that.
- */
- nir_ssa_undef_instr *undef =
- nir_ssa_undef_instr_create(state->shader,
- glsl_get_vector_elements(node->type));
- nir_instr_insert_before_cf_list(&state->impl->body, &undef->instr);
- def_stack_push(node, &undef->def, state);
- return &undef->def;
-}
-
-/* Given a block and one of its predecessors, this function fills in the
- * souces of the phi nodes to take SSA defs from the given predecessor.
- * This function must be called exactly once per block/predecessor pair.
- */
-static void
-add_phi_sources(nir_block *block, nir_block *pred,
- struct lower_variables_state *state)
-{
- nir_foreach_instr(block, instr) {
- if (instr->type != nir_instr_type_phi)
- break;
-
- nir_phi_instr *phi = nir_instr_as_phi(instr);
-
- struct hash_entry *entry =
- _mesa_hash_table_search(state->phi_table, phi);
- if (!entry)
- continue;
-
- struct deref_node *node = entry->data;
-
- nir_phi_src *src = ralloc(phi, nir_phi_src);
- src->pred = pred;
- src->src.parent_instr = &phi->instr;
- src->src.is_ssa = true;
- src->src.ssa = get_ssa_def_for_block(node, pred, state);
-
- list_addtail(&src->src.use_link, &src->src.ssa->uses);
-
- exec_list_push_tail(&phi->srcs, &src->node);
- }
-}
-
++ struct nir_phi_builder *phi_builder;
+ };
+
+ static struct deref_node *
+ deref_node_create(struct deref_node *parent,
+ const struct glsl_type *type, nir_shader *shader)
+ {
+ size_t size = sizeof(struct deref_node) +
+ glsl_get_length(type) * sizeof(struct deref_node *);
+
+ struct deref_node *node = rzalloc_size(shader, size);
+ node->type = type;
+ node->parent = parent;
+ node->deref = NULL;
+ exec_node_init(&node->direct_derefs_link);
+
+ return node;
+ }
+
+ /* Returns the deref node associated with the given variable. This will be
+ * the root of the tree representing all of the derefs of the given variable.
+ */
+ static struct deref_node *
+ get_deref_node_for_var(nir_variable *var, struct lower_variables_state *state)
+ {
+ struct deref_node *node;
+
+ struct hash_entry *var_entry =
+ _mesa_hash_table_search(state->deref_var_nodes, var);
+
+ if (var_entry) {
+ return var_entry->data;
+ } else {
+ node = deref_node_create(NULL, var->type, state->dead_ctx);
+ _mesa_hash_table_insert(state->deref_var_nodes, var, node);
+ return node;
+ }
+ }
+
+ /* Gets the deref_node for the given deref chain and creates it if it
+ * doesn't yet exist. If the deref is fully-qualified and direct and
+ * state->add_to_direct_deref_nodes is true, it will be added to the hash
+ * table of fully-qualified direct derefs.
+ */
+ static struct deref_node *
+ get_deref_node(nir_deref_var *deref, struct lower_variables_state *state)
+ {
+ bool is_direct = true;
+
+ /* Start at the base of the chain. */
+ struct deref_node *node = get_deref_node_for_var(deref->var, state);
+ assert(deref->deref.type == node->type);
+
+ for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
+ switch (tail->deref_type) {
+ case nir_deref_type_struct: {
+ nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
+
+ assert(deref_struct->index < glsl_get_length(node->type));
+
+ if (node->children[deref_struct->index] == NULL)
+ node->children[deref_struct->index] =
+ deref_node_create(node, tail->type, state->dead_ctx);
+
+ node = node->children[deref_struct->index];
+ break;
+ }
+
+ case nir_deref_type_array: {
+ nir_deref_array *arr = nir_deref_as_array(tail);
+
+ switch (arr->deref_array_type) {
+ case nir_deref_array_type_direct:
+ /* This is possible if a loop unrolls and generates an
+ * out-of-bounds offset. We need to handle this at least
+ * somewhat gracefully.
+ */
+ if (arr->base_offset >= glsl_get_length(node->type))
+ return NULL;
+
+ if (node->children[arr->base_offset] == NULL)
+ node->children[arr->base_offset] =
+ deref_node_create(node, tail->type, state->dead_ctx);
+
+ node = node->children[arr->base_offset];
+ break;
+
+ case nir_deref_array_type_indirect:
+ if (node->indirect == NULL)
+ node->indirect = deref_node_create(node, tail->type,
+ state->dead_ctx);
+
+ node = node->indirect;
+ is_direct = false;
+ break;
+
+ case nir_deref_array_type_wildcard:
+ if (node->wildcard == NULL)
+ node->wildcard = deref_node_create(node, tail->type,
+ state->dead_ctx);
+
+ node = node->wildcard;
+ is_direct = false;
+ break;
+
+ default:
+ unreachable("Invalid array deref type");
+ }
+ break;
+ }
+ default:
+ unreachable("Invalid deref type");
+ }
+ }
+
+ assert(node);
+
+ /* Only insert if it isn't already in the list. */
+ if (is_direct && state->add_to_direct_deref_nodes &&
+ node->direct_derefs_link.next == NULL) {
+ node->deref = deref;
+ assert(deref->var != NULL);
+ exec_list_push_tail(&state->direct_deref_nodes,
+ &node->direct_derefs_link);
+ }
+
+ return node;
+ }
+
+ /* \sa foreach_deref_node_match */
+ static bool
+ foreach_deref_node_worker(struct deref_node *node, nir_deref *deref,
+ bool (* cb)(struct deref_node *node,
+ struct lower_variables_state *state),
+ struct lower_variables_state *state)
+ {
+ if (deref->child == NULL) {
+ return cb(node, state);
+ } else {
+ switch (deref->child->deref_type) {
+ case nir_deref_type_array: {
+ nir_deref_array *arr = nir_deref_as_array(deref->child);
+ assert(arr->deref_array_type == nir_deref_array_type_direct);
+ if (node->children[arr->base_offset] &&
+ !foreach_deref_node_worker(node->children[arr->base_offset],
+ deref->child, cb, state))
+ return false;
+
+ if (node->wildcard &&
+ !foreach_deref_node_worker(node->wildcard,
+ deref->child, cb, state))
+ return false;
+
+ return true;
+ }
+
+ case nir_deref_type_struct: {
+ nir_deref_struct *str = nir_deref_as_struct(deref->child);
+ return foreach_deref_node_worker(node->children[str->index],
+ deref->child, cb, state);
+ }
+
+ default:
+ unreachable("Invalid deref child type");
+ }
+ }
+ }
+
+ /* Walks over every "matching" deref_node and calls the callback. A node
+ * is considered to "match" if it either refers to that deref or matches up
+ * to a wildcard. In other words, the following would match a[6].foo[3].bar:
+ *
+ * a[6].foo[3].bar
+ * a[*].foo[3].bar
+ * a[6].foo[*].bar
+ * a[*].foo[*].bar
+ *
+ * The given deref must be a full-length and fully qualified (no wildcards
+ * or indirects) deref chain.
+ */
+ static bool
+ foreach_deref_node_match(nir_deref_var *deref,
+ bool (* cb)(struct deref_node *node,
+ struct lower_variables_state *state),
+ struct lower_variables_state *state)
+ {
+ nir_deref_var var_deref = *deref;
+ var_deref.deref.child = NULL;
+ struct deref_node *node = get_deref_node(&var_deref, state);
+
+ if (node == NULL)
+ return false;
+
+ return foreach_deref_node_worker(node, &deref->deref, cb, state);
+ }
+
+ /* \sa deref_may_be_aliased */
+ static bool
+ deref_may_be_aliased_node(struct deref_node *node, nir_deref *deref,
+ struct lower_variables_state *state)
+ {
+ if (deref->child == NULL) {
+ return false;
+ } else {
+ switch (deref->child->deref_type) {
+ case nir_deref_type_array: {
+ nir_deref_array *arr = nir_deref_as_array(deref->child);
+ if (arr->deref_array_type == nir_deref_array_type_indirect)
+ return true;
+
+ /* If there is an indirect at this level, we're aliased. */
+ if (node->indirect)
+ return true;
+
+ assert(arr->deref_array_type == nir_deref_array_type_direct);
+
+ if (node->children[arr->base_offset] &&
+ deref_may_be_aliased_node(node->children[arr->base_offset],
+ deref->child, state))
+ return true;
+
+ if (node->wildcard &&
+ deref_may_be_aliased_node(node->wildcard, deref->child, state))
+ return true;
+
+ return false;
+ }
+
+ case nir_deref_type_struct: {
+ nir_deref_struct *str = nir_deref_as_struct(deref->child);
+ if (node->children[str->index]) {
+ return deref_may_be_aliased_node(node->children[str->index],
+ deref->child, state);
+ } else {
+ return false;
+ }
+ }
+
+ default:
+ unreachable("Invalid nir_deref child type");
+ }
+ }
+ }
+
+ /* Returns true if there are no indirects that can ever touch this deref.
+ *
+ * For example, if the given deref is a[6].foo, then any uses of a[i].foo
+ * would cause this to return false, but a[i].bar would not affect it
+ * because it's a different structure member. A var_copy involving
+ * a[*].bar also doesn't affect it because that can be lowered to entirely
+ * direct load/stores.
+ *
+ * We only support asking this question about fully-qualified derefs.
+ * Obviously, it's pointless to ask this about indirects, but we also
+ * rule out wildcards. Handling wildcard dereferences would involve
+ * checking each array index to make sure that there aren't any indirect
+ * references.
+ */
+ static bool
+ deref_may_be_aliased(nir_deref_var *deref,
+ struct lower_variables_state *state)
+ {
+ return deref_may_be_aliased_node(get_deref_node_for_var(deref->var, state),
+ &deref->deref, state);
+ }
+
+ static void
+ register_load_instr(nir_intrinsic_instr *load_instr,
+ struct lower_variables_state *state)
+ {
+ struct deref_node *node = get_deref_node(load_instr->variables[0], state);
+ if (node == NULL)
+ return;
+
+ if (node->loads == NULL)
+ node->loads = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ _mesa_set_add(node->loads, load_instr);
+ }
+
+ static void
+ register_store_instr(nir_intrinsic_instr *store_instr,
+ struct lower_variables_state *state)
+ {
+ struct deref_node *node = get_deref_node(store_instr->variables[0], state);
+ if (node == NULL)
+ return;
+
+ if (node->stores == NULL)
+ node->stores = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ _mesa_set_add(node->stores, store_instr);
+ }
+
+ static void
+ register_copy_instr(nir_intrinsic_instr *copy_instr,
+ struct lower_variables_state *state)
+ {
+ for (unsigned idx = 0; idx < 2; idx++) {
+ struct deref_node *node =
+ get_deref_node(copy_instr->variables[idx], state);
+
+ if (node == NULL)
+ continue;
+
+ if (node->copies == NULL)
+ node->copies = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ _mesa_set_add(node->copies, copy_instr);
+ }
+ }
+
+ /* Registers all variable uses in the given block. */
+ static bool
+ register_variable_uses_block(nir_block *block, void *void_state)
+ {
+ struct lower_variables_state *state = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_var:
+ register_load_instr(intrin, state);
+ break;
+
+ case nir_intrinsic_store_var:
+ register_store_instr(intrin, state);
+ break;
+
+ case nir_intrinsic_copy_var:
+ register_copy_instr(intrin, state);
+ break;
+
+ default:
+ continue;
+ }
+ }
+
+ return true;
+ }
+
+ /* Walks over all of the copy instructions to or from the given deref_node
+ * and lowers them to load/store intrinsics.
+ */
+ static bool
+ lower_copies_to_load_store(struct deref_node *node,
+ struct lower_variables_state *state)
+ {
+ if (!node->copies)
+ return true;
+
+ struct set_entry *copy_entry;
+ set_foreach(node->copies, copy_entry) {
+ nir_intrinsic_instr *copy = (void *)copy_entry->key;
+
+ nir_lower_var_copy_instr(copy, state->shader);
+
+ for (unsigned i = 0; i < 2; ++i) {
+ struct deref_node *arg_node =
+ get_deref_node(copy->variables[i], state);
+
+ /* Only bother removing copy entries for other nodes */
+ if (arg_node == NULL || arg_node == node)
+ continue;
+
+ struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy);
+ assert(arg_entry);
+ _mesa_set_remove(node->copies, arg_entry);
+ }
+
+ nir_instr_remove(&copy->instr);
+ }
+
+ node->copies = NULL;
+
+ return true;
+ }
+
- if (instr->type == nir_instr_type_phi) {
- nir_phi_instr *phi = nir_instr_as_phi(instr);
-
- struct hash_entry *entry =
- _mesa_hash_table_search(state->phi_table, phi);
-
- /* This can happen if we already have phi nodes in the program
- * that were not created in this pass.
- */
- if (!entry)
- continue;
-
- struct deref_node *node = entry->data;
-
- def_stack_push(node, &phi->dest.ssa, state);
- } else if (instr->type == nir_instr_type_intrinsic) {
- nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-
- switch (intrin->intrinsic) {
- case nir_intrinsic_load_var: {
- struct deref_node *node =
- get_deref_node(intrin->variables[0], state);
-
- if (node == NULL) {
- /* If we hit this path then we are referencing an invalid
- * value. Most likely, we unrolled something and are
- * reading past the end of some array. In any case, this
- * should result in an undefined value.
- */
- nir_ssa_undef_instr *undef =
- nir_ssa_undef_instr_create(state->shader,
- intrin->num_components);
-
- nir_instr_insert_before(&intrin->instr, &undef->instr);
- nir_instr_remove(&intrin->instr);
-
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- nir_src_for_ssa(&undef->def));
- continue;
- }
-
- if (!node->lower_to_ssa)
- continue;
-
- nir_alu_instr *mov = nir_alu_instr_create(state->shader,
- nir_op_imov);
- mov->src[0].src.is_ssa = true;
- mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state);
- for (unsigned i = intrin->num_components; i < 4; i++)
- mov->src[0].swizzle[i] = 0;
+ /* Performs variable renaming by doing a DFS of the dominance tree
+ *
+ * This algorithm is very similar to the one outlined in "Efficiently
+ * Computing Static Single Assignment Form and the Control Dependence
+ * Graph" by Cytron et. al. The primary difference is that we only put one
+ * SSA def on the stack per block.
+ */
+ static bool
+ rename_variables_block(nir_block *block, struct lower_variables_state *state)
+ {
+ nir_builder b;
+ nir_builder_init(&b, state->impl);
+
+ nir_foreach_instr_safe(block, instr) {
- assert(intrin->dest.is_ssa);
++ if (instr->type != nir_instr_type_intrinsic)
++ continue;
+
- mov->dest.write_mask = (1 << intrin->num_components) - 1;
- nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
- intrin->num_components, NULL);
++ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
- nir_instr_insert_before(&intrin->instr, &mov->instr);
++ switch (intrin->intrinsic) {
++ case nir_intrinsic_load_var: {
++ struct deref_node *node =
++ get_deref_node(intrin->variables[0], state);
++
++ if (node == NULL) {
++ /* If we hit this path then we are referencing an invalid
++ * value. Most likely, we unrolled something and are
++ * reading past the end of some array. In any case, this
++ * should result in an undefined value.
++ */
++ nir_ssa_undef_instr *undef =
++ nir_ssa_undef_instr_create(state->shader,
++ intrin->num_components);
+
- nir_src_for_ssa(&mov->dest.dest.ssa));
- break;
++ nir_instr_insert_before(&intrin->instr, &undef->instr);
+ nir_instr_remove(&intrin->instr);
+
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- case nir_intrinsic_store_var: {
- struct deref_node *node =
- get_deref_node(intrin->variables[0], state);
++ nir_src_for_ssa(&undef->def));
++ continue;
+ }
+
- if (node == NULL) {
- /* Probably an out-of-bounds array store. That should be a
- * no-op. */
- nir_instr_remove(&intrin->instr);
- continue;
- }
++ if (!node->lower_to_ssa)
++ continue;
+
- if (!node->lower_to_ssa)
- continue;
-
- assert(intrin->num_components ==
- glsl_get_vector_elements(node->type));
-
- assert(intrin->src[0].is_ssa);
-
- nir_ssa_def *new_def;
- b.cursor = nir_before_instr(&intrin->instr);
-
- if (intrin->const_index[0] == (1 << intrin->num_components) - 1) {
- /* Whole variable store - just copy the source. Note that
- * intrin->num_components and intrin->src[0].ssa->num_components
- * may differ.
- */
- unsigned swiz[4];
- for (unsigned i = 0; i < 4; i++)
- swiz[i] = i < intrin->num_components ? i : 0;
-
- new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
- intrin->num_components, false);
- } else {
- nir_ssa_def *old_def = get_ssa_def_for_block(node, block, state);
- /* For writemasked store_var intrinsics, we combine the newly
- * written values with the existing contents of unwritten
- * channels, creating a new SSA value for the whole vector.
- */
- nir_ssa_def *srcs[4];
- for (unsigned i = 0; i < intrin->num_components; i++) {
- if (intrin->const_index[0] & (1 << i)) {
- srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
- } else {
- srcs[i] = nir_channel(&b, old_def, i);
- }
- }
- new_def = nir_vec(&b, srcs, intrin->num_components);
- }
-
- assert(new_def->num_components == intrin->num_components);
++ nir_alu_instr *mov = nir_alu_instr_create(state->shader,
++ nir_op_imov);
++ mov->src[0].src = nir_src_for_ssa(
++ nir_phi_builder_value_get_block_def(node->pb_value, block));
++ for (unsigned i = intrin->num_components; i < 4; i++)
++ mov->src[0].swizzle[i] = 0;
+
- def_stack_push(node, new_def, state);
++ assert(intrin->dest.is_ssa);
+
- /* We'll wait to remove the instruction until the next pass
- * where we pop the node we just pushed back off the stack.
- */
- break;
- }
++ mov->dest.write_mask = (1 << intrin->num_components) - 1;
++ nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
++ intrin->num_components, NULL);
+
- default:
- break;
- }
++ nir_instr_insert_before(&intrin->instr, &mov->instr);
++ nir_instr_remove(&intrin->instr);
+
- }
-
- if (block->successors[0])
- add_phi_sources(block->successors[0], block, state);
- if (block->successors[1])
- add_phi_sources(block->successors[1], block, state);
-
- for (unsigned i = 0; i < block->num_dom_children; ++i)
- rename_variables_block(block->dom_children[i], state);
-
- /* Now we iterate over the instructions and pop off any SSA defs that we
- * pushed in the first loop.
- */
- nir_foreach_instr_safe(block, instr) {
- if (instr->type == nir_instr_type_phi) {
- nir_phi_instr *phi = nir_instr_as_phi(instr);
-
- struct hash_entry *entry =
- _mesa_hash_table_search(state->phi_table, phi);
-
- /* This can happen if we already have phi nodes in the program
- * that were not created in this pass.
- */
- if (!entry)
- continue;
-
- struct deref_node *node = entry->data;
++ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
++ nir_src_for_ssa(&mov->dest.dest.ssa));
++ break;
+ }
- def_stack_pop_if_in_block(node, block);
- } else if (instr->type == nir_instr_type_intrinsic) {
- nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
- if (intrin->intrinsic != nir_intrinsic_store_var)
- continue;
-
- struct deref_node *node = get_deref_node(intrin->variables[0], state);
- if (!node)
++ case nir_intrinsic_store_var: {
++ struct deref_node *node =
++ get_deref_node(intrin->variables[0], state);
+
- def_stack_pop_if_in_block(node, block);
- nir_instr_remove(&intrin->instr);
- }
- }
-
- return true;
-}
-
-/* Inserts phi nodes for all variables marked lower_to_ssa
- *
- * This is the same algorithm as presented in "Efficiently Computing Static
- * Single Assignment Form and the Control Dependence Graph" by Cytron et.
- * al.
- */
-static void
-insert_phi_nodes(struct lower_variables_state *state)
-{
- NIR_VLA_ZERO(unsigned, work, state->impl->num_blocks);
- NIR_VLA_ZERO(unsigned, has_already, state->impl->num_blocks);
-
- /*
- * Since the work flags already prevent us from inserting a node that has
- * ever been inserted into W, we don't need to use a set to represent W.
- * Also, since no block can ever be inserted into W more than once, we know
- * that the maximum size of W is the number of basic blocks in the
- * function. So all we need to handle W is an array and a pointer to the
- * next element to be inserted and the next element to be removed.
- */
- NIR_VLA(nir_block *, W, state->impl->num_blocks);
-
- unsigned w_start, w_end;
- unsigned iter_count = 0;
-
- foreach_list_typed(struct deref_node, node, direct_derefs_link,
- &state->direct_deref_nodes) {
- if (node->stores == NULL)
- continue;
++ if (node == NULL) {
++ /* Probably an out-of-bounds array store. That should be a
++ * no-op. */
++ nir_instr_remove(&intrin->instr);
+ continue;
++ }
+
+ if (!node->lower_to_ssa)
+ continue;
+
- if (!node->lower_to_ssa)
- continue;
++ assert(intrin->num_components ==
++ glsl_get_vector_elements(node->type));
+
- w_start = w_end = 0;
- iter_count++;
++ assert(intrin->src[0].is_ssa);
+
- struct set_entry *store_entry;
- set_foreach(node->stores, store_entry) {
- nir_intrinsic_instr *store = (nir_intrinsic_instr *)store_entry->key;
- if (work[store->instr.block->index] < iter_count)
- W[w_end++] = store->instr.block;
- work[store->instr.block->index] = iter_count;
- }
-
- while (w_start != w_end) {
- nir_block *cur = W[w_start++];
- struct set_entry *dom_entry;
- set_foreach(cur->dom_frontier, dom_entry) {
- nir_block *next = (nir_block *) dom_entry->key;
-
- /*
- * If there's more than one return statement, then the end block
- * can be a join point for some definitions. However, there are
- * no instructions in the end block, so nothing would use those
- * phi nodes. Of course, we couldn't place those phi nodes
- * anyways due to the restriction of having no instructions in the
- * end block...
++ nir_ssa_def *new_def;
++ b.cursor = nir_before_instr(&intrin->instr);
+
- if (next == state->impl->end_block)
- continue;
-
- if (has_already[next->index] < iter_count) {
- nir_phi_instr *phi = nir_phi_instr_create(state->shader);
- nir_ssa_dest_init(&phi->instr, &phi->dest,
- glsl_get_vector_elements(node->type), NULL);
- nir_instr_insert_before_block(next, &phi->instr);
++ if (intrin->const_index[0] == (1 << intrin->num_components) - 1) {
++ /* Whole variable store - just copy the source. Note that
++ * intrin->num_components and intrin->src[0].ssa->num_components
++ * may differ.
+ */
- _mesa_hash_table_insert(state->phi_table, phi, node);
-
- has_already[next->index] = iter_count;
- if (work[next->index] < iter_count) {
- work[next->index] = iter_count;
- W[w_end++] = next;
++ unsigned swiz[4];
++ for (unsigned i = 0; i < 4; i++)
++ swiz[i] = i < intrin->num_components ? i : 0;
+
-}
++ new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
++ intrin->num_components, false);
++ } else {
++ nir_ssa_def *old_def =
++ nir_phi_builder_value_get_block_def(node->pb_value, block);
++ /* For writemasked store_var intrinsics, we combine the newly
++ * written values with the existing contents of unwritten
++ * channels, creating a new SSA value for the whole vector.
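++ * For example, a vec4 store with write mask 0x5 produces
++ * vec4(new.x, old.y, new.z, old.w).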
++ */
++ nir_ssa_def *srcs[4];
++ for (unsigned i = 0; i < intrin->num_components; i++) {
++ if (intrin->const_index[0] & (1 << i)) {
++ srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
++ } else {
++ srcs[i] = nir_channel(&b, old_def, i);
+ }
+ }
++ new_def = nir_vec(&b, srcs, intrin->num_components);
+ }
++
++ assert(new_def->num_components == intrin->num_components);
++
++ nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
++ nir_instr_remove(&intrin->instr);
++ break;
++ }
++
++ default:
++ break;
+ }
+ }
- state.phi_table = _mesa_hash_table_create(state.dead_ctx,
- _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+
++ for (unsigned i = 0; i < block->num_dom_children; ++i)
++ rename_variables_block(block->dom_children[i], state);
++
++ return true;
++}
+
+ /** Implements a pass to lower variable uses to SSA values
+ *
+ * This path walks the list of instructions and tries to lower as many
+ * local variable load/store operations to SSA defs and uses as it can.
+ * The process involves four passes:
+ *
+ * 1) Iterate over all of the instructions and mark where each local
+ * variable deref is used in a load, store, or copy. While we're at
+ * it, we keep track of all of the fully-qualified (no wildcards) and
+ * fully-direct references we see and store them in the
+ * direct_deref_nodes hash table.
+ *
+ * 2) Walk over the list of fully-qualified direct derefs generated in
+ * the previous pass. For each deref, we determine if it can ever be
+ * aliased, i.e. if there is an indirect reference anywhere that may
+ * refer to it. If it cannot be aliased, we mark it for lowering to an
+ * SSA value. At this point, we lower any var_copy instructions that
+ * use the given deref to load/store operations and, if the deref has a
+ * constant initializer, we go ahead and add a load_const value at the
+ * beginning of the function with the initialized value.
+ *
+ * 3) Walk over the list of derefs we plan to lower to SSA values and
+ * insert phi nodes as needed.
+ *
+ * 4) Perform "variable renaming" by replacing the load/store instructions
+ * with SSA definitions and SSA uses.
+ */
+ static bool
+ nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
+ {
+ struct lower_variables_state state;
+
+ state.shader = impl->function->shader;
+ state.dead_ctx = ralloc_context(state.shader);
+ state.impl = impl;
+
+ state.deref_var_nodes = _mesa_hash_table_create(state.dead_ctx,
+ _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ exec_list_make_empty(&state.direct_deref_nodes);
- if (deref->var->constant_initializer) {
- nir_load_const_instr *load =
- nir_deref_get_const_initializer_load(state.shader, deref);
- nir_ssa_def_init(&load->instr, &load->def,
- glsl_get_vector_elements(node->type), NULL);
- nir_instr_insert_before_cf_list(&impl->body, &load->instr);
- def_stack_push(node, &load->def, &state);
- }
-
+
+ /* Build the initial deref structures and direct_deref_nodes table */
+ state.add_to_direct_deref_nodes = true;
+ nir_foreach_block(impl, register_variable_uses_block, &state);
+
+ bool progress = false;
+
+ nir_metadata_require(impl, nir_metadata_block_index);
+
+ /* We're about to iterate through direct_deref_nodes. Don't modify it. */
+ state.add_to_direct_deref_nodes = false;
+
+ foreach_list_typed_safe(struct deref_node, node, direct_derefs_link,
+ &state.direct_deref_nodes) {
+ nir_deref_var *deref = node->deref;
+
+ if (deref->var->data.mode != nir_var_local) {
+ exec_node_remove(&node->direct_derefs_link);
+ continue;
+ }
+
+ if (deref_may_be_aliased(deref, &state)) {
+ exec_node_remove(&node->direct_derefs_link);
+ continue;
+ }
+
+ node->lower_to_ssa = true;
+ progress = true;
+
- insert_phi_nodes(&state);
+ foreach_deref_node_match(deref, lower_copies_to_load_store, &state);
+ }
+
+ if (!progress)
+ return false;
+
+ nir_metadata_require(impl, nir_metadata_dominance);
+
+ /* We may have lowered some copy instructions to load/store
+ * instructions. The uses from the copy instructions have already been
+ * removed but we need to rescan to ensure that the uses from the newly
+ * added load/store instructions are registered. We need this
+ * information for phi node insertion below.
+ */
+ nir_foreach_block(impl, register_variable_uses_block, &state);
+
++ state.phi_builder = nir_phi_builder_create(state.impl);
++
++ NIR_VLA(BITSET_WORD, store_blocks, BITSET_WORDS(state.impl->num_blocks));
++ foreach_list_typed(struct deref_node, node, direct_derefs_link,
++ &state.direct_deref_nodes) {
++ if (!node->lower_to_ssa)
++ continue;
++
++ memset(store_blocks, 0,
++ BITSET_WORDS(state.impl->num_blocks) * sizeof(*store_blocks));
++
++ if (node->stores) {
++ struct set_entry *store_entry;
++ set_foreach(node->stores, store_entry) {
++ nir_intrinsic_instr *store =
++ (nir_intrinsic_instr *)store_entry->key;
++ BITSET_SET(store_blocks, store->instr.block->index);
++ }
++ }
++
++ if (node->deref->var->constant_initializer)
++ BITSET_SET(store_blocks, 0);
++
++ node->pb_value =
++ nir_phi_builder_add_value(state.phi_builder,
++ glsl_get_vector_elements(node->type),
++ store_blocks);
++
++ if (node->deref->var->constant_initializer) {
++ nir_load_const_instr *load =
++ nir_deref_get_const_initializer_load(state.shader, node->deref);
++ nir_instr_insert_before_cf_list(&impl->body, &load->instr);
++ nir_phi_builder_value_set_block_def(node->pb_value,
++ nir_start_block(impl), &load->def);
++ }
++ }
++
+ rename_variables_block(nir_start_block(impl), &state);
+
++ nir_phi_builder_finish(state.phi_builder);
++
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ ralloc_free(state.dead_ctx);
+
+ return progress;
+ }
+
+ void
+ nir_lower_vars_to_ssa(nir_shader *shader)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_lower_vars_to_ssa_impl(function->impl);
+ }
+ }
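The pass above uses the textbook iterated-dominance-frontier construction for step 3 (phi placement), now delegated to the phi builder. A minimal sketch in Python, assuming hypothetical def_blocks (the set of blocks that store to the value) and dom_frontier (a map from each block to its dominance-frontier set); this is an illustration, not part of the patch:

def place_phis(def_blocks, dom_frontier):
    # Worklist over dominance frontiers: every block in the frontier of a
    # defining block needs a phi, and a phi counts as a new definition,
    # so that block goes back on the worklist.
    needs_phi = set()
    work = list(def_blocks)
    while work:
        block = work.pop()
        for candidate in dom_frontier[block]:
            if candidate not in needs_phi:
                needs_phi.add(candidate)
                work.append(candidate)
    return needs_phi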
--- /dev/null
-def unop_convert(name, in_type, out_type, const_expr):
+ #! /usr/bin/env python
+ #
+ # Copyright (C) 2014 Connor Abbott
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a
+ # copy of this software and associated documentation files (the "Software"),
+ # to deal in the Software without restriction, including without limitation
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ # and/or sell copies of the Software, and to permit persons to whom the
+ # Software is furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice (including the next
+ # paragraph) shall be included in all copies or substantial portions of the
+ # Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ # IN THE SOFTWARE.
+ #
+ # Authors:
+ # Connor Abbott (cwabbott0@gmail.com)
+
+
+ # Class that represents all the information we have about the opcode
+ # NOTE: this must be kept in sync with nir_op_info
+
+ class Opcode(object):
+ """Class that represents all the information we have about the opcode
+ NOTE: this must be kept in sync with nir_op_info
+ """
+ def __init__(self, name, output_size, output_type, input_sizes,
+ input_types, algebraic_properties, const_expr):
+ """Parameters:
+
+ - name is the name of the opcode (prepend nir_op_ for the enum name)
+ - all types are strings that get nir_type_ prepended to them
+ - input_types is a list of types
+ - algebraic_properties is a space-separated string, where nir_op_is_ is
+ prepended before each entry
+ - const_expr is an expression or series of statements that computes the
+ constant value of the opcode given the constant values of its inputs.
+
+ Constant expressions are formed from the variables src0, src1, ...,
+ src(N-1), where N is the number of arguments. The output of the
+ expression should be stored in the dst variable. Per-component input
+ and output variables will be scalars and non-per-component input and
+ output variables will be a struct with fields named x, y, z, and w
+ all of the correct type. Input and output variables can be assumed
+ to already be of the correct type and need no conversion. In
+ particular, the conversion from the C bool type to/from NIR_TRUE and
+ NIR_FALSE happens automatically.
+
+ For per-component instructions, the entire expression will be
+ executed once for each component. For non-per-component
+ instructions, the expression is expected to store the correct values
+ in dst.x, dst.y, etc. If "dst" does not exist anywhere in the
+ constant expression, an assignment to dst will happen automatically
+ and the result will be equivalent to "dst = <expression>" for
+ per-component instructions and "dst.x = dst.y = ... = <expression>"
+ for non-per-component instructions.
+ """
+ assert isinstance(name, str)
+ assert isinstance(output_size, int)
+ assert isinstance(output_type, str)
+ assert isinstance(input_sizes, list)
+ assert isinstance(input_sizes[0], int)
+ assert isinstance(input_types, list)
+ assert isinstance(input_types[0], str)
+ assert isinstance(algebraic_properties, str)
+ assert isinstance(const_expr, str)
+ assert len(input_sizes) == len(input_types)
+ assert 0 <= output_size <= 4
+ for size in input_sizes:
+ assert 0 <= size <= 4
+ if output_size != 0:
+ assert size != 0
+ self.name = name
+ self.num_inputs = len(input_sizes)
+ self.output_size = output_size
+ self.output_type = output_type
+ self.input_sizes = input_sizes
+ self.input_types = input_types
+ self.algebraic_properties = algebraic_properties
+ self.const_expr = const_expr
+
+ # helper variables for strings
+ tfloat = "float"
+ tint = "int"
+ tbool = "bool"
+ tuint = "uint"
+
+ commutative = "commutative "
+ associative = "associative "
+
+ # global dictionary of opcodes
+ opcodes = {}
+
+ def opcode(name, output_size, output_type, input_sizes, input_types,
+ algebraic_properties, const_expr):
+ assert name not in opcodes
+ opcodes[name] = Opcode(name, output_size, output_type, input_sizes,
+ input_types, algebraic_properties, const_expr)
+
-unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
-unop_convert("f2u", tfloat, tuint, "src0") # Float-to-unsigned conversion
-unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
++def unop_convert(name, out_type, in_type, const_expr):
+ opcode(name, 0, out_type, [0], [in_type], "", const_expr)
+
+ def unop(name, ty, const_expr):
+ opcode(name, 0, ty, [0], [ty], "", const_expr)
+
+ def unop_horiz(name, output_size, output_type, input_size, input_type,
+ const_expr):
+ opcode(name, output_size, output_type, [input_size], [input_type], "",
+ const_expr)
+
+ def unop_reduce(name, output_size, output_type, input_type, prereduce_expr,
+ reduce_expr, final_expr):
+ def prereduce(src):
+ return "(" + prereduce_expr.format(src=src) + ")"
+ def final(src):
+ return final_expr.format(src="(" + src + ")")
+ def reduce_(src0, src1):
+ return reduce_expr.format(src0=src0, src1=src1)
+ src0 = prereduce("src0.x")
+ src1 = prereduce("src0.y")
+ src2 = prereduce("src0.z")
+ src3 = prereduce("src0.w")
+ unop_horiz(name + "2", output_size, output_type, 2, input_type,
+ final(reduce_(src0, src1)))
+ unop_horiz(name + "3", output_size, output_type, 3, input_type,
+ final(reduce_(reduce_(src0, src1), src2)))
+ unop_horiz(name + "4", output_size, output_type, 4, input_type,
+ final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
+
+
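To make the const_expr conventions in the Opcode docstring concrete, here is a hedged, hypothetical definition (illustration only, not one of the opcodes below): a horizontal two-component op whose expression writes dst.x and dst.y explicitly, as non-per-component instructions must.

# Hypothetical example only -- not a real NIR opcode.
unop_horiz("example_swap2", 2, tfloat, 2, tfloat, """
dst.x = src0.y;
dst.y = src0.x;
""")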
+ # These two move instructions differ in what modifiers they support and what
+ # the negate modifier means. Otherwise, they are identical.
+ unop("fmov", tfloat, "src0")
+ unop("imov", tint, "src0")
+
+ unop("ineg", tint, "-src0")
+ unop("fneg", tfloat, "-src0")
+ unop("inot", tint, "~src0") # invert every bit of the integer
+ unop("fnot", tfloat, "(src0 == 0.0f) ? 1.0f : 0.0f")
+ unop("fsign", tfloat, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)")
+ unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
+ unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
+ unop("fabs", tfloat, "fabsf(src0)")
+ unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)")
+ unop("frcp", tfloat, "1.0f / src0")
+ unop("frsq", tfloat, "1.0f / sqrtf(src0)")
+ unop("fsqrt", tfloat, "sqrtf(src0)")
+ unop("fexp2", tfloat, "exp2f(src0)")
+ unop("flog2", tfloat, "log2f(src0)")
-unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
++unop_convert("f2i", tint, tfloat, "src0") # Float-to-integer conversion.
++unop_convert("f2u", tuint, tfloat, "src0") # Float-to-unsigned conversion
++unop_convert("i2f", tfloat, tint, "src0") # Integer-to-float conversion.
+ # Float-to-boolean conversion
-unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
++unop_convert("f2b", tbool, tfloat, "src0 != 0.0f")
+ # Boolean-to-float conversion
-unop_convert("i2b", tint, tbool, "src0 != 0")
-unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
-unop_convert("u2f", tuint, tfloat, "src0") # Unsigned-to-float conversion.
++unop_convert("b2f", tfloat, tbool, "src0 ? 1.0f : 0.0f")
+ # Int-to-boolean conversion
-unop_convert("ufind_msb", tuint, tint, """
++unop_convert("i2b", tbool, tint, "src0 != 0")
++unop_convert("b2i", tint, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
++unop_convert("u2f", tfloat, tuint, "src0") # Unsigned-to-float conversion.
+
+ # Unary floating-point rounding operations.
+
+
+ unop("ftrunc", tfloat, "truncf(src0)")
+ unop("fceil", tfloat, "ceilf(src0)")
+ unop("ffloor", tfloat, "floorf(src0)")
+ unop("ffract", tfloat, "src0 - floorf(src0)")
+ unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
+
++unop("fquantize2f16", tfloat, "(fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0))")
+
+ # Trigonometric operations.
+
+
+ unop("fsin", tfloat, "sinf(src0)")
+ unop("fcos", tfloat, "cosf(src0)")
+
+
+ # Partial derivatives.
+
+
+ unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0.
+ unop("fddy", tfloat, "0.0f")
+ unop("fddx_fine", tfloat, "0.0f")
+ unop("fddy_fine", tfloat, "0.0f")
+ unop("fddx_coarse", tfloat, "0.0f")
+ unop("fddy_coarse", tfloat, "0.0f")
+
+
+ # Floating point pack and unpack operations.
+
+ def pack_2x16(fmt):
+ unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """
+ dst.x = (uint32_t) pack_fmt_1x16(src0.x);
+ dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
+ """.replace("fmt", fmt))
+
+ def pack_4x8(fmt):
+ unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """
+ dst.x = (uint32_t) pack_fmt_1x8(src0.x);
+ dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
+ dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
+ dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
+ """.replace("fmt", fmt))
+
+ def unpack_2x16(fmt):
+ unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """
+ dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
+ dst.y = unpack_fmt_1x16((uint16_t)(src0.x >> 16));
+ """.replace("fmt", fmt))
+
+ def unpack_4x8(fmt):
+ unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """
+ dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
+ dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
+ dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
+ dst.w = unpack_fmt_1x8((uint8_t)(src0.x >> 24));
+ """.replace("fmt", fmt))
+
+
+ pack_2x16("snorm")
+ pack_4x8("snorm")
+ pack_2x16("unorm")
+ pack_4x8("unorm")
+ pack_2x16("half")
+ unpack_2x16("snorm")
+ unpack_4x8("snorm")
+ unpack_2x16("unorm")
+ unpack_4x8("unorm")
+ unpack_2x16("half")
+
++unop_horiz("pack_uvec2_to_uint", 0, tuint, 2, tuint, """
++dst = (src0.x & 0xffff) | (src0.y << 16);
++""")
++
++unop_horiz("pack_uvec4_to_uint", 0, tuint, 4, tuint, """
++dst = (src0.x << 0) |
++ (src0.y << 8) |
++ (src0.z << 16) |
++ (src0.w << 24);
++""")
+
+ # Lowered floating point unpacking operations.
+
+
+ unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint,
+ "unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
+ unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint,
+ "unpack_half_1x16((uint16_t)(src0.x >> 16))")
+
+
+ # Bit operations, part of ARB_gpu_shader5.
+
+
+ unop("bitfield_reverse", tuint, """
+ /* we're not winning any awards for speed here, but that's ok */
+ dst = 0;
+ for (unsigned bit = 0; bit < 32; bit++)
+ dst |= ((src0 >> bit) & 1) << (31 - bit);
+ """)
+ unop("bit_count", tuint, """
+ dst = 0;
+ for (unsigned bit = 0; bit < 32; bit++) {
+ if ((src0 >> bit) & 1)
+ dst++;
+ }
+ """)
+
-binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
++unop_convert("ufind_msb", tint, tuint, """
+ dst = -1;
+ for (int bit = 31; bit >= 0; bit--) {
+ if ((src0 >> bit) & 1) {
+ dst = bit;
+ break;
+ }
+ }
+ """)
+
+ unop("ifind_msb", tint, """
+ dst = -1;
+ for (int bit = 31; bit >= 0; bit--) {
+ /* If src0 < 0, we're looking for the first 0 bit.
+ * if src0 >= 0, we're looking for the first 1 bit.
+ */
+ if ((((src0 >> bit) & 1) && (src0 >= 0)) ||
+ (!((src0 >> bit) & 1) && (src0 < 0))) {
+ dst = bit;
+ break;
+ }
+ }
+ """)
+
+ unop("find_lsb", tint, """
+ dst = -1;
+ for (unsigned bit = 0; bit < 32; bit++) {
+ if ((src0 >> bit) & 1) {
+ dst = bit;
+ break;
+ }
+ }
+ """)
+
+
+ for i in xrange(1, 5):
+ for j in xrange(1, 5):
+ unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f")
+
+ def binop_convert(name, out_type, in_type, alg_props, const_expr):
+ opcode(name, 0, out_type, [0, 0], [in_type, in_type], alg_props, const_expr)
+
+ def binop(name, ty, alg_props, const_expr):
+ binop_convert(name, ty, ty, alg_props, const_expr)
+
+ def binop_compare(name, ty, alg_props, const_expr):
+ binop_convert(name, tbool, ty, alg_props, const_expr)
+
+ def binop_horiz(name, out_size, out_type, src1_size, src1_type, src2_size,
+ src2_type, const_expr):
+ opcode(name, out_size, out_type, [src1_size, src2_size], [src1_type, src2_type],
+ "", const_expr)
+
+ def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
+ reduce_expr, final_expr):
+ def final(src):
+ return final_expr.format(src= "(" + src + ")")
+ def reduce_(src0, src1):
+ return reduce_expr.format(src0=src0, src1=src1)
+ def prereduce(src0, src1):
+ return "(" + prereduce_expr.format(src0=src0, src1=src1) + ")"
+ src0 = prereduce("src0.x", "src1.x")
+ src1 = prereduce("src0.y", "src1.y")
+ src2 = prereduce("src0.z", "src1.z")
+ src3 = prereduce("src0.w", "src1.w")
+ opcode(name + "2", output_size, output_type,
+ [2, 2], [src_type, src_type], commutative,
+ final(reduce_(src0, src1)))
+ opcode(name + "3", output_size, output_type,
+ [3, 3], [src_type, src_type], commutative,
+ final(reduce_(reduce_(src0, src1), src2)))
+ opcode(name + "4", output_size, output_type,
+ [4, 4], [src_type, src_type], commutative,
+ final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
+
+ binop("fadd", tfloat, commutative + associative, "src0 + src1")
+ binop("iadd", tint, commutative + associative, "src0 + src1")
+ binop("fsub", tfloat, "", "src0 - src1")
+ binop("isub", tint, "", "src0 - src1")
+
+ binop("fmul", tfloat, commutative + associative, "src0 * src1")
+ # low 32-bits of signed/unsigned integer multiply
+ binop("imul", tint, commutative + associative, "src0 * src1")
+ # high 32-bits of signed integer multiply
+ binop("imul_high", tint, commutative,
+ "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
+ # high 32-bits of unsigned integer multiply
+ binop("umul_high", tuint, commutative,
+ "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
+
+ binop("fdiv", tfloat, "", "src0 / src1")
+ binop("idiv", tint, "", "src0 / src1")
+ binop("udiv", tuint, "", "src0 / src1")
+
+ # returns a boolean representing the carry resulting from the addition of
+ # the two unsigned arguments.
+
+ binop_convert("uadd_carry", tuint, tuint, commutative, "src0 + src1 < src0")
+
+ # returns a boolean representing the borrow resulting from the subtraction
+ # of the two unsigned arguments.
+
+ binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1")
+
+ binop("umod", tuint, "", "src1 == 0 ? 0 : src0 % src1")
+
++# For signed integers, there are several different possible definitions of
++# "modulus" or "remainder". We follow the conventions used by LLVM and
++# SPIR-V. The irem opcode implements the standard C/C++ signed "%"
++# operation while the imod opcode implements the more mathematical
++# "modulus" operation. For details on the difference, see
++#
++# http://mathforum.org/library/drmath/view/52343.html
++
++binop("irem", tint, "", "src1 == 0 ? 0 : src0 % src1")
++binop("imod", tint, "",
++ "src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ?"
++ " src0 % src1 : src0 % src1 + src1)")
++binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
++binop("frem", tfloat, "", "src0 - src1 * truncf(src0 / src1)")
++
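A quick worked check of the irem/imod distinction described above (illustrative Python only; the const_expr strings themselves are C):

# For src0 = -7, src1 = 3: the C-style remainder truncates toward zero,
# while the mathematical modulus takes the sign of the divisor.
src0, src1 = -7, 3
c_rem = src0 - src1 * int(float(src0) / src1)   # -7 - 3*(-2) = -1  (irem)
math_mod = src0 - src1 * (src0 // src1)         # -7 - 3*(-3) =  2  (imod)
assert (c_rem, math_mod) == (-1, 2)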
+ #
+ # Comparisons
+ #
+
+
+ # these integer-aware comparisons return a boolean (0 or ~0)
+
+ binop_compare("flt", tfloat, "", "src0 < src1")
+ binop_compare("fge", tfloat, "", "src0 >= src1")
+ binop_compare("feq", tfloat, commutative, "src0 == src1")
+ binop_compare("fne", tfloat, commutative, "src0 != src1")
+ binop_compare("ilt", tint, "", "src0 < src1")
+ binop_compare("ige", tint, "", "src0 >= src1")
+ binop_compare("ieq", tint, commutative, "src0 == src1")
+ binop_compare("ine", tint, commutative, "src0 != src1")
+ binop_compare("ult", tuint, "", "src0 < src1")
+ binop_compare("uge", tuint, "", "src0 >= src1")
+
+ # integer-aware GLSL-style comparisons that compare floats and ints
+
+ binop_reduce("ball_fequal", 1, tbool, tfloat, "{src0} == {src1}",
+ "{src0} && {src1}", "{src}")
+ binop_reduce("bany_fnequal", 1, tbool, tfloat, "{src0} != {src1}",
+ "{src0} || {src1}", "{src}")
+ binop_reduce("ball_iequal", 1, tbool, tint, "{src0} == {src1}",
+ "{src0} && {src1}", "{src}")
+ binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
+ "{src0} || {src1}", "{src}")
+
+ # non-integer-aware GLSL-style comparisons that return 0.0 or 1.0
+
+ binop_reduce("fall_equal", 1, tfloat, tfloat, "{src0} == {src1}",
+ "{src0} && {src1}", "{src} ? 1.0f : 0.0f")
+ binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}",
+ "{src0} || {src1}", "{src} ? 1.0f : 0.0f")
+
+ # These comparisons for integer-less hardware return 1.0 and 0.0 for true
+ # and false respectively
+
+ binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
+ binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
+ binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
+ binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
+
+
+ binop("ishl", tint, "", "src0 << src1")
+ binop("ishr", tint, "", "src0 >> src1")
+ binop("ushr", tuint, "", "src0 >> src1")
+
+ # bitwise logic operators
+ #
+ # These are also used as boolean and, or, xor for hardware supporting
+ # integers.
+
+
+ binop("iand", tuint, commutative + associative, "src0 & src1")
+ binop("ior", tuint, commutative + associative, "src0 | src1")
+ binop("ixor", tuint, commutative + associative, "src0 ^ src1")
+
+
+ # floating point logic operators
+ #
+ # These use (src != 0.0) for testing the truth of the input, and output 1.0
+ # for true and 0.0 for false
+
+ binop("fand", tfloat, commutative,
+ "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
+ binop("for", tfloat, commutative,
+ "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
+ binop("fxor", tfloat, commutative,
+ "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")
+
+ binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
+ "{src}")
+
+ binop_reduce("fdot_replicated", 4, tfloat, tfloat,
+ "{src0} * {src1}", "{src0} + {src1}", "{src}")
+
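As a worked illustration of the reduce helpers (hedged, not part of the opcode table): expanding the binop_reduce call for fdot above, the two-component variant's const_expr comes out as the string below.

# What binop_reduce("fdot", ...) generates for the 2-wide opcode.
fdot2_expr = "((src0.x * src1.x) + (src0.y * src1.y))"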
+ opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], "",
+ "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
+ opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], "",
+ "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
+
+ binop("fmin", tfloat, "", "fminf(src0, src1)")
+ binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
+ binop("umin", tuint, commutative + associative, "src1 > src0 ? src0 : src1")
+ binop("fmax", tfloat, "", "fmaxf(src0, src1)")
+ binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
+ binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0")
+
+ # Saturated vector add for 4 8bit ints.
+ binop("usadd_4x8", tint, commutative + associative, """
+ dst = 0;
+ for (int i = 0; i < 32; i += 8) {
+ dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
+ }
+ """)
+
+ # Saturated vector subtract for 4 8bit ints.
+ binop("ussub_4x8", tint, "", """
+ dst = 0;
+ for (int i = 0; i < 32; i += 8) {
+ int src0_chan = (src0 >> i) & 0xff;
+ int src1_chan = (src1 >> i) & 0xff;
+ if (src0_chan > src1_chan)
+ dst |= (src0_chan - src1_chan) << i;
+ }
+ """)
+
+ # vector min for 4 8bit ints.
+ binop("umin_4x8", tint, commutative + associative, """
+ dst = 0;
+ for (int i = 0; i < 32; i += 8) {
+ dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+ }
+ """)
+
+ # vector max for 4 8bit ints.
+ binop("umax_4x8", tint, commutative + associative, """
+ dst = 0;
+ for (int i = 0; i < 32; i += 8) {
+ dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+ }
+ """)
+
+ # unorm multiply: (a * b) / 255.
+ binop("umul_unorm_4x8", tint, commutative + associative, """
+ dst = 0;
+ for (int i = 0; i < 32; i += 8) {
+ int src0_chan = (src0 >> i) & 0xff;
+ int src1_chan = (src1 >> i) & 0xff;
+ dst |= ((src0_chan * src1_chan) / 255) << i;
+ }
+ """)
+
+ binop("fpow", tfloat, "", "powf(src0, src1)")
+
+ binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat,
+ "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
+
+ # bfm implements the behavior of the first operation of the SM5 "bfi" assembly
+ # and that of the "bfi1" i965 instruction. That is, it has undefined behavior
+ # if either of its arguments is 32.
+ binop_convert("bfm", tuint, tint, "", """
+ int bits = src0, offset = src1;
+ if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32)
+ dst = 0; /* undefined */
+ else
+ dst = ((1u << bits) - 1) << offset;
+ """)
+
+ opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """
+ dst = ldexpf(src0, src1);
+ /* flush denormals to zero. */
+ if (!isnormal(dst))
+ dst = copysignf(0.0f, src0);
+ """)
+
+ # Combines the first component of each input to make a 2-component vector.
+
+ binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """
+ dst.x = src0.x;
+ dst.y = src1.x;
+ """)
+
++# Byte extraction
++binop("extract_ubyte", tuint, "", "(uint8_t)(src0 >> (src1 * 8))")
++binop("extract_ibyte", tint, "", "(int8_t)(src0 >> (src1 * 8))")
++
++# Word extraction
++binop("extract_uword", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
++binop("extract_iword", tint, "", "(int16_t)(src0 >> (src1 * 16))")
++
++
+ def triop(name, ty, const_expr):
+ opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr)
+ def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):
+ opcode(name, output_size, tuint,
+ [src1_size, src2_size, src3_size],
+ [tuint, tuint, tuint], "", const_expr)
+
+ triop("ffma", tfloat, "src0 * src1 + src2")
+
+ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
+
+ # Conditional Select
+ #
+ # A vector conditional select instruction (like ?:, but operating per-
+ # component on vectors). There are two versions, one for floating point
+ # bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
+
+
+ triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
+ opcode("bcsel", 0, tuint, [0, 0, 0],
+ [tbool, tuint, tuint], "", "src0 ? src1 : src2")
+
+ # SM5 bfi assembly
+ triop("bfi", tuint, """
+ unsigned mask = src0, insert = src1, base = src2;
+ if (mask == 0) {
+ dst = base;
+ } else {
+ unsigned tmp = mask;
+ while (!(tmp & 1)) {
+ tmp >>= 1;
+ insert <<= 1;
+ }
+ dst = (base & ~mask) | (insert & mask);
+ }
+ """)
+
+ # SM5 ubfe/ibfe assembly
+ opcode("ubfe", 0, tuint,
+ [0, 0, 0], [tuint, tint, tint], "", """
+ unsigned base = src0;
+ int offset = src1, bits = src2;
+ if (bits == 0) {
+ dst = 0;
+ } else if (bits < 0 || offset < 0) {
+ dst = 0; /* undefined */
+ } else if (offset + bits < 32) {
+ dst = (base << (32 - bits - offset)) >> (32 - bits);
+ } else {
+ dst = base >> offset;
+ }
+ """)
+ opcode("ibfe", 0, tint,
+ [0, 0, 0], [tint, tint, tint], "", """
+ int base = src0;
+ int offset = src1, bits = src2;
+ if (bits == 0) {
+ dst = 0;
+ } else if (bits < 0 || offset < 0) {
+ dst = 0; /* undefined */
+ } else if (offset + bits < 32) {
+ dst = (base << (32 - bits - offset)) >> (32 - bits);
+ } else {
+ dst = base >> offset;
+ }
+ """)
+
+ # GLSL bitfieldExtract()
+ opcode("ubitfield_extract", 0, tuint,
+ [0, 0, 0], [tuint, tint, tint], "", """
+ unsigned base = src0;
+ int offset = src1, bits = src2;
+ if (bits == 0) {
+ dst = 0;
+ } else if (bits < 0 || offset < 0 || offset + bits > 32) {
+ dst = 0; /* undefined per the spec */
+ } else {
+ dst = (base >> offset) & ((1ull << bits) - 1);
+ }
+ """)
+ opcode("ibitfield_extract", 0, tint,
+ [0, 0, 0], [tint, tint, tint], "", """
+ int base = src0;
+ int offset = src1, bits = src2;
+ if (bits == 0) {
+ dst = 0;
+ } else if (offset < 0 || bits < 0 || offset + bits > 32) {
+ dst = 0;
+ } else {
+ dst = (base << (32 - offset - bits)) >> (32 - bits); /* use sign-extending shift */
+ }
+ """)
+
+ # Combines the first component of each input to make a 3-component vector.
+
+ triop_horiz("vec3", 3, 1, 1, 1, """
+ dst.x = src0.x;
+ dst.y = src1.x;
+ dst.z = src2.x;
+ """)
+
+ def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
+ src4_size, const_expr):
+ opcode(name, output_size, tuint,
+ [src1_size, src2_size, src3_size, src4_size],
+ [tuint, tuint, tuint, tuint],
+ "", const_expr)
+
+ opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0],
+ [tuint, tuint, tint, tint], "", """
+ unsigned base = src0, insert = src1;
+ int offset = src2, bits = src3;
+ if (bits == 0) {
+ dst = 0;
+ } else if (offset < 0 || bits < 0 || bits + offset > 32) {
+ dst = 0;
+ } else {
+ unsigned mask = ((1ull << bits) - 1) << offset;
+ dst = (base & ~mask) | ((insert << offset) & mask);
+ }
+ """)
+
+ quadop_horiz("vec4", 4, 1, 1, 1, 1, """
+ dst.x = src0.x;
+ dst.y = src1.x;
+ dst.z = src2.x;
+ dst.w = src3.x;
+ """)
+
+
--- /dev/null
+ #! /usr/bin/env python
++# -*- encoding: utf-8 -*-
+ #
+ # Copyright (C) 2014 Intel Corporation
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a
+ # copy of this software and associated documentation files (the "Software"),
+ # to deal in the Software without restriction, including without limitation
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ # and/or sell copies of the Software, and to permit persons to whom the
+ # Software is furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice (including the next
+ # paragraph) shall be included in all copies or substantial portions of the
+ # Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ # IN THE SOFTWARE.
+ #
+ # Authors:
+ # Jason Ekstrand (jason@jlekstrand.net)
+
+ import nir_algebraic
+
+ # Convenience variables
+ a = 'a'
+ b = 'b'
+ c = 'c'
+ d = 'd'
+
+ # Written in the form (<search>, <replace>) where <search> is an expression
+ # and <replace> is either an expression or a value. An expression is
+ # defined as a tuple of the form (<op>, <src0>, <src1>, <src2>, <src3>)
+ # where each source is either an expression or a value. A value can be
+ # either a numeric constant or a string representing a variable name.
+ #
+ # Variable names are specified as "[#]name[@type]" where "#" indicates that
+ # the given variable will only match constants and the type indicates that
+ # the given variable will only match values from ALU instructions with the
+ # given output type.
+ #
+ # For constants, you have to be careful to make sure that it is the right
+ # type because python is unaware of the source and destination types of the
+ # opcodes.
+
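As a hedged illustration of the syntax described above (not an entry in the list that follows): '#' marks a source that must be a constant and '@type' restricts a variable to values with that ALU result type, so a rule like the one below would replace a bcsel whose two branches are the same constant with that constant.

# Hypothetical rule, for illustration only.
example_rule = (('bcsel', 'a@bool', '#b', '#b'), 'b')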
+ optimizations = [
+ (('fneg', ('fneg', a)), a),
+ (('ineg', ('ineg', a)), a),
+ (('fabs', ('fabs', a)), ('fabs', a)),
+ (('fabs', ('fneg', a)), ('fabs', a)),
+ (('iabs', ('iabs', a)), ('iabs', a)),
+ (('iabs', ('ineg', a)), ('iabs', a)),
+ (('fadd', a, 0.0), a),
+ (('iadd', a, 0), a),
+ (('usadd_4x8', a, 0), a),
+ (('usadd_4x8', a, ~0), ~0),
+ (('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
+ (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
+ (('fadd', ('fneg', a), a), 0.0),
+ (('iadd', ('ineg', a), a), 0),
+ (('iadd', ('ineg', a), ('iadd', a, b)), b),
+ (('iadd', a, ('iadd', ('ineg', a), b)), b),
+ (('fadd', ('fneg', a), ('fadd', a, b)), b),
+ (('fadd', a, ('fadd', ('fneg', a), b)), b),
+ (('fmul', a, 0.0), 0.0),
+ (('imul', a, 0), 0),
+ (('umul_unorm_4x8', a, 0), 0),
+ (('umul_unorm_4x8', a, ~0), a),
+ (('fmul', a, 1.0), a),
+ (('imul', a, 1), a),
+ (('fmul', a, -1.0), ('fneg', a)),
+ (('imul', a, -1), ('ineg', a)),
++ (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
+ (('ffma', 0.0, a, b), b),
+ (('ffma', a, 0.0, b), b),
+ (('ffma', a, b, 0.0), ('fmul', a, b)),
+ (('ffma', a, 1.0, b), ('fadd', a, b)),
+ (('ffma', 1.0, a, b), ('fadd', a, b)),
+ (('flrp', a, b, 0.0), a),
+ (('flrp', a, b, 1.0), b),
+ (('flrp', a, a, b), a),
+ (('flrp', 0.0, a, b), ('fmul', a, b)),
+ (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+ (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
+ (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
+ (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
+ # Comparison simplifications
+ (('inot', ('flt', a, b)), ('fge', a, b)),
+ (('inot', ('fge', a, b)), ('flt', a, b)),
+ (('inot', ('feq', a, b)), ('fne', a, b)),
+ (('inot', ('fne', a, b)), ('feq', a, b)),
+ (('inot', ('ilt', a, b)), ('ige', a, b)),
+ (('inot', ('ige', a, b)), ('ilt', a, b)),
+ (('inot', ('ieq', a, b)), ('ine', a, b)),
+ (('inot', ('ine', a, b)), ('ieq', a, b)),
+ (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
+ (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
+ (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
+ (('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)),
+ (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
+ (('fmin', a, a), a),
+ (('fmax', a, a), a),
+ (('imin', a, a), a),
+ (('imax', a, a), a),
+ (('umin', a, a), a),
+ (('umax', a, a), a),
+ (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
+ (('fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
+ (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
+ (('fsat', ('fsat', a)), ('fsat', a)),
+ (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
+ (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
+ (('ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)),
+ (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
+ (('ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)),
+ (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'),
+ (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
+ (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
+ (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'),
+ (('fne', ('fneg', a), a), ('fne', a, 0.0)),
+ (('feq', ('fneg', a), a), ('feq', a, 0.0)),
+ # Emulating booleans
+ (('imul', ('b2i', a), ('b2i', b)), ('b2i', ('iand', a, b))),
+ (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
+ (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
+ (('iand', 'a@bool', 1.0), ('b2f', a)),
+ (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
+ (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
+ # Comparison with the same args. Note that these are not done for
+ # the float versions because NaN always returns false on float
+ # inequalities.
+ (('ilt', a, a), False),
+ (('ige', a, a), True),
+ (('ieq', a, a), True),
+ (('ine', a, a), False),
+ (('ult', a, a), False),
+ (('uge', a, a), True),
+ # Logical and bit operations
+ (('fand', a, 0.0), 0.0),
+ (('iand', a, a), a),
+ (('iand', a, ~0), a),
+ (('iand', a, 0), 0),
+ (('ior', a, a), a),
+ (('ior', a, 0), a),
+ (('fxor', a, a), 0.0),
+ (('ixor', a, a), 0),
+ (('inot', ('inot', a)), a),
+ # DeMorgan's Laws
+ (('iand', ('inot', a), ('inot', b)), ('inot', ('ior', a, b))),
+ (('ior', ('inot', a), ('inot', b)), ('inot', ('iand', a, b))),
+ # Shift optimizations
+ (('ishl', 0, a), 0),
+ (('ishl', a, 0), a),
+ (('ishr', 0, a), 0),
+ (('ishr', a, 0), a),
+ (('ushr', 0, a), 0),
+ (('ushr', a, 0), a),
+ # Exponential/logarithmic identities
+ (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a
+ (('flog2', ('fexp2', a)), a), # lg2(2^a) = a
+ (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
+ (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
+ (('fpow', a, 1.0), a),
+ (('fpow', a, 2.0), ('fmul', a, a)),
+ (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
+ (('fpow', 2.0, a), ('fexp2', a)),
+ (('fpow', ('fpow', a, 2.2), 0.454545), a),
+ (('fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)),
+ (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
+ (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
+ (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
+ (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
+ (('flog2', ('frcp', a)), ('fneg', ('flog2', a))),
+ (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
+ (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
+ (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
+ (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
+ (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
+ # Division and reciprocal
+ (('fdiv', 1.0, a), ('frcp', a)),
+ (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
+ (('frcp', ('frcp', a)), a),
+ (('frcp', ('fsqrt', a)), ('frsq', a)),
+ (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
+ (('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
+ # Boolean simplifications
+ (('ieq', 'a@bool', True), a),
+ (('ine', 'a@bool', True), ('inot', a)),
+ (('ine', 'a@bool', False), a),
+ (('ieq', 'a@bool', False), ('inot', 'a')),
+ (('bcsel', a, True, False), ('ine', a, 0)),
+ (('bcsel', a, False, True), ('ieq', a, 0)),
+ (('bcsel', True, b, c), b),
+ (('bcsel', False, b, c), c),
+ # The result of this should be hit by constant propagation and, in the
+ # next round of opt_algebraic, get picked up by one of the above two.
+ (('bcsel', '#a', b, c), ('bcsel', ('ine', 'a', 0), b, c)),
+
+ (('bcsel', a, b, b), b),
+ (('fcsel', a, b, b), b),
+
+ # Conversions
+ (('i2b', ('b2i', a)), a),
+ (('f2i', ('ftrunc', a)), ('f2i', a)),
+ (('f2u', ('ftrunc', a)), ('f2u', a)),
+
+ # Subtracts
+ (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
+ (('isub', a, ('isub', 0, b)), ('iadd', a, b)),
+ (('ussub_4x8', a, 0), a),
+ (('ussub_4x8', a, ~0), 0),
+ (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
+ (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
+ (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
+ (('ineg', a), ('isub', 0, a), 'options->lower_negate'),
+ (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
+ (('iadd', a, ('isub', 0, b)), ('isub', a, b)),
+ (('fabs', ('fsub', 0.0, a)), ('fabs', a)),
+ (('iabs', ('isub', 0, a)), ('iabs', a)),
+
+ # Misc. lowering
+ (('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'),
++ (('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod'),
+ (('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'),
+ (('usub_borrow', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'),
++ (('ldexp', 'x', 'exp'),
++ ('fmul', 'x', ('ishl', ('imin', ('imax', ('iadd', 'exp', 0x7f), 0), 0xff), 23))),
+
+ (('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
+ ('bcsel', ('ilt', 31, 'bits'), 'insert',
+ ('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')),
+ 'options->lower_bitfield_insert'),
+
+ (('ibitfield_extract', 'value', 'offset', 'bits'),
+ ('bcsel', ('ilt', 31, 'bits'), 'value',
+ ('ibfe', 'value', 'offset', 'bits')),
+ 'options->lower_bitfield_extract'),
+
+ (('ubitfield_extract', 'value', 'offset', 'bits'),
+ ('bcsel', ('ult', 31, 'bits'), 'value',
+ ('ubfe', 'value', 'offset', 'bits')),
+ 'options->lower_bitfield_extract'),
++
++ (('extract_ibyte', a, b),
++ ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 24),
++ 'options->lower_extract_byte'),
++
++ (('extract_ubyte', a, b),
++ ('iand', ('ushr', a, ('imul', b, 8)), 0xff),
++ 'options->lower_extract_byte'),
++
++ (('extract_iword', a, b),
++ ('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 16),
++ 'options->lower_extract_word'),
++
++ (('extract_uword', a, b),
++ ('iand', ('ushr', a, ('imul', b, 16)), 0xffff),
++ 'options->lower_extract_word'),
++
++ (('pack_unorm_2x16', 'v'),
++ ('pack_uvec2_to_uint',
++ ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))),
++ 'options->lower_pack_unorm_2x16'),
++
++ (('pack_unorm_4x8', 'v'),
++ ('pack_uvec4_to_uint',
++ ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))),
++ 'options->lower_pack_unorm_4x8'),
++
++ (('pack_snorm_2x16', 'v'),
++ ('pack_uvec2_to_uint',
++ ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))),
++ 'options->lower_pack_snorm_2x16'),
++
++ (('pack_snorm_4x8', 'v'),
++ ('pack_uvec4_to_uint',
++ ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))),
++ 'options->lower_pack_snorm_4x8'),
++
++ (('unpack_unorm_2x16', 'v'),
++ ('fdiv', ('u2f', ('vec4', ('extract_uword', 'v', 0),
++ ('extract_uword', 'v', 1), 0, 0)),
++ 65535.0),
++ 'options->lower_unpack_unorm_2x16'),
++
++ (('unpack_unorm_4x8', 'v'),
++ ('fdiv', ('u2f', ('vec4', ('extract_ubyte', 'v', 0),
++ ('extract_ubyte', 'v', 1),
++ ('extract_ubyte', 'v', 2),
++ ('extract_ubyte', 'v', 3))),
++ 255.0),
++ 'options->lower_unpack_unorm_4x8'),
++
++ (('unpack_snorm_2x16', 'v'),
++ ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_iword', 'v', 0),
++ ('extract_iword', 'v', 1), 0, 0)),
++ 32767.0))),
++ 'options->lower_unpack_snorm_2x16'),
++
++ (('unpack_snorm_4x8', 'v'),
++ ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_ibyte', 'v', 0),
++ ('extract_ibyte', 'v', 1),
++ ('extract_ibyte', 'v', 2),
++ ('extract_ibyte', 'v', 3))),
++ 127.0))),
++ 'options->lower_unpack_snorm_4x8'),
+ ]
+
+ # Add optimizations to handle the case where the result of a ternary is
+ # compared to a constant. This way we can take things like
+ #
+ # (a ? 0 : 1) > 0
+ #
+ # and turn it into
+ #
+ # a ? (0 > 0) : (1 > 0)
+ #
+ # which constant folding will eat for lunch. The resulting ternary will
+ # further get cleaned up by the boolean reductions above and we will be
+ # left with just the original variable "a".
+ for op in ['flt', 'fge', 'feq', 'fne',
+ 'ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']:
+ optimizations += [
+ ((op, ('bcsel', 'a', '#b', '#c'), '#d'),
+ ('bcsel', 'a', (op, 'b', 'd'), (op, 'c', 'd'))),
+ ((op, '#d', ('bcsel', a, '#b', '#c')),
+ ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
+ ]
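For instance, the 'flt' iteration of the loop above appends a pair of rules, the first of which is equivalent to:

# Generated for op == 'flt'; shown here only for illustration.
flt_generated = (('flt', ('bcsel', 'a', '#b', '#c'), '#d'),
                 ('bcsel', 'a', ('flt', 'b', 'd'), ('flt', 'c', 'd')))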
+
+ # This section contains "late" optimizations that should be run after the
+ # regular optimizations have finished. Optimizations should go here if
+ # they help code generation but do not necessarily produce code that is
+ # more easily optimizable.
+ late_optimizations = [
+ (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
+ (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
+ (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+ (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+ (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
+ (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
+ (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
+ (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
+ ]
+
+ print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
+ print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
+ late_optimizations).render()
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2016 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "nir_phi_builder.h"
++#include "nir/nir_vla.h"
++
++struct nir_phi_builder {
++ nir_shader *shader;
++ nir_function_impl *impl;
++
++ /* Copied from the impl for easy access */
++ unsigned num_blocks;
++
++ /* Array of all blocks indexed by block->index. */
++ nir_block **blocks;
++
++ /* Hold on to the values so we can easily iterate over them. */
++ struct exec_list values;
++
++ /* Worklist for phi adding */
++ unsigned iter_count;
++ unsigned *work;
++ nir_block **W;
++};
++
++#define NEEDS_PHI ((nir_ssa_def *)(intptr_t)-1)
++
++struct nir_phi_builder_value {
++ struct exec_node node;
++
++ struct nir_phi_builder *builder;
++
++ /* Needed so we can create phis and undefs */
++ unsigned num_components;
++
++ /* The list of phi nodes associated with this value. Phi nodes are not
++ * added directly. Instead, they are created, the instr->block pointer
++ * set, and then added to this list. Later, in phi_builder_finish, we
++ * set up their sources and add them to the top of their respective
++ * blocks.
++ */
++ struct exec_list phis;
++
++ /* Array of SSA defs, indexed by block. If a phi needs to be inserted
++ * in a given block, it will have the magic value NEEDS_PHI.
++ */
++ nir_ssa_def *defs[0];
++};
++
++static bool
++fill_block_array(nir_block *block, void *void_data)
++{
++ nir_block **blocks = void_data;
++ blocks[block->index] = block;
++ return true;
++}
++
++struct nir_phi_builder *
++nir_phi_builder_create(nir_function_impl *impl)
++{
++ struct nir_phi_builder *pb = ralloc(NULL, struct nir_phi_builder);
++
++ pb->shader = impl->function->shader;
++ pb->impl = impl;
++
++ assert(impl->valid_metadata & (nir_metadata_block_index |
++ nir_metadata_dominance));
++
++ pb->num_blocks = impl->num_blocks;
++ pb->blocks = ralloc_array(pb, nir_block *, pb->num_blocks);
++ nir_foreach_block(impl, fill_block_array, pb->blocks);
++
++ exec_list_make_empty(&pb->values);
++
++ pb->iter_count = 0;
++ pb->work = rzalloc_array(pb, unsigned, pb->num_blocks);
++ pb->W = ralloc_array(pb, nir_block *, pb->num_blocks);
++
++ return pb;
++}
++
++struct nir_phi_builder_value *
++nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
++ const BITSET_WORD *defs)
++{
++ struct nir_phi_builder_value *val;
++ unsigned i, w_start = 0, w_end = 0;
++
++ val = rzalloc_size(pb, sizeof(*val) + sizeof(val->defs[0]) * pb->num_blocks);
++ val->builder = pb;
++ val->num_components = num_components;
++ exec_list_make_empty(&val->phis);
++ exec_list_push_tail(&pb->values, &val->node);
++
++ pb->iter_count++;
++
++ BITSET_WORD tmp;
++ BITSET_FOREACH_SET(i, tmp, defs, pb->num_blocks) {
++ if (pb->work[i] < pb->iter_count)
++ pb->W[w_end++] = pb->blocks[i];
++ pb->work[i] = pb->iter_count;
++ }
++
++ while (w_start != w_end) {
++ nir_block *cur = pb->W[w_start++];
++ struct set_entry *dom_entry;
++ set_foreach(cur->dom_frontier, dom_entry) {
++ nir_block *next = (nir_block *) dom_entry->key;
++
++ /*
++ * If there's more than one return statement, then the end block
++ * can be a join point for some definitions. However, there are
++ * no instructions in the end block, so nothing would use those
++ * phi nodes. Of course, we couldn't place those phi nodes
++ * anyways due to the restriction of having no instructions in the
++ * end block...
++ */
++ if (next == pb->impl->end_block)
++ continue;
++
++ if (val->defs[next->index] == NULL) {
++ val->defs[next->index] = NEEDS_PHI;
++
++ if (pb->work[next->index] < pb->iter_count) {
++ pb->work[next->index] = pb->iter_count;
++ pb->W[w_end++] = next;
++ }
++ }
++ }
++ }
++
++ return val;
++}
++
++void
++nir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val,
++ nir_block *block, nir_ssa_def *def)
++{
++ val->defs[block->index] = def;
++}
++
++nir_ssa_def *
++nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
++ nir_block *block)
++{
++ if (val->defs[block->index] == NULL) {
++ if (block->imm_dom) {
++ /* Grab it from our immediate dominator. We'll stash it here for
++ * easy access later.
++ */
++ val->defs[block->index] =
++ nir_phi_builder_value_get_block_def(val, block->imm_dom);
++ return val->defs[block->index];
++ } else {
++ /* No immediate dominator means that this block is either the
++ * start block or unreachable. In either case, the value is
++ * undefined so we need an SSA undef.
++ */
++ nir_ssa_undef_instr *undef =
++ nir_ssa_undef_instr_create(val->builder->shader,
++ val->num_components);
++ nir_instr_insert(nir_before_cf_list(&val->builder->impl->body),
++ &undef->instr);
++ val->defs[block->index] = &undef->def;
++ return &undef->def;
++ }
++ } else if (val->defs[block->index] == NEEDS_PHI) {
++ /* If we need a phi instruction, go ahead and create one but don't
++ * add it to the program yet. Later, in nir_phi_builder_finish, we'll
++ * set up the phi sources and add the instructions to the program.
++ */
++ nir_phi_instr *phi = nir_phi_instr_create(val->builder->shader);
++ nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components, NULL);
++ phi->instr.block = block;
++ exec_list_push_tail(&val->phis, &phi->instr.node);
++ val->defs[block->index] = &phi->dest.ssa;
++ return &phi->dest.ssa;
++ } else {
++ return val->defs[block->index];
++ }
++}
++
++static int
++compare_blocks(const void *_a, const void *_b)
++{
++ nir_block * const * a = _a;
++ nir_block * const * b = _b;
++
++ return (*a)->index - (*b)->index;
++}
++
++void
++nir_phi_builder_finish(struct nir_phi_builder *pb)
++{
++ const unsigned num_blocks = pb->num_blocks;
++ NIR_VLA(nir_block *, preds, num_blocks);
++
++ foreach_list_typed(struct nir_phi_builder_value, val, node, &pb->values) {
++ /* We can't iterate over the list of phis normally because we are
++ * removing them as we go and, in some cases, adding new phis as we
++ * build the source lists of others.
++ */
++ while (!exec_list_is_empty(&val->phis)) {
++ struct exec_node *head = exec_list_get_head(&val->phis);
++ nir_phi_instr *phi = exec_node_data(nir_phi_instr, head, instr.node);
++ assert(phi->instr.type == nir_instr_type_phi);
++
++ exec_node_remove(&phi->instr.node);
++
++ /* Construct an array of predecessors. We sort it to ensure
++ * determinism in the phi insertion algorithm.
++ *
++ * XXX: Calling qsort this many times seems expensive.
++ */
++ int num_preds = 0;
++ struct set_entry *entry;
++ set_foreach(phi->instr.block->predecessors, entry)
++ preds[num_preds++] = (nir_block *)entry->key;
++ qsort(preds, num_preds, sizeof(*preds), compare_blocks);
++
++ for (unsigned i = 0; i < num_preds; i++) {
++ nir_phi_src *src = ralloc(phi, nir_phi_src);
++ src->pred = preds[i];
++ src->src = nir_src_for_ssa(
++ nir_phi_builder_value_get_block_def(val, preds[i]));
++ exec_list_push_tail(&phi->srcs, &src->node);
++ }
++
++ nir_instr_insert(nir_before_block(phi->instr.block), &phi->instr);
++ }
++ }
++
++ ralloc_free(pb);
++}
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2016 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#pragma once
++
++#include "nir.h"
++
++struct nir_phi_builder;
++struct nir_phi_builder_value;
++
++/* Create a new phi builder.
++ *
++ * While this is fairly cheap, it does allocate some memory and walk the list
++ * of blocks so it's recommended that you only call it once and use it to
++ * build phis for several values.
++ */
++struct nir_phi_builder *nir_phi_builder_create(nir_function_impl *impl);
++
++/* Register a value with the builder.
++ *
++ * The 'defs' parameter specifies a bitset of blocks in which the given value
++ * is defined. This is used to determine where to place the phi nodes.
++ */
++struct nir_phi_builder_value *
++nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
++ const BITSET_WORD *defs);
++
++/* Register a definition for the given value and block.
++ *
++ * It is safe to call this function as many times as you wish for any given
++ * block/value pair. However, it always replaces whatever was there
++ * previously even if that definition is from a phi node. The phi builder
++ * always uses the latest information it has, so you must be careful about the
++ * order in which you register definitions. The final value at the end of the
++ * block must be the last value registered.
++ */
++void
++nir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val,
++ nir_block *block, nir_ssa_def *def);
++
++/* Get the definition for the given value in the given block.
++ *
++ * This definition will always be the latest definition known for the given
++ * block. If no definition is immediately available, it will crawl up the
++ * dominance tree and insert phi nodes as needed until it finds one. In the
++ * case that no suitable definition is found, it will return the result of a
++ * nir_ssa_undef_instr with the correct number of components.
++ *
++ * Because this function only uses the latest available information for any
++ * given block, you must have already finished registering definitions for any
++ * blocks that dominate the current block in order to get the correct result.
++ */
++nir_ssa_def *
++nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
++ nir_block *block);
++
++/* Finish building phi nodes and free the builder.
++ *
++ * This function does far more than just free memory. Prior to calling
++ * nir_phi_builder_finish, no phi nodes have actually been inserted in the
++ * program. This function is what finishes setting up phi node sources and
++ * adds the phi nodes to the program.
++ */
++void nir_phi_builder_finish(struct nir_phi_builder *pb);
--- /dev/null
- "uniform ", "shader_storage", "system " };
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir.h"
+ #include "compiler/shader_enums.h"
+ #include <stdio.h>
+ #include <stdlib.h>
+
+ static void
+ print_tabs(unsigned num_tabs, FILE *fp)
+ {
+ for (unsigned i = 0; i < num_tabs; i++)
+ fprintf(fp, "\t");
+ }
+
+ typedef struct {
+ FILE *fp;
+ nir_shader *shader;
+ /** map from nir_variable -> printable name */
+ struct hash_table *ht;
+
+ /** set of names used so far for nir_variables */
+ struct set *syms;
+
+ /* an index used to make new non-conflicting names */
+ unsigned index;
+ } print_state;
+
+ static void
+ print_register(nir_register *reg, print_state *state)
+ {
+ FILE *fp = state->fp;
+ if (reg->name != NULL)
+ fprintf(fp, "/* %s */ ", reg->name);
+ if (reg->is_global)
+ fprintf(fp, "gr%u", reg->index);
+ else
+ fprintf(fp, "r%u", reg->index);
+ }
+
+ static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4" };
+
+ static void
+ print_register_decl(nir_register *reg, print_state *state)
+ {
+ FILE *fp = state->fp;
+ fprintf(fp, "decl_reg %s ", sizes[reg->num_components]);
+ if (reg->is_packed)
+ fprintf(fp, "(packed) ");
+ print_register(reg, state);
+ if (reg->num_array_elems != 0)
+ fprintf(fp, "[%u]", reg->num_array_elems);
+ fprintf(fp, "\n");
+ }
+
+ static void
+ print_ssa_def(nir_ssa_def *def, print_state *state)
+ {
+ FILE *fp = state->fp;
+ if (def->name != NULL)
+ fprintf(fp, "/* %s */ ", def->name);
+ fprintf(fp, "%s ssa_%u", sizes[def->num_components], def->index);
+ }
+
+ static void
+ print_ssa_use(nir_ssa_def *def, print_state *state)
+ {
+ FILE *fp = state->fp;
+ if (def->name != NULL)
+ fprintf(fp, "/* %s */ ", def->name);
+ fprintf(fp, "ssa_%u", def->index);
+ }
+
+ static void print_src(nir_src *src, print_state *state);
+
+ static void
+ print_reg_src(nir_reg_src *src, print_state *state)
+ {
+ FILE *fp = state->fp;
+ print_register(src->reg, state);
+ if (src->reg->num_array_elems != 0) {
+ fprintf(fp, "[%u", src->base_offset);
+ if (src->indirect != NULL) {
+ fprintf(fp, " + ");
+ print_src(src->indirect, state);
+ }
+ fprintf(fp, "]");
+ }
+ }
+
+ static void
+ print_reg_dest(nir_reg_dest *dest, print_state *state)
+ {
+ FILE *fp = state->fp;
+ print_register(dest->reg, state);
+ if (dest->reg->num_array_elems != 0) {
+ fprintf(fp, "[%u", dest->base_offset);
+ if (dest->indirect != NULL) {
+ fprintf(fp, " + ");
+ print_src(dest->indirect, state);
+ }
+ fprintf(fp, "]");
+ }
+ }
+
+ static void
+ print_src(nir_src *src, print_state *state)
+ {
+ if (src->is_ssa)
+ print_ssa_use(src->ssa, state);
+ else
+ print_reg_src(&src->reg, state);
+ }
+
+ static void
+ print_dest(nir_dest *dest, print_state *state)
+ {
+ if (dest->is_ssa)
+ print_ssa_def(&dest->ssa, state);
+ else
+ print_reg_dest(&dest->reg, state);
+ }
+
+ static void
+ print_alu_src(nir_alu_instr *instr, unsigned src, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ if (instr->src[src].negate)
+ fprintf(fp, "-");
+ if (instr->src[src].abs)
+ fprintf(fp, "abs(");
+
+ print_src(&instr->src[src].src, state);
+
+ bool print_swizzle = false;
+ for (unsigned i = 0; i < 4; i++) {
+ if (!nir_alu_instr_channel_used(instr, src, i))
+ continue;
+
+ if (instr->src[src].swizzle[i] != i) {
+ print_swizzle = true;
+ break;
+ }
+ }
+
+ if (print_swizzle) {
+ fprintf(fp, ".");
+ for (unsigned i = 0; i < 4; i++) {
+ if (!nir_alu_instr_channel_used(instr, src, i))
+ continue;
+
+ fprintf(fp, "%c", "xyzw"[instr->src[src].swizzle[i]]);
+ }
+ }
+
+ if (instr->src[src].abs)
+ fprintf(fp, ")");
+ }
+
+ static void
+ print_alu_dest(nir_alu_dest *dest, print_state *state)
+ {
+ FILE *fp = state->fp;
+ /* we're going to print the saturate modifier later, after the opcode */
+
+ print_dest(&dest->dest, state);
+
+ if (!dest->dest.is_ssa &&
+ dest->write_mask != (1 << dest->dest.reg.reg->num_components) - 1) {
+ fprintf(fp, ".");
+ for (unsigned i = 0; i < 4; i++)
+ if ((dest->write_mask >> i) & 1)
+ fprintf(fp, "%c", "xyzw"[i]);
+ }
+ }
+
+ static void
+ print_alu_instr(nir_alu_instr *instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ print_alu_dest(&instr->dest, state);
+
+ fprintf(fp, " = %s", nir_op_infos[instr->op].name);
+ if (instr->dest.saturate)
+ fprintf(fp, ".sat");
+ fprintf(fp, " ");
+
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ print_alu_src(instr, i, state);
+ }
+ }
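+
+ /*
+  * For reference, an ALU instruction printed by the routine above comes out
+  * roughly as (ssa names, opcode and swizzle are illustrative, not taken
+  * from a real shader):
+  *
+  *    vec4 ssa_5 = fadd ssa_3, ssa_4.xxyz
+  *
+  * i.e. destination, " = ", opcode (plus ".sat" when saturating), then the
+  * comma-separated sources with any negate/abs/swizzle modifiers.
+  */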
+
++static const char *
++get_var_name(nir_variable *var, print_state *state)
++{
++ if (state->ht == NULL)
++ return var->name;
++
++ assert(state->syms);
++
++ struct hash_entry *entry = _mesa_hash_table_search(state->ht, var);
++ if (entry)
++ return entry->data;
++
++ char *name;
++ if (var->name == NULL) {
++ name = ralloc_asprintf(state->syms, "@%u", state->index++);
++ } else {
++ struct set_entry *set_entry = _mesa_set_search(state->syms, var->name);
++ if (set_entry != NULL) {
++ /* we have a collision with another name, append an @ + a unique
++ * index */
++ name = ralloc_asprintf(state->syms, "%s@%u", var->name,
++ state->index++);
++ } else {
++ /* Mark this one as seen */
++ _mesa_set_add(state->syms, var->name);
++ name = var->name;
++ }
++ }
++
++ _mesa_hash_table_insert(state->ht, var, name);
++
++ return name;
++}
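++
++/* Illustrative example (names are hypothetical): two distinct variables
++ * both called "color" print as "color" and "color@0", and a nameless
++ * variable prints as "@1"; the numeric suffixes come from state->index.
++ */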
++
+ static void
+ print_constant(nir_constant *c, const struct glsl_type *type, print_state *state)
+ {
+ FILE *fp = state->fp;
+ unsigned total_elems = glsl_get_components(type);
+ unsigned i;
+
+ switch (glsl_get_base_type(type)) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_BOOL:
+ for (i = 0; i < total_elems; i++) {
+ if (i > 0) fprintf(fp, ", ");
+ fprintf(fp, "0x%08x", c->value.u[i]);
+ }
+ break;
+
+ case GLSL_TYPE_FLOAT:
+ for (i = 0; i < total_elems; i++) {
+ if (i > 0) fprintf(fp, ", ");
+ fprintf(fp, "%f", c->value.f[i]);
+ }
+ break;
+
+ case GLSL_TYPE_STRUCT:
+ for (i = 0; i < c->num_elements; i++) {
+ if (i > 0) fprintf(fp, ", ");
+ fprintf(fp, "{ ");
+ print_constant(c->elements[i], glsl_get_struct_field(type, i), state);
+ fprintf(fp, " }");
+ }
+ break;
+
+ case GLSL_TYPE_ARRAY:
+ for (i = 0; i < c->num_elements; i++) {
+ if (i > 0) fprintf(fp, ", ");
+ fprintf(fp, "{ ");
+ print_constant(c->elements[i], glsl_get_array_element(type), state);
+ fprintf(fp, " }");
+ }
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+ }
+
+ static void
+ print_var_decl(nir_variable *var, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ fprintf(fp, "decl_var ");
+
+ const char *const cent = (var->data.centroid) ? "centroid " : "";
+ const char *const samp = (var->data.sample) ? "sample " : "";
+ const char *const patch = (var->data.patch) ? "patch " : "";
+ const char *const inv = (var->data.invariant) ? "invariant " : "";
+ const char *const mode[] = { "shader_in ", "shader_out ", "", "",
- struct set_entry *entry = NULL;
- if (state->syms)
- entry = _mesa_set_search(state->syms, var->name);
-
- char *name;
-
- if (entry != NULL) {
- /* we have a collision with another name, append an @ + a unique index */
- name = ralloc_asprintf(state->syms, "%s@%u", var->name, state->index++);
- } else {
- name = var->name;
- }
-
- fprintf(fp, " %s", name);
++ "uniform ", "shader_storage ", "shared ",
++ "system "};
+
+ fprintf(fp, "%s%s%s%s%s%s ",
+ cent, samp, patch, inv, mode[var->data.mode],
+ glsl_interp_qualifier_name(var->data.interpolation));
+
+ glsl_print_type(var->type, fp);
+
-
- if (state->syms) {
- _mesa_set_add(state->syms, name);
- _mesa_hash_table_insert(state->ht, var, name);
- }
++ fprintf(fp, " %s", get_var_name(var, state));
+
+ if (var->data.mode == nir_var_shader_in ||
+ var->data.mode == nir_var_shader_out ||
+ var->data.mode == nir_var_uniform ||
+ var->data.mode == nir_var_shader_storage) {
+ const char *loc = NULL;
+ char buf[4];
+
+ switch (state->shader->stage) {
+ case MESA_SHADER_VERTEX:
+ if (var->data.mode == nir_var_shader_in)
+ loc = gl_vert_attrib_name(var->data.location);
+ else if (var->data.mode == nir_var_shader_out)
+ loc = gl_varying_slot_name(var->data.location);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if ((var->data.mode == nir_var_shader_in) ||
+ (var->data.mode == nir_var_shader_out))
+ loc = gl_varying_slot_name(var->data.location);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ if (var->data.mode == nir_var_shader_in)
+ loc = gl_varying_slot_name(var->data.location);
+ else if (var->data.mode == nir_var_shader_out)
+ loc = gl_frag_result_name(var->data.location);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_COMPUTE:
+ default:
+ /* TODO */
+ break;
+ }
+
+ if (!loc) {
+ snprintf(buf, sizeof(buf), "%u", var->data.location);
+ loc = buf;
+ }
+
+ fprintf(fp, " (%s, %u)", loc, var->data.driver_location);
+ }
+
+ if (var->constant_initializer) {
+ fprintf(fp, " = { ");
+ print_constant(var->constant_initializer, var->type, state);
+ fprintf(fp, " }");
+ }
+
+ fprintf(fp, "\n");
- const char *name;
- if (state->ht) {
- struct hash_entry *entry = _mesa_hash_table_search(state->ht, var);
-
- assert(entry != NULL);
- name = entry->data;
- } else {
- name = var->name;
- }
-
- fprintf(fp, "%s", name);
+ }
+
+ static void
+ print_var(nir_variable *var, print_state *state)
+ {
+ FILE *fp = state->fp;
- fprintf(fp, "%u", instr->sampler_index);
++ fprintf(fp, "%s", get_var_name(var, state));
+ }
+
+ static void
+ print_deref_var(nir_deref_var *deref, print_state *state)
+ {
+ print_var(deref->var, state);
+ }
+
+ static void
+ print_deref_array(nir_deref_array *deref, print_state *state)
+ {
+ FILE *fp = state->fp;
+ fprintf(fp, "[");
+ switch (deref->deref_array_type) {
+ case nir_deref_array_type_direct:
+ fprintf(fp, "%u", deref->base_offset);
+ break;
+ case nir_deref_array_type_indirect:
+ if (deref->base_offset != 0)
+ fprintf(fp, "%u + ", deref->base_offset);
+ print_src(&deref->indirect, state);
+ break;
+ case nir_deref_array_type_wildcard:
+ fprintf(fp, "*");
+ break;
+ }
+ fprintf(fp, "]");
+ }
+
+ static void
+ print_deref_struct(nir_deref_struct *deref, const struct glsl_type *parent_type,
+ print_state *state)
+ {
+ FILE *fp = state->fp;
+ fprintf(fp, ".%s", glsl_get_struct_elem_name(parent_type, deref->index));
+ }
+
+ static void
+ print_deref(nir_deref_var *deref, print_state *state)
+ {
+ nir_deref *tail = &deref->deref;
+ nir_deref *pretail = NULL;
+ while (tail != NULL) {
+ switch (tail->deref_type) {
+ case nir_deref_type_var:
+ assert(pretail == NULL);
+ assert(tail == &deref->deref);
+ print_deref_var(deref, state);
+ break;
+
+ case nir_deref_type_array:
+ assert(pretail != NULL);
+ print_deref_array(nir_deref_as_array(tail), state);
+ break;
+
+ case nir_deref_type_struct:
+ assert(pretail != NULL);
+ print_deref_struct(nir_deref_as_struct(tail),
+ pretail->type, state);
+ break;
+
+ default:
+ unreachable("Invalid deref type");
+ }
+
+ pretail = tail;
+ tail = pretail->child;
+ }
+ }
+
+ static void
+ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
+ {
+ unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+ FILE *fp = state->fp;
+
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
+ print_dest(&instr->dest, state);
+ fprintf(fp, " = ");
+ }
+
+ fprintf(fp, "intrinsic %s (", nir_intrinsic_infos[instr->intrinsic].name);
+
+ for (unsigned i = 0; i < num_srcs; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ print_src(&instr->src[i], state);
+ }
+
+ fprintf(fp, ") (");
+
+ unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
+
+ for (unsigned i = 0; i < num_vars; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ print_deref(instr->variables[i], state);
+ }
+
+ fprintf(fp, ") (");
+
+ unsigned num_indices = nir_intrinsic_infos[instr->intrinsic].num_indices;
+
+ for (unsigned i = 0; i < num_indices; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ fprintf(fp, "%d", instr->const_index[i]);
+ }
+
+ fprintf(fp, ")");
+
+ if (!state->shader)
+ return;
+
+ struct exec_list *var_list = NULL;
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_uniform:
+ var_list = &state->shader->uniforms;
+ break;
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_per_vertex_input:
+ var_list = &state->shader->inputs;
+ break;
+ case nir_intrinsic_load_output:
+ case nir_intrinsic_store_output:
+ case nir_intrinsic_store_per_vertex_output:
+ var_list = &state->shader->outputs;
+ break;
+ default:
+ return;
+ }
+
+ nir_foreach_variable(var, var_list) {
+ if ((var->data.driver_location == instr->const_index[0]) &&
+ var->name) {
+ fprintf(fp, "\t/* %s */", var->name);
+ break;
+ }
+ }
+ }
+
+ static void
+ print_tex_instr(nir_tex_instr *instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ print_dest(&instr->dest, state);
+
+ fprintf(fp, " = ");
+
+ switch (instr->op) {
+ case nir_texop_tex:
+ fprintf(fp, "tex ");
+ break;
+ case nir_texop_txb:
+ fprintf(fp, "txb ");
+ break;
+ case nir_texop_txl:
+ fprintf(fp, "txl ");
+ break;
+ case nir_texop_txd:
+ fprintf(fp, "txd ");
+ break;
+ case nir_texop_txf:
+ fprintf(fp, "txf ");
+ break;
+ case nir_texop_txf_ms:
+ fprintf(fp, "txf_ms ");
+ break;
+ case nir_texop_txs:
+ fprintf(fp, "txs ");
+ break;
+ case nir_texop_lod:
+ fprintf(fp, "lod ");
+ break;
+ case nir_texop_tg4:
+ fprintf(fp, "tg4 ");
+ break;
+ case nir_texop_query_levels:
+ fprintf(fp, "query_levels ");
+ break;
+ case nir_texop_texture_samples:
+ fprintf(fp, "texture_samples ");
+ break;
+ case nir_texop_samples_identical:
+ fprintf(fp, "samples_identical ");
+ break;
+ default:
+ unreachable("Invalid texture operation");
+ break;
+ }
+
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ print_src(&instr->src[i].src, state);
+
+ fprintf(fp, " ");
+
+ switch(instr->src[i].src_type) {
+ case nir_tex_src_coord:
+ fprintf(fp, "(coord)");
+ break;
+ case nir_tex_src_projector:
+ fprintf(fp, "(projector)");
+ break;
+ case nir_tex_src_comparitor:
+ fprintf(fp, "(comparitor)");
+ break;
+ case nir_tex_src_offset:
+ fprintf(fp, "(offset)");
+ break;
+ case nir_tex_src_bias:
+ fprintf(fp, "(bias)");
+ break;
+ case nir_tex_src_lod:
+ fprintf(fp, "(lod)");
+ break;
+ case nir_tex_src_ms_index:
+ fprintf(fp, "(ms_index)");
+ break;
+ case nir_tex_src_ddx:
+ fprintf(fp, "(ddx)");
+ break;
+ case nir_tex_src_ddy:
+ fprintf(fp, "(ddy)");
+ break;
++ case nir_tex_src_texture_offset:
++ fprintf(fp, "(texture_offset)");
++ break;
+ case nir_tex_src_sampler_offset:
+ fprintf(fp, "(sampler_offset)");
+ break;
+
+ default:
+ unreachable("Invalid texture source type");
+ break;
+ }
+
+ fprintf(fp, ", ");
+ }
+
+ bool has_nonzero_offset = false;
+ for (unsigned i = 0; i < 4; i++) {
+ if (instr->const_offset[i] != 0) {
+ has_nonzero_offset = true;
+ break;
+ }
+ }
+
+ if (has_nonzero_offset) {
+ fprintf(fp, "[%i %i %i %i] (offset), ",
+ instr->const_offset[0], instr->const_offset[1],
+ instr->const_offset[2], instr->const_offset[3]);
+ }
+
+ if (instr->op == nir_texop_tg4) {
+ fprintf(fp, "%u (gather_component), ", instr->component);
+ }
+
++ if (instr->texture) {
++ assert(instr->sampler);
++ print_deref(instr->texture, state);
++ fprintf(fp, " (texture)");
++ }
+ if (instr->sampler) {
+ print_deref(instr->sampler, state);
++ fprintf(fp, " (sampler)");
+ } else {
-
- fprintf(fp, " (sampler)");
++ assert(instr->texture == NULL);
++ fprintf(fp, "%u (texture) %u (sampler)",
++ instr->texture_index, instr->sampler_index);
+ }
+ }
+
+ static void
+ print_call_instr(nir_call_instr *instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ fprintf(fp, "call %s ", instr->callee->name);
+
+ for (unsigned i = 0; i < instr->num_params; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ print_deref(instr->params[i], state);
+ }
+
+ if (instr->return_deref != NULL) {
+ if (instr->num_params != 0)
+ fprintf(fp, ", ");
+ fprintf(fp, "returning ");
+ print_deref(instr->return_deref, state);
+ }
+ }
+
+ static void
+ print_load_const_instr(nir_load_const_instr *instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ print_ssa_def(&instr->def, state);
+
+ fprintf(fp, " = load_const (");
+
+ for (unsigned i = 0; i < instr->def.num_components; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ /*
+ * we don't really know the type of the constant (if it will be used as a
+ * float or an int), so just print the raw constant in hex for fidelity
+ * and then print the float in a comment for readability.
+ */
+
+ fprintf(fp, "0x%08x /* %f */", instr->value.u[i], instr->value.f[i]);
+ }
+
+ fprintf(fp, ")");
+ }
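+
+ /*
+  * Illustrative output for a one-component constant holding 1.0 (the ssa
+  * index is hypothetical): "vec1 ssa_0 = load_const (0x3f800000 ...)",
+  * where the elided part is the float value 1.000000 printed as an inline
+  * comment for readability.
+  */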
+
+ static void
+ print_jump_instr(nir_jump_instr *instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ switch (instr->type) {
+ case nir_jump_break:
+ fprintf(fp, "break");
+ break;
+
+ case nir_jump_continue:
+ fprintf(fp, "continue");
+ break;
+
+ case nir_jump_return:
+ fprintf(fp, "return");
+ break;
+ }
+ }
+
+ static void
+ print_ssa_undef_instr(nir_ssa_undef_instr* instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+ print_ssa_def(&instr->def, state);
+ fprintf(fp, " = undefined");
+ }
+
+ static void
+ print_phi_instr(nir_phi_instr *instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+ print_dest(&instr->dest, state);
+ fprintf(fp, " = phi ");
+ nir_foreach_phi_src(instr, src) {
+ if (&src->node != exec_list_get_head(&instr->srcs))
+ fprintf(fp, ", ");
+
+ fprintf(fp, "block_%u: ", src->pred->index);
+ print_src(&src->src, state);
+ }
+ }
+
+ static void
+ print_parallel_copy_instr(nir_parallel_copy_instr *instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+ nir_foreach_parallel_copy_entry(instr, entry) {
+ if (&entry->node != exec_list_get_head(&instr->entries))
+ fprintf(fp, "; ");
+
+ print_dest(&entry->dest, state);
+ fprintf(fp, " = ");
+ print_src(&entry->src, state);
+ }
+ }
+
+ static void
+ print_instr(const nir_instr *instr, print_state *state, unsigned tabs)
+ {
+ FILE *fp = state->fp;
+ print_tabs(tabs, fp);
+
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ print_alu_instr(nir_instr_as_alu(instr), state);
+ break;
+
+ case nir_instr_type_call:
+ print_call_instr(nir_instr_as_call(instr), state);
+ break;
+
+ case nir_instr_type_intrinsic:
+ print_intrinsic_instr(nir_instr_as_intrinsic(instr), state);
+ break;
+
+ case nir_instr_type_tex:
+ print_tex_instr(nir_instr_as_tex(instr), state);
+ break;
+
+ case nir_instr_type_load_const:
+ print_load_const_instr(nir_instr_as_load_const(instr), state);
+ break;
+
+ case nir_instr_type_jump:
+ print_jump_instr(nir_instr_as_jump(instr), state);
+ break;
+
+ case nir_instr_type_ssa_undef:
+ print_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state);
+ break;
+
+ case nir_instr_type_phi:
+ print_phi_instr(nir_instr_as_phi(instr), state);
+ break;
+
+ case nir_instr_type_parallel_copy:
+ print_parallel_copy_instr(nir_instr_as_parallel_copy(instr), state);
+ break;
+
+ default:
+ unreachable("Invalid instruction type");
+ break;
+ }
+ }
+
+ static int
+ compare_block_index(const void *p1, const void *p2)
+ {
+ const nir_block *block1 = *((const nir_block **) p1);
+ const nir_block *block2 = *((const nir_block **) p2);
+
+ return (int) block1->index - (int) block2->index;
+ }
+
+ static void print_cf_node(nir_cf_node *node, print_state *state,
+ unsigned tabs);
+
+ static void
+ print_block(nir_block *block, print_state *state, unsigned tabs)
+ {
+ FILE *fp = state->fp;
+
+ print_tabs(tabs, fp);
+ fprintf(fp, "block block_%u:\n", block->index);
+
+ /* sort the predecessors by index so we consistently print the same thing */
+
+ nir_block **preds =
+ malloc(block->predecessors->entries * sizeof(nir_block *));
+
+ struct set_entry *entry;
+ unsigned i = 0;
+ set_foreach(block->predecessors, entry) {
+ preds[i++] = (nir_block *) entry->key;
+ }
+
+ qsort(preds, block->predecessors->entries, sizeof(nir_block *),
+ compare_block_index);
+
+ print_tabs(tabs, fp);
+ fprintf(fp, "/* preds: ");
+ for (unsigned i = 0; i < block->predecessors->entries; i++) {
+ fprintf(fp, "block_%u ", preds[i]->index);
+ }
+ fprintf(fp, "*/\n");
+
+ free(preds);
+
+ nir_foreach_instr(block, instr) {
+ print_instr(instr, state, tabs);
+ fprintf(fp, "\n");
+ }
+
+ print_tabs(tabs, fp);
+ fprintf(fp, "/* succs: ");
+ for (unsigned i = 0; i < 2; i++)
+ if (block->successors[i]) {
+ fprintf(fp, "block_%u ", block->successors[i]->index);
+ }
+ fprintf(fp, "*/\n");
+ }
+
+ static void
+ print_if(nir_if *if_stmt, print_state *state, unsigned tabs)
+ {
+ FILE *fp = state->fp;
+
+ print_tabs(tabs, fp);
+ fprintf(fp, "if ");
+ print_src(&if_stmt->condition, state);
+ fprintf(fp, " {\n");
+ foreach_list_typed(nir_cf_node, node, node, &if_stmt->then_list) {
+ print_cf_node(node, state, tabs + 1);
+ }
+ print_tabs(tabs, fp);
+ fprintf(fp, "} else {\n");
+ foreach_list_typed(nir_cf_node, node, node, &if_stmt->else_list) {
+ print_cf_node(node, state, tabs + 1);
+ }
+ print_tabs(tabs, fp);
+ fprintf(fp, "}\n");
+ }
+
+ static void
+ print_loop(nir_loop *loop, print_state *state, unsigned tabs)
+ {
+ FILE *fp = state->fp;
+
+ print_tabs(tabs, fp);
+ fprintf(fp, "loop {\n");
+ foreach_list_typed(nir_cf_node, node, node, &loop->body) {
+ print_cf_node(node, state, tabs + 1);
+ }
+ print_tabs(tabs, fp);
+ fprintf(fp, "}\n");
+ }
+
+ static void
+ print_cf_node(nir_cf_node *node, print_state *state, unsigned int tabs)
+ {
+ switch (node->type) {
+ case nir_cf_node_block:
+ print_block(nir_cf_node_as_block(node), state, tabs);
+ break;
+
+ case nir_cf_node_if:
+ print_if(nir_cf_node_as_if(node), state, tabs);
+ break;
+
+ case nir_cf_node_loop:
+ print_loop(nir_cf_node_as_loop(node), state, tabs);
+ break;
+
+ default:
+ unreachable("Invalid CFG node type");
+ }
+ }
+
+ static void
+ print_function_impl(nir_function_impl *impl, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ fprintf(fp, "\nimpl %s ", impl->function->name);
+
+ for (unsigned i = 0; i < impl->num_params; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ print_var(impl->params[i], state);
+ }
+
+ if (impl->return_var != NULL) {
+ if (impl->num_params != 0)
+ fprintf(fp, ", ");
+ fprintf(fp, "returning ");
+ print_var(impl->return_var, state);
+ }
+
+ fprintf(fp, "{\n");
+
+ nir_foreach_variable(var, &impl->locals) {
+ fprintf(fp, "\t");
+ print_var_decl(var, state);
+ }
+
+ foreach_list_typed(nir_register, reg, node, &impl->registers) {
+ fprintf(fp, "\t");
+ print_register_decl(reg, state);
+ }
+
+ nir_index_blocks(impl);
+
+ foreach_list_typed(nir_cf_node, node, node, &impl->body) {
+ print_cf_node(node, state, 1);
+ }
+
+ fprintf(fp, "\tblock block_%u:\n}\n\n", impl->end_block->index);
+ }
+
+ static void
+ print_function(nir_function *function, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ fprintf(fp, "decl_function %s ", function->name);
+
+ for (unsigned i = 0; i < function->num_params; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ switch (function->params[i].param_type) {
+ case nir_parameter_in:
+ fprintf(fp, "in ");
+ break;
+ case nir_parameter_out:
+ fprintf(fp, "out ");
+ break;
+ case nir_parameter_inout:
+ fprintf(fp, "inout ");
+ break;
+ default:
+ unreachable("Invalid parameter type");
+ }
+
+ glsl_print_type(function->params[i].type, fp);
+ }
+
+ if (function->return_type != NULL) {
+ if (function->num_params != 0)
+ fprintf(fp, ", ");
+ fprintf(fp, "returning ");
+ glsl_print_type(function->return_type, fp);
+ }
+
+ fprintf(fp, "\n");
+
+ if (function->impl != NULL) {
+ print_function_impl(function->impl, state);
+ return;
+ }
+ }
+
+ static void
+ init_print_state(print_state *state, nir_shader *shader, FILE *fp)
+ {
+ state->fp = fp;
+ state->shader = shader;
+ state->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ state->syms = _mesa_set_create(NULL, _mesa_key_hash_string,
+ _mesa_key_string_equal);
+ state->index = 0;
+ }
+
+ static void
+ destroy_print_state(print_state *state)
+ {
+ _mesa_hash_table_destroy(state->ht, NULL);
+ _mesa_set_destroy(state->syms, NULL);
+ }
+
+ void
+ nir_print_shader(nir_shader *shader, FILE *fp)
+ {
+ print_state state;
+ init_print_state(&state, shader, fp);
+
+ fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage));
+
+ if (shader->info.name)
+ fprintf(fp, "name: %s\n", shader->info.name);
+
+ if (shader->info.label)
+ fprintf(fp, "label: %s\n", shader->info.label);
+
+ fprintf(fp, "inputs: %u\n", shader->num_inputs);
+ fprintf(fp, "outputs: %u\n", shader->num_outputs);
+ fprintf(fp, "uniforms: %u\n", shader->num_uniforms);
++ fprintf(fp, "shared: %u\n", shader->num_shared);
+
+ nir_foreach_variable(var, &shader->uniforms) {
+ print_var_decl(var, &state);
+ }
+
+ nir_foreach_variable(var, &shader->inputs) {
+ print_var_decl(var, &state);
+ }
+
+ nir_foreach_variable(var, &shader->outputs) {
+ print_var_decl(var, &state);
+ }
+
++ nir_foreach_variable(var, &shader->shared) {
++ print_var_decl(var, &state);
++ }
++
+ nir_foreach_variable(var, &shader->globals) {
+ print_var_decl(var, &state);
+ }
+
+ nir_foreach_variable(var, &shader->system_values) {
+ print_var_decl(var, &state);
+ }
+
+ foreach_list_typed(nir_register, reg, node, &shader->registers) {
+ print_register_decl(reg, &state);
+ }
+
+ foreach_list_typed(nir_function, func, node, &shader->functions) {
+ print_function(func, &state);
+ }
+
+ destroy_print_state(&state);
+ }
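+
+ /*
+  * Typical (illustrative) use from a driver or a debugging session, assuming
+  * an existing nir_shader *shader:
+  *
+  *    nir_print_shader(shader, stderr);
+  */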
+
+ void
+ nir_print_instr(const nir_instr *instr, FILE *fp)
+ {
+ print_state state = {
+ .fp = fp,
+ };
+ print_instr(instr, &state, 0);
+
+ }
--- /dev/null
-nir_remove_dead_variables(nir_shader *shader)
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir.h"
+
+ static void
+ add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set *live)
+ {
+ unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
+ for (unsigned i = 0; i < num_vars; i++) {
+ nir_variable *var = instr->variables[i]->var;
+ _mesa_set_add(live, var);
+ }
+ }
+
+ static void
+ add_var_use_call(nir_call_instr *instr, struct set *live)
+ {
+ if (instr->return_deref != NULL) {
+ nir_variable *var = instr->return_deref->var;
+ _mesa_set_add(live, var);
+ }
+
+ for (unsigned i = 0; i < instr->num_params; i++) {
+ nir_variable *var = instr->params[i]->var;
+ _mesa_set_add(live, var);
+ }
+ }
+
+ static void
+ add_var_use_tex(nir_tex_instr *instr, struct set *live)
+ {
+ if (instr->sampler != NULL) {
+ nir_variable *var = instr->sampler->var;
+ _mesa_set_add(live, var);
+ }
+ }
+
+ static bool
+ add_var_use_block(nir_block *block, void *state)
+ {
+ struct set *live = state;
+
+ nir_foreach_instr(block, instr) {
+ switch(instr->type) {
+ case nir_instr_type_intrinsic:
+ add_var_use_intrinsic(nir_instr_as_intrinsic(instr), live);
+ break;
+
+ case nir_instr_type_call:
+ add_var_use_call(nir_instr_as_call(instr), live);
+ break;
+
+ case nir_instr_type_tex:
+ add_var_use_tex(nir_instr_as_tex(instr), live);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return true;
+ }
+
+ static void
+ add_var_use_shader(nir_shader *shader, struct set *live)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl) {
+ nir_foreach_block(function->impl, add_var_use_block, live);
+ }
+ }
+ }
+
+ static bool
+ remove_dead_vars(struct exec_list *var_list, struct set *live)
+ {
+ bool progress = false;
+
+ foreach_list_typed_safe(nir_variable, var, node, var_list) {
+ struct set_entry *entry = _mesa_set_search(live, var);
+ if (entry == NULL) {
+ exec_node_remove(&var->node);
+ ralloc_free(var);
+ progress = true;
+ }
+ }
+
+ return progress;
+ }
+
+ bool
- progress = remove_dead_vars(&shader->globals, live) || progress;
++nir_remove_dead_variables(nir_shader *shader, nir_variable_mode mode)
+ {
+ bool progress = false;
+ struct set *live =
+ _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+
+ add_var_use_shader(shader, live);
+
- nir_foreach_function(shader, function) {
- if (function->impl) {
- if (remove_dead_vars(&function->impl->locals, live)) {
- nir_metadata_preserve(function->impl, nir_metadata_block_index |
- nir_metadata_dominance |
- nir_metadata_live_ssa_defs);
- progress = true;
++ if (mode == nir_var_uniform || mode == nir_var_all)
++ progress = remove_dead_vars(&shader->uniforms, live) || progress;
+
++ if (mode == nir_var_shader_in || mode == nir_var_all)
++ progress = remove_dead_vars(&shader->inputs, live) || progress;
++
++ if (mode == nir_var_shader_out || mode == nir_var_all)
++ progress = remove_dead_vars(&shader->outputs, live) || progress;
++
++ if (mode == nir_var_global || mode == nir_var_all)
++ progress = remove_dead_vars(&shader->globals, live) || progress;
++
++ if (mode == nir_var_system_value || mode == nir_var_all)
++ progress = remove_dead_vars(&shader->system_values, live) || progress;
++
++ if (mode == nir_var_local || mode == nir_var_all) {
++ nir_foreach_function(shader, function) {
++ if (function->impl) {
++ if (remove_dead_vars(&function->impl->locals, live)) {
++ nir_metadata_preserve(function->impl, nir_metadata_block_index |
++ nir_metadata_dominance |
++ nir_metadata_live_ssa_defs);
++ progress = true;
++ }
+ }
+ }
+ }
+
+ _mesa_set_destroy(live, NULL);
+ return progress;
+ }
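+
+ /*
+  * Illustrative call sites, assuming an existing nir_shader *shader:
+  *
+  *    nir_remove_dead_variables(shader, nir_var_local);  // function locals only
+  *    nir_remove_dead_variables(shader, nir_var_all);    // every mode handled above
+  */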
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2016 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "nir.h"
++#include "nir_phi_builder.h"
++
++struct repair_ssa_state {
++ nir_function_impl *impl;
++
++ BITSET_WORD *def_set;
++ struct nir_phi_builder *phi_builder;
++
++ bool progress;
++};
++
++/* Get ready to build a phi and return the builder */
++static struct nir_phi_builder *
++prep_build_phi(struct repair_ssa_state *state)
++{
++ const unsigned num_words = BITSET_WORDS(state->impl->num_blocks);
++
++ /* We create the phi builder on-demand. */
++ if (state->phi_builder == NULL) {
++ state->phi_builder = nir_phi_builder_create(state->impl);
++ state->def_set = ralloc_array(NULL, BITSET_WORD, num_words);
++ }
++
++ /* We're going to build a phi. That's progress. */
++ state->progress = true;
++
++ /* Set the defs set to empty */
++ memset(state->def_set, 0, num_words * sizeof(*state->def_set));
++
++ return state->phi_builder;
++}
++
++static nir_block *
++get_src_block(nir_src *src)
++{
++ if (src->parent_instr->type == nir_instr_type_phi) {
++ return exec_node_data(nir_phi_src, src, src)->pred;
++ } else {
++ return src->parent_instr->block;
++ }
++}
++
++static bool
++repair_ssa_def(nir_ssa_def *def, void *void_state)
++{
++ struct repair_ssa_state *state = void_state;
++
++ bool is_valid = true;
++ nir_foreach_use(def, src) {
++ if (!nir_block_dominates(def->parent_instr->block, get_src_block(src))) {
++ is_valid = false;
++ break;
++ }
++ }
++
++ if (is_valid)
++ return true;
++
++ struct nir_phi_builder *pb = prep_build_phi(state);
++
++ BITSET_SET(state->def_set, def->parent_instr->block->index);
++
++ struct nir_phi_builder_value *val =
++ nir_phi_builder_add_value(pb, def->num_components, state->def_set);
++
++ nir_phi_builder_value_set_block_def(val, def->parent_instr->block, def);
++
++ nir_foreach_use_safe(def, src) {
++ nir_block *src_block = get_src_block(src);
++ if (!nir_block_dominates(def->parent_instr->block, src_block)) {
++ nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(
++ nir_phi_builder_value_get_block_def(val, src_block)));
++ }
++ }
++
++ return true;
++}
++
++static bool
++repair_ssa_block(nir_block *block, void *state)
++{
++ nir_foreach_instr_safe(block, instr) {
++ nir_foreach_ssa_def(instr, repair_ssa_def, state);
++ }
++
++ return true;
++}
++
++bool
++nir_repair_ssa_impl(nir_function_impl *impl)
++{
++ struct repair_ssa_state state;
++
++ state.impl = impl;
++ state.phi_builder = NULL;
++ state.progress = false;
++
++ nir_metadata_require(impl, nir_metadata_block_index |
++ nir_metadata_dominance);
++
++ nir_foreach_block(impl, repair_ssa_block, &state);
++
++ if (state.progress)
++ nir_metadata_preserve(impl, nir_metadata_block_index |
++ nir_metadata_dominance);
++
++ if (state.phi_builder) {
++ nir_phi_builder_finish(state.phi_builder);
++ ralloc_free(state.def_set);
++ }
++
++ return state.progress;
++}
++
++/** This pass can be used to repair SSA form in a shader.
++ *
++ * Sometimes a transformation (such as return lowering) will have to make
++ * changes to a shader which, while still correct, break some of NIR's SSA
++ * invariants. This pass will insert ssa_undefs and phi nodes as needed to
++ * get the shader back into SSA that the validator will like.
++ */
++bool
++nir_repair_ssa(nir_shader *shader)
++{
++ bool progress = false;
++
++ nir_foreach_function(shader, function) {
++ if (function->impl)
++ progress = nir_repair_ssa_impl(function->impl) || progress;
++ }
++
++ return progress;
++}
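++
++/* Illustrative use after a pass that may have broken dominance (the lowering
++ * pass named here is only an example of such a transformation):
++ *
++ *    nir_lower_returns(shader);
++ *    nir_repair_ssa(shader);
++ */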
--- /dev/null
+ /*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+ #include "nir.h"
+
+ /**
+ * \file nir_sweep.c
+ *
+ * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated
+ * memory - anything still connected to the program will be kept, and any dead memory
+ * we dropped on the floor will be freed.
+ *
+ * The expectation is that drivers should call this when finished compiling the shader
+ * (after any optimization, lowering, and so on). However, it's also fine to call it
+ * earlier, and even many times, trading CPU cycles for memory savings.
+ */
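+
+ /*
+  * A minimal usage sketch (illustrative; the compile/optimize steps stand in
+  * for whatever the driver actually runs):
+  *
+  *    nir_shader *nir = ...;        // compiled shader
+  *    // ... optimization and lowering passes ...
+  *    nir_sweep(nir);               // free everything no longer reachable
+  */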
+
+ #define steal_list(mem_ctx, type, list) \
+ foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); }
+
+ static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node);
+
+ static bool
+ sweep_src_indirect(nir_src *src, void *nir)
+ {
+ if (!src->is_ssa && src->reg.indirect)
+ ralloc_steal(nir, src->reg.indirect);
+
+ return true;
+ }
+
+ static bool
+ sweep_dest_indirect(nir_dest *dest, void *nir)
+ {
+ if (!dest->is_ssa && dest->reg.indirect)
+ ralloc_steal(nir, dest->reg.indirect);
+
+ return true;
+ }
+
+ static void
+ sweep_block(nir_shader *nir, nir_block *block)
+ {
+ ralloc_steal(nir, block);
+
+ nir_foreach_instr(block, instr) {
+ ralloc_steal(nir, instr);
+
+ nir_foreach_src(instr, sweep_src_indirect, nir);
+ nir_foreach_dest(instr, sweep_dest_indirect, nir);
+ }
+ }
+
+ static void
+ sweep_if(nir_shader *nir, nir_if *iff)
+ {
+ ralloc_steal(nir, iff);
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) {
+ sweep_cf_node(nir, cf_node);
+ }
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) {
+ sweep_cf_node(nir, cf_node);
+ }
+ }
+
+ static void
+ sweep_loop(nir_shader *nir, nir_loop *loop)
+ {
+ ralloc_steal(nir, loop);
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
+ sweep_cf_node(nir, cf_node);
+ }
+ }
+
+ static void
+ sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node)
+ {
+ switch (cf_node->type) {
+ case nir_cf_node_block:
+ sweep_block(nir, nir_cf_node_as_block(cf_node));
+ break;
+ case nir_cf_node_if:
+ sweep_if(nir, nir_cf_node_as_if(cf_node));
+ break;
+ case nir_cf_node_loop:
+ sweep_loop(nir, nir_cf_node_as_loop(cf_node));
+ break;
+ default:
+ unreachable("Invalid CF node type");
+ }
+ }
+
+ static void
+ sweep_impl(nir_shader *nir, nir_function_impl *impl)
+ {
+ ralloc_steal(nir, impl);
+
+ ralloc_steal(nir, impl->params);
+ ralloc_steal(nir, impl->return_var);
+ steal_list(nir, nir_variable, &impl->locals);
+ steal_list(nir, nir_register, &impl->registers);
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) {
+ sweep_cf_node(nir, cf_node);
+ }
+
+ sweep_block(nir, impl->end_block);
+
+ /* Wipe out all the metadata, if any. */
+ nir_metadata_preserve(impl, nir_metadata_none);
+ }
+
+ static void
+ sweep_function(nir_shader *nir, nir_function *f)
+ {
+ ralloc_steal(nir, f);
+ ralloc_steal(nir, f->params);
+
+ if (f->impl)
+ sweep_impl(nir, f->impl);
+ }
+
+ void
+ nir_sweep(nir_shader *nir)
+ {
+ void *rubbish = ralloc_context(NULL);
+
+ /* First, move ownership of all the memory to a temporary context; assume dead. */
+ ralloc_adopt(rubbish, nir);
+
+ ralloc_steal(nir, (char *)nir->info.name);
+ if (nir->info.label)
+ ralloc_steal(nir, (char *)nir->info.label);
+
+ /* Variables and registers are not dead. Steal them back. */
+ steal_list(nir, nir_variable, &nir->uniforms);
+ steal_list(nir, nir_variable, &nir->inputs);
+ steal_list(nir, nir_variable, &nir->outputs);
++ steal_list(nir, nir_variable, &nir->shared);
+ steal_list(nir, nir_variable, &nir->globals);
+ steal_list(nir, nir_variable, &nir->system_values);
+ steal_list(nir, nir_register, &nir->registers);
+
+ /* Recurse into functions, stealing their contents back. */
+ foreach_list_typed(nir_function, func, node, &nir->functions) {
+ sweep_function(nir, func);
+ }
+
+ /* Free everything we didn't steal back. */
+ ralloc_free(rubbish);
+ }
--- /dev/null
- if (instr->return_deref == NULL)
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir.h"
+ #include <assert.h>
+
+ /*
+ * This file checks for invalid IR indicating a bug somewhere in the compiler.
+ */
+
+ /* Since this file is just a pile of asserts, don't bother compiling it if
+ * we're not building a debug build.
+ */
+ #ifdef DEBUG
+
+ /*
+ * Per-register validation state.
+ */
+
+ typedef struct {
+ /*
+ * equivalent to the uses and defs in nir_register, but built up by the
+ * validator. At the end, we verify that the sets have the same entries.
+ */
+ struct set *uses, *if_uses, *defs;
+ nir_function_impl *where_defined; /* NULL for global registers */
+ } reg_validate_state;
+
+ typedef struct {
+ /*
+ * equivalent to the uses in nir_ssa_def, but built up by the validator.
+ * At the end, we verify that the sets have the same entries.
+ */
+ struct set *uses, *if_uses;
+ nir_function_impl *where_defined;
+ } ssa_def_validate_state;
+
+ typedef struct {
+ /* map of register -> validation state (struct above) */
+ struct hash_table *regs;
+
+ /* the current shader being validated */
+ nir_shader *shader;
+
+ /* the current instruction being validated */
+ nir_instr *instr;
+
+ /* the current basic block being validated */
+ nir_block *block;
+
+ /* the current if statement being validated */
+ nir_if *if_stmt;
+
+ /* the current loop being visited */
+ nir_loop *loop;
+
+ /* the parent of the current cf node being visited */
+ nir_cf_node *parent_node;
+
+ /* the current function implementation being validated */
+ nir_function_impl *impl;
+
+ /* map of SSA value -> function implementation where it is defined */
+ struct hash_table *ssa_defs;
+
+ /* bitset of ssa definitions we have found; used to check uniqueness */
+ BITSET_WORD *ssa_defs_found;
+
+ /* bitset of registers we have currently found; used to check uniqueness */
+ BITSET_WORD *regs_found;
+
+ /* map of local variable -> function implementation where it is defined */
+ struct hash_table *var_defs;
+ } validate_state;
+
+ static void validate_src(nir_src *src, validate_state *state);
+
+ static void
+ validate_reg_src(nir_src *src, validate_state *state)
+ {
+ assert(src->reg.reg != NULL);
+
+ struct hash_entry *entry;
+ entry = _mesa_hash_table_search(state->regs, src->reg.reg);
+ assert(entry);
+
+ reg_validate_state *reg_state = (reg_validate_state *) entry->data;
+
+ if (state->instr) {
+ _mesa_set_add(reg_state->uses, src);
+ } else {
+ assert(state->if_stmt);
+ _mesa_set_add(reg_state->if_uses, src);
+ }
+
+ if (!src->reg.reg->is_global) {
+ assert(reg_state->where_defined == state->impl &&
+ "using a register declared in a different function");
+ }
+
+ assert((src->reg.reg->num_array_elems == 0 ||
+ src->reg.base_offset < src->reg.reg->num_array_elems) &&
+ "definitely out-of-bounds array access");
+
+ if (src->reg.indirect) {
+ assert(src->reg.reg->num_array_elems != 0);
+ assert((src->reg.indirect->is_ssa ||
+ src->reg.indirect->reg.indirect == NULL) &&
+ "only one level of indirection allowed");
+ validate_src(src->reg.indirect, state);
+ }
+ }
+
+ static void
+ validate_ssa_src(nir_src *src, validate_state *state)
+ {
+ assert(src->ssa != NULL);
+
+ struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, src->ssa);
+
+ assert(entry);
+
+ ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data;
+
+ assert(def_state->where_defined == state->impl &&
+ "using an SSA value defined in a different function");
+
+ if (state->instr) {
+ _mesa_set_add(def_state->uses, src);
+ } else {
+ assert(state->if_stmt);
+ _mesa_set_add(def_state->if_uses, src);
+ }
+
+ /* TODO validate that the use is dominated by the definition */
+ }
+
+ static void
+ validate_src(nir_src *src, validate_state *state)
+ {
+ if (state->instr)
+ assert(src->parent_instr == state->instr);
+ else
+ assert(src->parent_if == state->if_stmt);
+
+ if (src->is_ssa)
+ validate_ssa_src(src, state);
+ else
+ validate_reg_src(src, state);
+ }
+
+ static void
+ validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state)
+ {
+ nir_alu_src *src = &instr->src[index];
+
+ unsigned num_components;
+ if (src->src.is_ssa)
+ num_components = src->src.ssa->num_components;
+ else {
+ if (src->src.reg.reg->is_packed)
+ num_components = 4; /* can't check anything */
+ else
+ num_components = src->src.reg.reg->num_components;
+ }
+ for (unsigned i = 0; i < 4; i++) {
+ assert(src->swizzle[i] < 4);
+
+ if (nir_alu_instr_channel_used(instr, index, i))
+ assert(src->swizzle[i] < num_components);
+ }
+
+ validate_src(&src->src, state);
+ }
+
+ static void
+ validate_reg_dest(nir_reg_dest *dest, validate_state *state)
+ {
+ assert(dest->reg != NULL);
+
+ assert(dest->parent_instr == state->instr);
+
+ struct hash_entry *entry2;
+ entry2 = _mesa_hash_table_search(state->regs, dest->reg);
+
+ assert(entry2);
+
+ reg_validate_state *reg_state = (reg_validate_state *) entry2->data;
+ _mesa_set_add(reg_state->defs, dest);
+
+ if (!dest->reg->is_global) {
+ assert(reg_state->where_defined == state->impl &&
+ "writing to a register declared in a different function");
+ }
+
+ assert((dest->reg->num_array_elems == 0 ||
+ dest->base_offset < dest->reg->num_array_elems) &&
+ "definitely out-of-bounds array access");
+
+ if (dest->indirect) {
+ assert(dest->reg->num_array_elems != 0);
+ assert((dest->indirect->is_ssa || dest->indirect->reg.indirect == NULL) &&
+ "only one level of indirection allowed");
+ validate_src(dest->indirect, state);
+ }
+ }
+
+ static void
+ validate_ssa_def(nir_ssa_def *def, validate_state *state)
+ {
+ assert(def->index < state->impl->ssa_alloc);
+ assert(!BITSET_TEST(state->ssa_defs_found, def->index));
+ BITSET_SET(state->ssa_defs_found, def->index);
+
+ assert(def->parent_instr == state->instr);
+
+ assert(def->num_components <= 4);
+
+ list_validate(&def->uses);
+ list_validate(&def->if_uses);
+
+ ssa_def_validate_state *def_state = ralloc(state->ssa_defs,
+ ssa_def_validate_state);
+ def_state->where_defined = state->impl;
+ def_state->uses = _mesa_set_create(def_state, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ def_state->if_uses = _mesa_set_create(def_state, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ _mesa_hash_table_insert(state->ssa_defs, def, def_state);
+ }
+
+ static void
+ validate_dest(nir_dest *dest, validate_state *state)
+ {
+ if (dest->is_ssa)
+ validate_ssa_def(&dest->ssa, state);
+ else
+ validate_reg_dest(&dest->reg, state);
+ }
+
+ static void
+ validate_alu_dest(nir_alu_dest *dest, validate_state *state)
+ {
+ unsigned dest_size =
+ dest->dest.is_ssa ? dest->dest.ssa.num_components
+ : dest->dest.reg.reg->num_components;
+ bool is_packed = !dest->dest.is_ssa && dest->dest.reg.reg->is_packed;
+ /*
+ * validate that the instruction doesn't write to components not in the
+ * register/SSA value
+ */
+ assert(is_packed || !(dest->write_mask & ~((1 << dest_size) - 1)));
+
+ /* validate that saturate is only ever used on instructions with
+ * destinations of type float
+ */
+ nir_alu_instr *alu = nir_instr_as_alu(state->instr);
+ assert(nir_op_infos[alu->op].output_type == nir_type_float ||
+ !dest->saturate);
+
+ validate_dest(&dest->dest, state);
+ }
+
+ static void
+ validate_alu_instr(nir_alu_instr *instr, validate_state *state)
+ {
+ assert(instr->op < nir_num_opcodes);
+
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+ validate_alu_src(instr, i, state);
+ }
+
+ validate_alu_dest(&instr->dest, state);
+ }
+
+ static void
+ validate_deref_chain(nir_deref *deref, validate_state *state)
+ {
+ assert(deref->child == NULL || ralloc_parent(deref->child) == deref);
+
+ nir_deref *parent = NULL;
+ while (deref != NULL) {
+ switch (deref->deref_type) {
+ case nir_deref_type_array:
+ assert(deref->type == glsl_get_array_element(parent->type));
+ if (nir_deref_as_array(deref)->deref_array_type ==
+ nir_deref_array_type_indirect)
+ validate_src(&nir_deref_as_array(deref)->indirect, state);
+ break;
+
+ case nir_deref_type_struct:
+ assert(deref->type ==
+ glsl_get_struct_field(parent->type,
+ nir_deref_as_struct(deref)->index));
+ break;
+
+ case nir_deref_type_var:
+ break;
+
+ default:
+ assert(!"Invalid deref type");
+ break;
+ }
+
+ parent = deref;
+ deref = deref->child;
+ }
+ }
+
+ static void
+ validate_var_use(nir_variable *var, validate_state *state)
+ {
+ if (var->data.mode == nir_var_local) {
+ struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, var);
+
+ assert(entry);
+ assert((nir_function_impl *) entry->data == state->impl);
+ }
+ }
+
+ static void
+ validate_deref_var(void *parent_mem_ctx, nir_deref_var *deref, validate_state *state)
+ {
+ assert(deref != NULL);
+ assert(ralloc_parent(deref) == parent_mem_ctx);
+ assert(deref->deref.type == deref->var->type);
+
+ validate_var_use(deref->var, state);
+
+ validate_deref_chain(&deref->deref, state);
+ }
+
+ static void
+ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
+ {
+ unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+ for (unsigned i = 0; i < num_srcs; i++) {
+ unsigned components_read =
+ nir_intrinsic_infos[instr->intrinsic].src_components[i];
+ if (components_read == 0)
+ components_read = instr->num_components;
+
+ assert(components_read > 0);
+
+ if (instr->src[i].is_ssa) {
+ assert(components_read <= instr->src[i].ssa->num_components);
+ } else if (!instr->src[i].reg.reg->is_packed) {
+ assert(components_read <= instr->src[i].reg.reg->num_components);
+ }
+
+ validate_src(&instr->src[i], state);
+ }
+
+ unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
+ for (unsigned i = 0; i < num_vars; i++) {
+ validate_deref_var(instr, instr->variables[i], state);
+ }
+
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
+ unsigned components_written =
+ nir_intrinsic_infos[instr->intrinsic].dest_components;
+ if (components_written == 0)
+ components_written = instr->num_components;
+
+ assert(components_written > 0);
+
+ if (instr->dest.is_ssa) {
+ assert(components_written <= instr->dest.ssa.num_components);
+ } else if (!instr->dest.reg.reg->is_packed) {
+ assert(components_written <= instr->dest.reg.reg->num_components);
+ }
+
+ validate_dest(&instr->dest, state);
+ }
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_var: {
+ const struct glsl_type *type =
+ nir_deref_tail(&instr->variables[0]->deref)->type;
+ assert(glsl_type_is_vector_or_scalar(type) ||
+ (instr->variables[0]->var->data.mode == nir_var_uniform &&
+ glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE));
+ assert(instr->num_components == glsl_get_vector_elements(type));
+ break;
+ }
+ case nir_intrinsic_store_var: {
+ const struct glsl_type *type =
+ nir_deref_tail(&instr->variables[0]->deref)->type;
+ assert(glsl_type_is_vector_or_scalar(type) ||
+ (instr->variables[0]->var->data.mode == nir_var_uniform &&
+ glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE));
+ assert(instr->num_components == glsl_get_vector_elements(type));
+ assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
+ instr->variables[0]->var->data.mode != nir_var_uniform &&
+ instr->variables[0]->var->data.mode != nir_var_shader_storage);
+ assert((instr->const_index[0] & ~((1 << instr->num_components) - 1)) == 0);
+ break;
+ }
+ case nir_intrinsic_copy_var:
+ assert(nir_deref_tail(&instr->variables[0]->deref)->type ==
+ nir_deref_tail(&instr->variables[1]->deref)->type);
+ assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
+ instr->variables[0]->var->data.mode != nir_var_uniform &&
+ instr->variables[0]->var->data.mode != nir_var_shader_storage);
+ break;
+ default:
+ break;
+ }
+ }
+
+ static void
+ validate_tex_instr(nir_tex_instr *instr, validate_state *state)
+ {
+ bool src_type_seen[nir_num_tex_src_types];
+ for (unsigned i = 0; i < nir_num_tex_src_types; i++)
+ src_type_seen[i] = false;
+
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ assert(!src_type_seen[instr->src[i].src_type]);
+ src_type_seen[instr->src[i].src_type] = true;
+ validate_src(&instr->src[i].src, state);
+ }
+
+ if (instr->sampler != NULL)
+ validate_deref_var(instr, instr->sampler, state);
+
+ validate_dest(&instr->dest, state);
+ }
+
+ static void
+ validate_call_instr(nir_call_instr *instr, validate_state *state)
+ {
- else
++ if (instr->return_deref == NULL) {
+ assert(glsl_type_is_void(instr->callee->return_type));
-
- validate_deref_var(instr, instr->return_deref, state);
++ } else {
+ assert(instr->return_deref->deref.type == instr->callee->return_type);
++ validate_deref_var(instr, instr->return_deref, state);
++ }
+
+ assert(instr->num_params == instr->callee->num_params);
+
+ for (unsigned i = 0; i < instr->num_params; i++) {
+ assert(instr->callee->params[i].type == instr->params[i]->deref.type);
+ validate_deref_var(instr, instr->params[i], state);
+ }
+ }
+
+ static void
+ validate_load_const_instr(nir_load_const_instr *instr, validate_state *state)
+ {
+ validate_ssa_def(&instr->def, state);
+ }
+
+ static void
+ validate_ssa_undef_instr(nir_ssa_undef_instr *instr, validate_state *state)
+ {
+ validate_ssa_def(&instr->def, state);
+ }
+
+ static void
+ validate_phi_instr(nir_phi_instr *instr, validate_state *state)
+ {
+ /*
+ * don't validate the sources until we get to them from their predecessor
+ * basic blocks, to avoid validating an SSA use before its definition.
+ */
+
+ validate_dest(&instr->dest, state);
+
+ exec_list_validate(&instr->srcs);
+ assert(exec_list_length(&instr->srcs) ==
+ state->block->predecessors->entries);
+ }
+
+ static void
+ validate_instr(nir_instr *instr, validate_state *state)
+ {
+ assert(instr->block == state->block);
+
+ state->instr = instr;
+
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ validate_alu_instr(nir_instr_as_alu(instr), state);
+ break;
+
+ case nir_instr_type_call:
+ validate_call_instr(nir_instr_as_call(instr), state);
+ break;
+
+ case nir_instr_type_intrinsic:
+ validate_intrinsic_instr(nir_instr_as_intrinsic(instr), state);
+ break;
+
+ case nir_instr_type_tex:
+ validate_tex_instr(nir_instr_as_tex(instr), state);
+ break;
+
+ case nir_instr_type_load_const:
+ validate_load_const_instr(nir_instr_as_load_const(instr), state);
+ break;
+
+ case nir_instr_type_phi:
+ validate_phi_instr(nir_instr_as_phi(instr), state);
+ break;
+
+ case nir_instr_type_ssa_undef:
+ validate_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state);
+ break;
+
+ case nir_instr_type_jump:
+ break;
+
+ default:
+ assert(!"Invalid ALU instruction type");
+ break;
+ }
+
+ state->instr = NULL;
+ }
+
+ static void
+ validate_phi_src(nir_phi_instr *instr, nir_block *pred, validate_state *state)
+ {
+ state->instr = &instr->instr;
+
+ assert(instr->dest.is_ssa);
+
+ exec_list_validate(&instr->srcs);
+ nir_foreach_phi_src(instr, src) {
+ if (src->pred == pred) {
+ assert(src->src.is_ssa);
+ assert(src->src.ssa->num_components ==
+ instr->dest.ssa.num_components);
+
+ validate_src(&src->src, state);
+ state->instr = NULL;
+ return;
+ }
+ }
+
+ abort();
+ }
+
+ static void
+ validate_phi_srcs(nir_block *block, nir_block *succ, validate_state *state)
+ {
+ nir_foreach_instr(succ, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ validate_phi_src(nir_instr_as_phi(instr), block, state);
+ }
+ }
+
+ static void validate_cf_node(nir_cf_node *node, validate_state *state);
+
+ static void
+ validate_block(nir_block *block, validate_state *state)
+ {
+ assert(block->cf_node.parent == state->parent_node);
+
+ state->block = block;
+
+ exec_list_validate(&block->instr_list);
+ nir_foreach_instr(block, instr) {
+ if (instr->type == nir_instr_type_phi) {
+ assert(instr == nir_block_first_instr(block) ||
+ nir_instr_prev(instr)->type == nir_instr_type_phi);
+ }
+
+ if (instr->type == nir_instr_type_jump) {
+ assert(instr == nir_block_last_instr(block));
+ }
+
+ validate_instr(instr, state);
+ }
+
+ assert(block->successors[0] != NULL);
+ assert(block->successors[0] != block->successors[1]);
+
+ for (unsigned i = 0; i < 2; i++) {
+ if (block->successors[i] != NULL) {
+ struct set_entry *entry =
+ _mesa_set_search(block->successors[i]->predecessors, block);
+ assert(entry);
+
+ validate_phi_srcs(block, block->successors[i], state);
+ }
+ }
+
+ struct set_entry *entry;
+ set_foreach(block->predecessors, entry) {
+ const nir_block *pred = entry->key;
+ assert(pred->successors[0] == block ||
+ pred->successors[1] == block);
+ }
+
+ if (!exec_list_is_empty(&block->instr_list) &&
+ nir_block_last_instr(block)->type == nir_instr_type_jump) {
+ assert(block->successors[1] == NULL);
+ nir_jump_instr *jump = nir_instr_as_jump(nir_block_last_instr(block));
+ switch (jump->type) {
+ case nir_jump_break: {
+ nir_block *after =
+ nir_cf_node_as_block(nir_cf_node_next(&state->loop->cf_node));
+ assert(block->successors[0] == after);
+ break;
+ }
+
+ case nir_jump_continue: {
+ nir_block *first =
+ nir_cf_node_as_block(nir_loop_first_cf_node(state->loop));
+ assert(block->successors[0] == first);
+ break;
+ }
+
+ case nir_jump_return:
+ assert(block->successors[0] == state->impl->end_block);
+ break;
+
+ default:
+ unreachable("bad jump type");
+ }
+ } else {
+ nir_cf_node *next = nir_cf_node_next(&block->cf_node);
+ if (next == NULL) {
+ switch (state->parent_node->type) {
+ case nir_cf_node_loop: {
+ nir_block *first =
+ nir_cf_node_as_block(nir_loop_first_cf_node(state->loop));
+ assert(block->successors[0] == first);
+ /* due to the hack for infinite loops, block->successors[1] may
+ * point to the block after the loop.
+ */
+ break;
+ }
+
+ case nir_cf_node_if: {
+ nir_block *after =
+ nir_cf_node_as_block(nir_cf_node_next(state->parent_node));
+ assert(block->successors[0] == after);
+ assert(block->successors[1] == NULL);
+ break;
+ }
+
+ case nir_cf_node_function:
+ assert(block->successors[0] == state->impl->end_block);
+ assert(block->successors[1] == NULL);
+ break;
+
+ default:
+ unreachable("unknown control flow node type");
+ }
+ } else {
+ if (next->type == nir_cf_node_if) {
+ nir_if *if_stmt = nir_cf_node_as_if(next);
+ assert(&block->successors[0]->cf_node ==
+ nir_if_first_then_node(if_stmt));
+ assert(&block->successors[1]->cf_node ==
+ nir_if_first_else_node(if_stmt));
+ } else {
+ assert(next->type == nir_cf_node_loop);
+ nir_loop *loop = nir_cf_node_as_loop(next);
+ assert(&block->successors[0]->cf_node ==
+ nir_loop_first_cf_node(loop));
+ assert(block->successors[1] == NULL);
+ }
+ }
+ }
+ }
+
+ static void
+ validate_if(nir_if *if_stmt, validate_state *state)
+ {
+ state->if_stmt = if_stmt;
+
+ assert(!exec_node_is_head_sentinel(if_stmt->cf_node.node.prev));
+ nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node);
+ assert(prev_node->type == nir_cf_node_block);
+
+ assert(!exec_node_is_tail_sentinel(if_stmt->cf_node.node.next));
+ nir_cf_node *next_node = nir_cf_node_next(&if_stmt->cf_node);
+ assert(next_node->type == nir_cf_node_block);
+
+ validate_src(&if_stmt->condition, state);
+
+ assert(!exec_list_is_empty(&if_stmt->then_list));
+ assert(!exec_list_is_empty(&if_stmt->else_list));
+
+ nir_cf_node *old_parent = state->parent_node;
+ state->parent_node = &if_stmt->cf_node;
+
+ exec_list_validate(&if_stmt->then_list);
+ foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->then_list) {
+ validate_cf_node(cf_node, state);
+ }
+
+ exec_list_validate(&if_stmt->else_list);
+ foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->else_list) {
+ validate_cf_node(cf_node, state);
+ }
+
+ state->parent_node = old_parent;
+ state->if_stmt = NULL;
+ }
+
+ static void
+ validate_loop(nir_loop *loop, validate_state *state)
+ {
+ assert(!exec_node_is_head_sentinel(loop->cf_node.node.prev));
+ nir_cf_node *prev_node = nir_cf_node_prev(&loop->cf_node);
+ assert(prev_node->type == nir_cf_node_block);
+
+ assert(!exec_node_is_tail_sentinel(loop->cf_node.node.next));
+ nir_cf_node *next_node = nir_cf_node_next(&loop->cf_node);
+ assert(next_node->type == nir_cf_node_block);
+
+ assert(!exec_list_is_empty(&loop->body));
+
+ nir_cf_node *old_parent = state->parent_node;
+ state->parent_node = &loop->cf_node;
+ nir_loop *old_loop = state->loop;
+ state->loop = loop;
+
+ exec_list_validate(&loop->body);
+ foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
+ validate_cf_node(cf_node, state);
+ }
+
+ state->parent_node = old_parent;
+ state->loop = old_loop;
+ }
+
+ static void
+ validate_cf_node(nir_cf_node *node, validate_state *state)
+ {
+ assert(node->parent == state->parent_node);
+
+ switch (node->type) {
+ case nir_cf_node_block:
+ validate_block(nir_cf_node_as_block(node), state);
+ break;
+
+ case nir_cf_node_if:
+ validate_if(nir_cf_node_as_if(node), state);
+ break;
+
+ case nir_cf_node_loop:
+ validate_loop(nir_cf_node_as_loop(node), state);
+ break;
+
+ default:
+ unreachable("Invalid CF node type");
+ }
+ }
+
+ static void
+ prevalidate_reg_decl(nir_register *reg, bool is_global, validate_state *state)
+ {
+ assert(reg->is_global == is_global);
+
+ if (is_global)
+ assert(reg->index < state->shader->reg_alloc);
+ else
+ assert(reg->index < state->impl->reg_alloc);
+ assert(!BITSET_TEST(state->regs_found, reg->index));
+ BITSET_SET(state->regs_found, reg->index);
+
+ list_validate(&reg->uses);
+ list_validate(&reg->defs);
+ list_validate(&reg->if_uses);
+
+ reg_validate_state *reg_state = ralloc(state->regs, reg_validate_state);
+ reg_state->uses = _mesa_set_create(reg_state, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ reg_state->if_uses = _mesa_set_create(reg_state, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ reg_state->defs = _mesa_set_create(reg_state, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ reg_state->where_defined = is_global ? NULL : state->impl;
+
+ _mesa_hash_table_insert(state->regs, reg, reg_state);
+ }
+
+ static void
+ postvalidate_reg_decl(nir_register *reg, validate_state *state)
+ {
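+ /* Instruction and if-condition validation recorded every src and dest that
+  * referenced this register in the uses/if_uses/defs sets.  Cross-check the
+  * register's own use/def lists against them: each list entry must have been
+  * seen, and any set entry left over is a reference missing from the lists.
+  */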
+ struct hash_entry *entry = _mesa_hash_table_search(state->regs, reg);
+
+ reg_validate_state *reg_state = (reg_validate_state *) entry->data;
+
+ nir_foreach_use(reg, src) {
+ struct set_entry *entry = _mesa_set_search(reg_state->uses, src);
+ assert(entry);
+ _mesa_set_remove(reg_state->uses, entry);
+ }
+
+ if (reg_state->uses->entries != 0) {
+ printf("extra entries in register uses:\n");
+ struct set_entry *entry;
+ set_foreach(reg_state->uses, entry)
+ printf("%p\n", entry->key);
+
+ abort();
+ }
+
+ nir_foreach_if_use(reg, src) {
+ struct set_entry *entry = _mesa_set_search(reg_state->if_uses, src);
+ assert(entry);
+ _mesa_set_remove(reg_state->if_uses, entry);
+ }
+
+ if (reg_state->if_uses->entries != 0) {
+ printf("extra entries in register if_uses:\n");
+ struct set_entry *entry;
+ set_foreach(reg_state->if_uses, entry)
+ printf("%p\n", entry->key);
+
+ abort();
+ }
+
+ nir_foreach_def(reg, src) {
+ struct set_entry *entry = _mesa_set_search(reg_state->defs, src);
+ assert(entry);
+ _mesa_set_remove(reg_state->defs, entry);
+ }
+
+ if (reg_state->defs->entries != 0) {
+ printf("extra entries in register defs:\n");
+ struct set_entry *entry;
+ set_foreach(reg_state->defs, entry)
+ printf("%p\n", entry->key);
+
+ abort();
+ }
+ }
+
+ static void
+ validate_var_decl(nir_variable *var, bool is_global, validate_state *state)
+ {
+ assert(is_global != (var->data.mode == nir_var_local));
+
+ /*
+ * TODO validate some things ir_validate.cpp does (requires more GLSL type
+ * support)
+ */
+
+ if (!is_global) {
+ _mesa_hash_table_insert(state->var_defs, var, state->impl);
+ }
+ }
+
+ static bool
+ postvalidate_ssa_def(nir_ssa_def *def, void *void_state)
+ {
+ validate_state *state = void_state;
+
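+ /* Same cross-check as for registers above, applied to the SSA def's
+  * use and if_use lists.
+  */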
+ struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def);
+ ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data;
+
+ nir_foreach_use(def, src) {
+ struct set_entry *entry = _mesa_set_search(def_state->uses, src);
+ assert(entry);
+ _mesa_set_remove(def_state->uses, entry);
+ }
+
+ if (def_state->uses->entries != 0) {
+ printf("extra entries in SSA def uses:\n");
+ struct set_entry *entry;
+ set_foreach(def_state->uses, entry)
+ printf("%p\n", entry->key);
+
+ abort();
+ }
+
+ nir_foreach_if_use(def, src) {
+ struct set_entry *entry = _mesa_set_search(def_state->if_uses, src);
+ assert(entry);
+ _mesa_set_remove(def_state->if_uses, entry);
+ }
+
+ if (def_state->if_uses->entries != 0) {
+ printf("extra entries in SSA def if_uses:\n");
+ struct set_entry *entry;
+ set_foreach(def_state->if_uses, entry)
+ printf("%p\n", entry->key);
+
+ abort();
+ }
+
+ return true;
+ }
+
+ static bool
+ postvalidate_ssa_defs_block(nir_block *block, void *state)
+ {
+ nir_foreach_instr(block, instr)
+ nir_foreach_ssa_def(instr, postvalidate_ssa_def, state);
+
+ return true;
+ }
+
+ static void
+ validate_function_impl(nir_function_impl *impl, validate_state *state)
+ {
+ assert(impl->function->impl == impl);
+ assert(impl->cf_node.parent == NULL);
+
+ assert(impl->num_params == impl->function->num_params);
+ for (unsigned i = 0; i < impl->num_params; i++)
+ assert(impl->params[i]->type == impl->function->params[i].type);
+
+ if (glsl_type_is_void(impl->function->return_type))
+ assert(impl->return_var == NULL);
+ else
+ assert(impl->return_var->type == impl->function->return_type);
+
+ assert(exec_list_is_empty(&impl->end_block->instr_list));
+ assert(impl->end_block->successors[0] == NULL);
+ assert(impl->end_block->successors[1] == NULL);
+
+ state->impl = impl;
+ state->parent_node = &impl->cf_node;
+
+ exec_list_validate(&impl->locals);
+ nir_foreach_variable(var, &impl->locals) {
+ validate_var_decl(var, false, state);
+ }
+
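+ /* Reset the per-impl bitset used to catch duplicate or out-of-range
+  * register indices before walking the declarations.
+  */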
+ state->regs_found = realloc(state->regs_found,
+ BITSET_WORDS(impl->reg_alloc) *
+ sizeof(BITSET_WORD));
+ memset(state->regs_found, 0, BITSET_WORDS(impl->reg_alloc) *
+ sizeof(BITSET_WORD));
+ exec_list_validate(&impl->registers);
+ foreach_list_typed(nir_register, reg, node, &impl->registers) {
+ prevalidate_reg_decl(reg, false, state);
+ }
+
+ state->ssa_defs_found = realloc(state->ssa_defs_found,
+ BITSET_WORDS(impl->ssa_alloc) *
+ sizeof(BITSET_WORD));
+ memset(state->ssa_defs_found, 0, BITSET_WORDS(impl->ssa_alloc) *
+ sizeof(BITSET_WORD));
+ exec_list_validate(&impl->body);
+ foreach_list_typed(nir_cf_node, node, node, &impl->body) {
+ validate_cf_node(node, state);
+ }
+
+ foreach_list_typed(nir_register, reg, node, &impl->registers) {
+ postvalidate_reg_decl(reg, state);
+ }
+
+ nir_foreach_block(impl, postvalidate_ssa_defs_block, state);
+ }
+
+ static void
+ validate_function(nir_function *func, validate_state *state)
+ {
+ if (func->impl != NULL) {
+ assert(func->impl->function == func);
+ validate_function_impl(func->impl, state);
+ }
+ }
+
+ static void
+ init_validate_state(validate_state *state)
+ {
+ state->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ state->ssa_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ state->ssa_defs_found = NULL;
+ state->regs_found = NULL;
+ state->var_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ state->loop = NULL;
+ }
+
+ static void
+ destroy_validate_state(validate_state *state)
+ {
+ _mesa_hash_table_destroy(state->regs, NULL);
+ _mesa_hash_table_destroy(state->ssa_defs, NULL);
+ free(state->ssa_defs_found);
+ free(state->regs_found);
+ _mesa_hash_table_destroy(state->var_defs, NULL);
+ }
+
+ void
+ nir_validate_shader(nir_shader *shader)
+ {
+ validate_state state;
+ init_validate_state(&state);
+
+ state.shader = shader;
+
+ exec_list_validate(&shader->uniforms);
+ nir_foreach_variable(var, &shader->uniforms) {
+ validate_var_decl(var, true, &state);
+ }
+
+ exec_list_validate(&shader->inputs);
+ nir_foreach_variable(var, &shader->inputs) {
+ validate_var_decl(var, true, &state);
+ }
+
+ exec_list_validate(&shader->outputs);
+ nir_foreach_variable(var, &shader->outputs) {
+ validate_var_decl(var, true, &state);
+ }
+
++ exec_list_validate(&shader->shared);
++ nir_foreach_variable(var, &shader->shared) {
++ validate_var_decl(var, true, &state);
++ }
++
+ exec_list_validate(&shader->globals);
+ nir_foreach_variable(var, &shader->globals) {
+ validate_var_decl(var, true, &state);
+ }
+
+ exec_list_validate(&shader->system_values);
+ nir_foreach_variable(var, &shader->system_values) {
+ validate_var_decl(var, true, &state);
+ }
+
+ state.regs_found = realloc(state.regs_found,
+ BITSET_WORDS(shader->reg_alloc) *
+ sizeof(BITSET_WORD));
+ memset(state.regs_found, 0, BITSET_WORDS(shader->reg_alloc) *
+ sizeof(BITSET_WORD));
+ exec_list_validate(&shader->registers);
+ foreach_list_typed(nir_register, reg, node, &shader->registers) {
+ prevalidate_reg_decl(reg, true, &state);
+ }
+
+ exec_list_validate(&shader->functions);
+ foreach_list_typed(nir_function, func, node, &shader->functions) {
+ validate_function(func, &state);
+ }
+
+ foreach_list_typed(nir_register, reg, node, &shader->registers) {
+ postvalidate_reg_decl(reg, &state);
+ }
+
+ destroy_validate_state(&state);
+ }
+
+ #endif /* NDEBUG */
--- /dev/null
--- /dev/null
++/*
++** Copyright (c) 2014-2015 The Khronos Group Inc.
++**
++** Permission is hereby granted, free of charge, to any person obtaining a copy
++** of this software and/or associated documentation files (the "Materials"),
++** to deal in the Materials without restriction, including without limitation
++** the rights to use, copy, modify, merge, publish, distribute, sublicense,
++** and/or sell copies of the Materials, and to permit persons to whom the
++** Materials are furnished to do so, subject to the following conditions:
++**
++** The above copyright notice and this permission notice shall be included in
++** all copies or substantial portions of the Materials.
++**
++** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
++** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
++** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
++**
++** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
++** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
++** IN THE MATERIALS.
++*/
++
++#ifndef GLSLstd450_H
++#define GLSLstd450_H
++
++const int GLSLstd450Version = 99;
++const int GLSLstd450Revision = 3;
++
++enum GLSLstd450 {
++ GLSLstd450Bad = 0, // Don't use
++
++ GLSLstd450Round = 1,
++ GLSLstd450RoundEven = 2,
++ GLSLstd450Trunc = 3,
++ GLSLstd450FAbs = 4,
++ GLSLstd450SAbs = 5,
++ GLSLstd450FSign = 6,
++ GLSLstd450SSign = 7,
++ GLSLstd450Floor = 8,
++ GLSLstd450Ceil = 9,
++ GLSLstd450Fract = 10,
++
++ GLSLstd450Radians = 11,
++ GLSLstd450Degrees = 12,
++ GLSLstd450Sin = 13,
++ GLSLstd450Cos = 14,
++ GLSLstd450Tan = 15,
++ GLSLstd450Asin = 16,
++ GLSLstd450Acos = 17,
++ GLSLstd450Atan = 18,
++ GLSLstd450Sinh = 19,
++ GLSLstd450Cosh = 20,
++ GLSLstd450Tanh = 21,
++ GLSLstd450Asinh = 22,
++ GLSLstd450Acosh = 23,
++ GLSLstd450Atanh = 24,
++ GLSLstd450Atan2 = 25,
++
++ GLSLstd450Pow = 26,
++ GLSLstd450Exp = 27,
++ GLSLstd450Log = 28,
++ GLSLstd450Exp2 = 29,
++ GLSLstd450Log2 = 30,
++ GLSLstd450Sqrt = 31,
++ GLSLstd450InverseSqrt = 32,
++
++ GLSLstd450Determinant = 33,
++ GLSLstd450MatrixInverse = 34,
++
++ GLSLstd450Modf = 35, // second operand needs an OpVariable to write to
++ GLSLstd450ModfStruct = 36, // no OpVariable operand
++ GLSLstd450FMin = 37,
++ GLSLstd450UMin = 38,
++ GLSLstd450SMin = 39,
++ GLSLstd450FMax = 40,
++ GLSLstd450UMax = 41,
++ GLSLstd450SMax = 42,
++ GLSLstd450FClamp = 43,
++ GLSLstd450UClamp = 44,
++ GLSLstd450SClamp = 45,
++ GLSLstd450FMix = 46,
++ GLSLstd450IMix = 47,
++ GLSLstd450Step = 48,
++ GLSLstd450SmoothStep = 49,
++
++ GLSLstd450Fma = 50,
++ GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to
++ GLSLstd450FrexpStruct = 52, // no OpVariable operand
++ GLSLstd450Ldexp = 53,
++
++ GLSLstd450PackSnorm4x8 = 54,
++ GLSLstd450PackUnorm4x8 = 55,
++ GLSLstd450PackSnorm2x16 = 56,
++ GLSLstd450PackUnorm2x16 = 57,
++ GLSLstd450PackHalf2x16 = 58,
++ GLSLstd450PackDouble2x32 = 59,
++ GLSLstd450UnpackSnorm2x16 = 60,
++ GLSLstd450UnpackUnorm2x16 = 61,
++ GLSLstd450UnpackHalf2x16 = 62,
++ GLSLstd450UnpackSnorm4x8 = 63,
++ GLSLstd450UnpackUnorm4x8 = 64,
++ GLSLstd450UnpackDouble2x32 = 65,
++
++ GLSLstd450Length = 66,
++ GLSLstd450Distance = 67,
++ GLSLstd450Cross = 68,
++ GLSLstd450Normalize = 69,
++ GLSLstd450FaceForward = 70,
++ GLSLstd450Reflect = 71,
++ GLSLstd450Refract = 72,
++
++ GLSLstd450FindILsb = 73,
++ GLSLstd450FindSMsb = 74,
++ GLSLstd450FindUMsb = 75,
++
++ GLSLstd450InterpolateAtCentroid = 76,
++ GLSLstd450InterpolateAtSample = 77,
++ GLSLstd450InterpolateAtOffset = 78,
++
++ GLSLstd450Count
++};
++
++#endif // #ifndef GLSLstd450_H
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ *
++ * Authors:
++ * Jason Ekstrand (jason@jlekstrand.net)
++ *
++ */
++
++#pragma once
++
++#ifndef _NIR_SPIRV_H_
++#define _NIR_SPIRV_H_
++
++#include "nir/nir.h"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++struct nir_spirv_specialization {
++ uint32_t id;
++ uint32_t data;
++};
++
++nir_function *spirv_to_nir(const uint32_t *words, size_t word_count,
++ struct nir_spirv_specialization *specializations,
++ unsigned num_specializations,
++ gl_shader_stage stage, const char *entry_point_name,
++ const nir_shader_compiler_options *options);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif /* _NIR_SPIRV_H_ */
--- /dev/null
--- /dev/null
++/*
++** Copyright (c) 2014-2015 The Khronos Group Inc.
++**
++** Permission is hereby granted, free of charge, to any person obtaining a copy
++** of this software and/or associated documentation files (the "Materials"),
++** to deal in the Materials without restriction, including without limitation
++** the rights to use, copy, modify, merge, publish, distribute, sublicense,
++** and/or sell copies of the Materials, and to permit persons to whom the
++** Materials are furnished to do so, subject to the following conditions:
++**
++** The above copyright notice and this permission notice shall be included in
++** all copies or substantial portions of the Materials.
++**
++** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
++** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
++** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
++**
++** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
++** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
++** IN THE MATERIALS.
++*/
++
++/*
++** This header is automatically generated by the same tool that creates
++** the Binary Section of the SPIR-V specification.
++*/
++
++/*
++** Enumeration tokens for SPIR-V, in various styles:
++** C, C++, C++11, JSON, Lua, Python
++**
++** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL
++** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL
++** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL
++** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL
++** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL']
++**
++** Some tokens act like mask values, which can be OR'd together,
++** while others are mutually exclusive. The mask-like ones have
++** "Mask" in their name, and a parallel enum that has the shift
++** amount (1 << x) for each corresponding enumerant.
++*/
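++
++/*
++** For example (illustrative only): mask bits from the same enum can be
++** OR'd together, and each named *Mask bit equals 1 << the matching *Shift
++** value, e.g.
++**
++**     SpvImageOperandsMask ops = SpvImageOperandsBiasMask |
++**                                SpvImageOperandsLodMask;
++**     // SpvImageOperandsBiasMask == (1u << SpvImageOperandsBiasShift)
++*/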
++
++#ifndef spirv_H
++#define spirv_H
++
++typedef unsigned int SpvId;
++
++#define SPV_VERSION 0x10000
++#define SPV_REVISION 2
++
++static const unsigned int SpvMagicNumber = 0x07230203;
++static const unsigned int SpvVersion = 0x00010000;
++static const unsigned int SpvRevision = 2;
++static const unsigned int SpvOpCodeMask = 0xffff;
++static const unsigned int SpvWordCountShift = 16;
++
++typedef enum SpvSourceLanguage_ {
++ SpvSourceLanguageUnknown = 0,
++ SpvSourceLanguageESSL = 1,
++ SpvSourceLanguageGLSL = 2,
++ SpvSourceLanguageOpenCL_C = 3,
++ SpvSourceLanguageOpenCL_CPP = 4,
++} SpvSourceLanguage;
++
++typedef enum SpvExecutionModel_ {
++ SpvExecutionModelVertex = 0,
++ SpvExecutionModelTessellationControl = 1,
++ SpvExecutionModelTessellationEvaluation = 2,
++ SpvExecutionModelGeometry = 3,
++ SpvExecutionModelFragment = 4,
++ SpvExecutionModelGLCompute = 5,
++ SpvExecutionModelKernel = 6,
++} SpvExecutionModel;
++
++typedef enum SpvAddressingModel_ {
++ SpvAddressingModelLogical = 0,
++ SpvAddressingModelPhysical32 = 1,
++ SpvAddressingModelPhysical64 = 2,
++} SpvAddressingModel;
++
++typedef enum SpvMemoryModel_ {
++ SpvMemoryModelSimple = 0,
++ SpvMemoryModelGLSL450 = 1,
++ SpvMemoryModelOpenCL = 2,
++} SpvMemoryModel;
++
++typedef enum SpvExecutionMode_ {
++ SpvExecutionModeInvocations = 0,
++ SpvExecutionModeSpacingEqual = 1,
++ SpvExecutionModeSpacingFractionalEven = 2,
++ SpvExecutionModeSpacingFractionalOdd = 3,
++ SpvExecutionModeVertexOrderCw = 4,
++ SpvExecutionModeVertexOrderCcw = 5,
++ SpvExecutionModePixelCenterInteger = 6,
++ SpvExecutionModeOriginUpperLeft = 7,
++ SpvExecutionModeOriginLowerLeft = 8,
++ SpvExecutionModeEarlyFragmentTests = 9,
++ SpvExecutionModePointMode = 10,
++ SpvExecutionModeXfb = 11,
++ SpvExecutionModeDepthReplacing = 12,
++ SpvExecutionModeDepthGreater = 14,
++ SpvExecutionModeDepthLess = 15,
++ SpvExecutionModeDepthUnchanged = 16,
++ SpvExecutionModeLocalSize = 17,
++ SpvExecutionModeLocalSizeHint = 18,
++ SpvExecutionModeInputPoints = 19,
++ SpvExecutionModeInputLines = 20,
++ SpvExecutionModeInputLinesAdjacency = 21,
++ SpvExecutionModeTriangles = 22,
++ SpvExecutionModeInputTrianglesAdjacency = 23,
++ SpvExecutionModeQuads = 24,
++ SpvExecutionModeIsolines = 25,
++ SpvExecutionModeOutputVertices = 26,
++ SpvExecutionModeOutputPoints = 27,
++ SpvExecutionModeOutputLineStrip = 28,
++ SpvExecutionModeOutputTriangleStrip = 29,
++ SpvExecutionModeVecTypeHint = 30,
++ SpvExecutionModeContractionOff = 31,
++} SpvExecutionMode;
++
++typedef enum SpvStorageClass_ {
++ SpvStorageClassUniformConstant = 0,
++ SpvStorageClassInput = 1,
++ SpvStorageClassUniform = 2,
++ SpvStorageClassOutput = 3,
++ SpvStorageClassWorkgroup = 4,
++ SpvStorageClassCrossWorkgroup = 5,
++ SpvStorageClassPrivate = 6,
++ SpvStorageClassFunction = 7,
++ SpvStorageClassGeneric = 8,
++ SpvStorageClassPushConstant = 9,
++ SpvStorageClassAtomicCounter = 10,
++ SpvStorageClassImage = 11,
++} SpvStorageClass;
++
++typedef enum SpvDim_ {
++ SpvDim1D = 0,
++ SpvDim2D = 1,
++ SpvDim3D = 2,
++ SpvDimCube = 3,
++ SpvDimRect = 4,
++ SpvDimBuffer = 5,
++ SpvDimSubpassData = 6,
++} SpvDim;
++
++typedef enum SpvSamplerAddressingMode_ {
++ SpvSamplerAddressingModeNone = 0,
++ SpvSamplerAddressingModeClampToEdge = 1,
++ SpvSamplerAddressingModeClamp = 2,
++ SpvSamplerAddressingModeRepeat = 3,
++ SpvSamplerAddressingModeRepeatMirrored = 4,
++} SpvSamplerAddressingMode;
++
++typedef enum SpvSamplerFilterMode_ {
++ SpvSamplerFilterModeNearest = 0,
++ SpvSamplerFilterModeLinear = 1,
++} SpvSamplerFilterMode;
++
++typedef enum SpvImageFormat_ {
++ SpvImageFormatUnknown = 0,
++ SpvImageFormatRgba32f = 1,
++ SpvImageFormatRgba16f = 2,
++ SpvImageFormatR32f = 3,
++ SpvImageFormatRgba8 = 4,
++ SpvImageFormatRgba8Snorm = 5,
++ SpvImageFormatRg32f = 6,
++ SpvImageFormatRg16f = 7,
++ SpvImageFormatR11fG11fB10f = 8,
++ SpvImageFormatR16f = 9,
++ SpvImageFormatRgba16 = 10,
++ SpvImageFormatRgb10A2 = 11,
++ SpvImageFormatRg16 = 12,
++ SpvImageFormatRg8 = 13,
++ SpvImageFormatR16 = 14,
++ SpvImageFormatR8 = 15,
++ SpvImageFormatRgba16Snorm = 16,
++ SpvImageFormatRg16Snorm = 17,
++ SpvImageFormatRg8Snorm = 18,
++ SpvImageFormatR16Snorm = 19,
++ SpvImageFormatR8Snorm = 20,
++ SpvImageFormatRgba32i = 21,
++ SpvImageFormatRgba16i = 22,
++ SpvImageFormatRgba8i = 23,
++ SpvImageFormatR32i = 24,
++ SpvImageFormatRg32i = 25,
++ SpvImageFormatRg16i = 26,
++ SpvImageFormatRg8i = 27,
++ SpvImageFormatR16i = 28,
++ SpvImageFormatR8i = 29,
++ SpvImageFormatRgba32ui = 30,
++ SpvImageFormatRgba16ui = 31,
++ SpvImageFormatRgba8ui = 32,
++ SpvImageFormatR32ui = 33,
++ SpvImageFormatRgb10a2ui = 34,
++ SpvImageFormatRg32ui = 35,
++ SpvImageFormatRg16ui = 36,
++ SpvImageFormatRg8ui = 37,
++ SpvImageFormatR16ui = 38,
++ SpvImageFormatR8ui = 39,
++} SpvImageFormat;
++
++typedef enum SpvImageChannelOrder_ {
++ SpvImageChannelOrderR = 0,
++ SpvImageChannelOrderA = 1,
++ SpvImageChannelOrderRG = 2,
++ SpvImageChannelOrderRA = 3,
++ SpvImageChannelOrderRGB = 4,
++ SpvImageChannelOrderRGBA = 5,
++ SpvImageChannelOrderBGRA = 6,
++ SpvImageChannelOrderARGB = 7,
++ SpvImageChannelOrderIntensity = 8,
++ SpvImageChannelOrderLuminance = 9,
++ SpvImageChannelOrderRx = 10,
++ SpvImageChannelOrderRGx = 11,
++ SpvImageChannelOrderRGBx = 12,
++ SpvImageChannelOrderDepth = 13,
++ SpvImageChannelOrderDepthStencil = 14,
++ SpvImageChannelOrdersRGB = 15,
++ SpvImageChannelOrdersRGBx = 16,
++ SpvImageChannelOrdersRGBA = 17,
++ SpvImageChannelOrdersBGRA = 18,
++} SpvImageChannelOrder;
++
++typedef enum SpvImageChannelDataType_ {
++ SpvImageChannelDataTypeSnormInt8 = 0,
++ SpvImageChannelDataTypeSnormInt16 = 1,
++ SpvImageChannelDataTypeUnormInt8 = 2,
++ SpvImageChannelDataTypeUnormInt16 = 3,
++ SpvImageChannelDataTypeUnormShort565 = 4,
++ SpvImageChannelDataTypeUnormShort555 = 5,
++ SpvImageChannelDataTypeUnormInt101010 = 6,
++ SpvImageChannelDataTypeSignedInt8 = 7,
++ SpvImageChannelDataTypeSignedInt16 = 8,
++ SpvImageChannelDataTypeSignedInt32 = 9,
++ SpvImageChannelDataTypeUnsignedInt8 = 10,
++ SpvImageChannelDataTypeUnsignedInt16 = 11,
++ SpvImageChannelDataTypeUnsignedInt32 = 12,
++ SpvImageChannelDataTypeHalfFloat = 13,
++ SpvImageChannelDataTypeFloat = 14,
++ SpvImageChannelDataTypeUnormInt24 = 15,
++ SpvImageChannelDataTypeUnormInt101010_2 = 16,
++} SpvImageChannelDataType;
++
++typedef enum SpvImageOperandsShift_ {
++ SpvImageOperandsBiasShift = 0,
++ SpvImageOperandsLodShift = 1,
++ SpvImageOperandsGradShift = 2,
++ SpvImageOperandsConstOffsetShift = 3,
++ SpvImageOperandsOffsetShift = 4,
++ SpvImageOperandsConstOffsetsShift = 5,
++ SpvImageOperandsSampleShift = 6,
++ SpvImageOperandsMinLodShift = 7,
++} SpvImageOperandsShift;
++
++typedef enum SpvImageOperandsMask_ {
++ SpvImageOperandsMaskNone = 0,
++ SpvImageOperandsBiasMask = 0x00000001,
++ SpvImageOperandsLodMask = 0x00000002,
++ SpvImageOperandsGradMask = 0x00000004,
++ SpvImageOperandsConstOffsetMask = 0x00000008,
++ SpvImageOperandsOffsetMask = 0x00000010,
++ SpvImageOperandsConstOffsetsMask = 0x00000020,
++ SpvImageOperandsSampleMask = 0x00000040,
++ SpvImageOperandsMinLodMask = 0x00000080,
++} SpvImageOperandsMask;
++
++typedef enum SpvFPFastMathModeShift_ {
++ SpvFPFastMathModeNotNaNShift = 0,
++ SpvFPFastMathModeNotInfShift = 1,
++ SpvFPFastMathModeNSZShift = 2,
++ SpvFPFastMathModeAllowRecipShift = 3,
++ SpvFPFastMathModeFastShift = 4,
++} SpvFPFastMathModeShift;
++
++typedef enum SpvFPFastMathModeMask_ {
++ SpvFPFastMathModeMaskNone = 0,
++ SpvFPFastMathModeNotNaNMask = 0x00000001,
++ SpvFPFastMathModeNotInfMask = 0x00000002,
++ SpvFPFastMathModeNSZMask = 0x00000004,
++ SpvFPFastMathModeAllowRecipMask = 0x00000008,
++ SpvFPFastMathModeFastMask = 0x00000010,
++} SpvFPFastMathModeMask;
++
++typedef enum SpvFPRoundingMode_ {
++ SpvFPRoundingModeRTE = 0,
++ SpvFPRoundingModeRTZ = 1,
++ SpvFPRoundingModeRTP = 2,
++ SpvFPRoundingModeRTN = 3,
++} SpvFPRoundingMode;
++
++typedef enum SpvLinkageType_ {
++ SpvLinkageTypeExport = 0,
++ SpvLinkageTypeImport = 1,
++} SpvLinkageType;
++
++typedef enum SpvAccessQualifier_ {
++ SpvAccessQualifierReadOnly = 0,
++ SpvAccessQualifierWriteOnly = 1,
++ SpvAccessQualifierReadWrite = 2,
++} SpvAccessQualifier;
++
++typedef enum SpvFunctionParameterAttribute_ {
++ SpvFunctionParameterAttributeZext = 0,
++ SpvFunctionParameterAttributeSext = 1,
++ SpvFunctionParameterAttributeByVal = 2,
++ SpvFunctionParameterAttributeSret = 3,
++ SpvFunctionParameterAttributeNoAlias = 4,
++ SpvFunctionParameterAttributeNoCapture = 5,
++ SpvFunctionParameterAttributeNoWrite = 6,
++ SpvFunctionParameterAttributeNoReadWrite = 7,
++} SpvFunctionParameterAttribute;
++
++typedef enum SpvDecoration_ {
++ SpvDecorationRelaxedPrecision = 0,
++ SpvDecorationSpecId = 1,
++ SpvDecorationBlock = 2,
++ SpvDecorationBufferBlock = 3,
++ SpvDecorationRowMajor = 4,
++ SpvDecorationColMajor = 5,
++ SpvDecorationArrayStride = 6,
++ SpvDecorationMatrixStride = 7,
++ SpvDecorationGLSLShared = 8,
++ SpvDecorationGLSLPacked = 9,
++ SpvDecorationCPacked = 10,
++ SpvDecorationBuiltIn = 11,
++ SpvDecorationNoPerspective = 13,
++ SpvDecorationFlat = 14,
++ SpvDecorationPatch = 15,
++ SpvDecorationCentroid = 16,
++ SpvDecorationSample = 17,
++ SpvDecorationInvariant = 18,
++ SpvDecorationRestrict = 19,
++ SpvDecorationAliased = 20,
++ SpvDecorationVolatile = 21,
++ SpvDecorationConstant = 22,
++ SpvDecorationCoherent = 23,
++ SpvDecorationNonWritable = 24,
++ SpvDecorationNonReadable = 25,
++ SpvDecorationUniform = 26,
++ SpvDecorationSaturatedConversion = 28,
++ SpvDecorationStream = 29,
++ SpvDecorationLocation = 30,
++ SpvDecorationComponent = 31,
++ SpvDecorationIndex = 32,
++ SpvDecorationBinding = 33,
++ SpvDecorationDescriptorSet = 34,
++ SpvDecorationOffset = 35,
++ SpvDecorationXfbBuffer = 36,
++ SpvDecorationXfbStride = 37,
++ SpvDecorationFuncParamAttr = 38,
++ SpvDecorationFPRoundingMode = 39,
++ SpvDecorationFPFastMathMode = 40,
++ SpvDecorationLinkageAttributes = 41,
++ SpvDecorationNoContraction = 42,
++ SpvDecorationInputAttachmentIndex = 43,
++ SpvDecorationAlignment = 44,
++} SpvDecoration;
++
++typedef enum SpvBuiltIn_ {
++ SpvBuiltInPosition = 0,
++ SpvBuiltInPointSize = 1,
++ SpvBuiltInClipDistance = 3,
++ SpvBuiltInCullDistance = 4,
++ SpvBuiltInVertexId = 5,
++ SpvBuiltInInstanceId = 6,
++ SpvBuiltInPrimitiveId = 7,
++ SpvBuiltInInvocationId = 8,
++ SpvBuiltInLayer = 9,
++ SpvBuiltInViewportIndex = 10,
++ SpvBuiltInTessLevelOuter = 11,
++ SpvBuiltInTessLevelInner = 12,
++ SpvBuiltInTessCoord = 13,
++ SpvBuiltInPatchVertices = 14,
++ SpvBuiltInFragCoord = 15,
++ SpvBuiltInPointCoord = 16,
++ SpvBuiltInFrontFacing = 17,
++ SpvBuiltInSampleId = 18,
++ SpvBuiltInSamplePosition = 19,
++ SpvBuiltInSampleMask = 20,
++ SpvBuiltInFragDepth = 22,
++ SpvBuiltInHelperInvocation = 23,
++ SpvBuiltInNumWorkgroups = 24,
++ SpvBuiltInWorkgroupSize = 25,
++ SpvBuiltInWorkgroupId = 26,
++ SpvBuiltInLocalInvocationId = 27,
++ SpvBuiltInGlobalInvocationId = 28,
++ SpvBuiltInLocalInvocationIndex = 29,
++ SpvBuiltInWorkDim = 30,
++ SpvBuiltInGlobalSize = 31,
++ SpvBuiltInEnqueuedWorkgroupSize = 32,
++ SpvBuiltInGlobalOffset = 33,
++ SpvBuiltInGlobalLinearId = 34,
++ SpvBuiltInSubgroupSize = 36,
++ SpvBuiltInSubgroupMaxSize = 37,
++ SpvBuiltInNumSubgroups = 38,
++ SpvBuiltInNumEnqueuedSubgroups = 39,
++ SpvBuiltInSubgroupId = 40,
++ SpvBuiltInSubgroupLocalInvocationId = 41,
++ SpvBuiltInVertexIndex = 42,
++ SpvBuiltInInstanceIndex = 43,
++} SpvBuiltIn;
++
++typedef enum SpvSelectionControlShift_ {
++ SpvSelectionControlFlattenShift = 0,
++ SpvSelectionControlDontFlattenShift = 1,
++} SpvSelectionControlShift;
++
++typedef enum SpvSelectionControlMask_ {
++ SpvSelectionControlMaskNone = 0,
++ SpvSelectionControlFlattenMask = 0x00000001,
++ SpvSelectionControlDontFlattenMask = 0x00000002,
++} SpvSelectionControlMask;
++
++typedef enum SpvLoopControlShift_ {
++ SpvLoopControlUnrollShift = 0,
++ SpvLoopControlDontUnrollShift = 1,
++} SpvLoopControlShift;
++
++typedef enum SpvLoopControlMask_ {
++ SpvLoopControlMaskNone = 0,
++ SpvLoopControlUnrollMask = 0x00000001,
++ SpvLoopControlDontUnrollMask = 0x00000002,
++} SpvLoopControlMask;
++
++typedef enum SpvFunctionControlShift_ {
++ SpvFunctionControlInlineShift = 0,
++ SpvFunctionControlDontInlineShift = 1,
++ SpvFunctionControlPureShift = 2,
++ SpvFunctionControlConstShift = 3,
++} SpvFunctionControlShift;
++
++typedef enum SpvFunctionControlMask_ {
++ SpvFunctionControlMaskNone = 0,
++ SpvFunctionControlInlineMask = 0x00000001,
++ SpvFunctionControlDontInlineMask = 0x00000002,
++ SpvFunctionControlPureMask = 0x00000004,
++ SpvFunctionControlConstMask = 0x00000008,
++} SpvFunctionControlMask;
++
++typedef enum SpvMemorySemanticsShift_ {
++ SpvMemorySemanticsAcquireShift = 1,
++ SpvMemorySemanticsReleaseShift = 2,
++ SpvMemorySemanticsAcquireReleaseShift = 3,
++ SpvMemorySemanticsSequentiallyConsistentShift = 4,
++ SpvMemorySemanticsUniformMemoryShift = 6,
++ SpvMemorySemanticsSubgroupMemoryShift = 7,
++ SpvMemorySemanticsWorkgroupMemoryShift = 8,
++ SpvMemorySemanticsCrossWorkgroupMemoryShift = 9,
++ SpvMemorySemanticsAtomicCounterMemoryShift = 10,
++ SpvMemorySemanticsImageMemoryShift = 11,
++} SpvMemorySemanticsShift;
++
++typedef enum SpvMemorySemanticsMask_ {
++ SpvMemorySemanticsMaskNone = 0,
++ SpvMemorySemanticsAcquireMask = 0x00000002,
++ SpvMemorySemanticsReleaseMask = 0x00000004,
++ SpvMemorySemanticsAcquireReleaseMask = 0x00000008,
++ SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010,
++ SpvMemorySemanticsUniformMemoryMask = 0x00000040,
++ SpvMemorySemanticsSubgroupMemoryMask = 0x00000080,
++ SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100,
++ SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200,
++ SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400,
++ SpvMemorySemanticsImageMemoryMask = 0x00000800,
++} SpvMemorySemanticsMask;
++
++typedef enum SpvMemoryAccessShift_ {
++ SpvMemoryAccessVolatileShift = 0,
++ SpvMemoryAccessAlignedShift = 1,
++ SpvMemoryAccessNontemporalShift = 2,
++} SpvMemoryAccessShift;
++
++typedef enum SpvMemoryAccessMask_ {
++ SpvMemoryAccessMaskNone = 0,
++ SpvMemoryAccessVolatileMask = 0x00000001,
++ SpvMemoryAccessAlignedMask = 0x00000002,
++ SpvMemoryAccessNontemporalMask = 0x00000004,
++} SpvMemoryAccessMask;
++
++typedef enum SpvScope_ {
++ SpvScopeCrossDevice = 0,
++ SpvScopeDevice = 1,
++ SpvScopeWorkgroup = 2,
++ SpvScopeSubgroup = 3,
++ SpvScopeInvocation = 4,
++} SpvScope;
++
++typedef enum SpvGroupOperation_ {
++ SpvGroupOperationReduce = 0,
++ SpvGroupOperationInclusiveScan = 1,
++ SpvGroupOperationExclusiveScan = 2,
++} SpvGroupOperation;
++
++typedef enum SpvKernelEnqueueFlags_ {
++ SpvKernelEnqueueFlagsNoWait = 0,
++ SpvKernelEnqueueFlagsWaitKernel = 1,
++ SpvKernelEnqueueFlagsWaitWorkGroup = 2,
++} SpvKernelEnqueueFlags;
++
++typedef enum SpvKernelProfilingInfoShift_ {
++ SpvKernelProfilingInfoCmdExecTimeShift = 0,
++} SpvKernelProfilingInfoShift;
++
++typedef enum SpvKernelProfilingInfoMask_ {
++ SpvKernelProfilingInfoMaskNone = 0,
++ SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001,
++} SpvKernelProfilingInfoMask;
++
++typedef enum SpvCapability_ {
++ SpvCapabilityMatrix = 0,
++ SpvCapabilityShader = 1,
++ SpvCapabilityGeometry = 2,
++ SpvCapabilityTessellation = 3,
++ SpvCapabilityAddresses = 4,
++ SpvCapabilityLinkage = 5,
++ SpvCapabilityKernel = 6,
++ SpvCapabilityVector16 = 7,
++ SpvCapabilityFloat16Buffer = 8,
++ SpvCapabilityFloat16 = 9,
++ SpvCapabilityFloat64 = 10,
++ SpvCapabilityInt64 = 11,
++ SpvCapabilityInt64Atomics = 12,
++ SpvCapabilityImageBasic = 13,
++ SpvCapabilityImageReadWrite = 14,
++ SpvCapabilityImageMipmap = 15,
++ SpvCapabilityPipes = 17,
++ SpvCapabilityGroups = 18,
++ SpvCapabilityDeviceEnqueue = 19,
++ SpvCapabilityLiteralSampler = 20,
++ SpvCapabilityAtomicStorage = 21,
++ SpvCapabilityInt16 = 22,
++ SpvCapabilityTessellationPointSize = 23,
++ SpvCapabilityGeometryPointSize = 24,
++ SpvCapabilityImageGatherExtended = 25,
++ SpvCapabilityStorageImageMultisample = 27,
++ SpvCapabilityUniformBufferArrayDynamicIndexing = 28,
++ SpvCapabilitySampledImageArrayDynamicIndexing = 29,
++ SpvCapabilityStorageBufferArrayDynamicIndexing = 30,
++ SpvCapabilityStorageImageArrayDynamicIndexing = 31,
++ SpvCapabilityClipDistance = 32,
++ SpvCapabilityCullDistance = 33,
++ SpvCapabilityImageCubeArray = 34,
++ SpvCapabilitySampleRateShading = 35,
++ SpvCapabilityImageRect = 36,
++ SpvCapabilitySampledRect = 37,
++ SpvCapabilityGenericPointer = 38,
++ SpvCapabilityInt8 = 39,
++ SpvCapabilityInputAttachment = 40,
++ SpvCapabilitySparseResidency = 41,
++ SpvCapabilityMinLod = 42,
++ SpvCapabilitySampled1D = 43,
++ SpvCapabilityImage1D = 44,
++ SpvCapabilitySampledCubeArray = 45,
++ SpvCapabilitySampledBuffer = 46,
++ SpvCapabilityImageBuffer = 47,
++ SpvCapabilityImageMSArray = 48,
++ SpvCapabilityStorageImageExtendedFormats = 49,
++ SpvCapabilityImageQuery = 50,
++ SpvCapabilityDerivativeControl = 51,
++ SpvCapabilityInterpolationFunction = 52,
++ SpvCapabilityTransformFeedback = 53,
++ SpvCapabilityGeometryStreams = 54,
++ SpvCapabilityStorageImageReadWithoutFormat = 55,
++ SpvCapabilityStorageImageWriteWithoutFormat = 56,
++ SpvCapabilityMultiViewport = 57,
++} SpvCapability;
++
++typedef enum SpvOp_ {
++ SpvOpNop = 0,
++ SpvOpUndef = 1,
++ SpvOpSourceContinued = 2,
++ SpvOpSource = 3,
++ SpvOpSourceExtension = 4,
++ SpvOpName = 5,
++ SpvOpMemberName = 6,
++ SpvOpString = 7,
++ SpvOpLine = 8,
++ SpvOpExtension = 10,
++ SpvOpExtInstImport = 11,
++ SpvOpExtInst = 12,
++ SpvOpMemoryModel = 14,
++ SpvOpEntryPoint = 15,
++ SpvOpExecutionMode = 16,
++ SpvOpCapability = 17,
++ SpvOpTypeVoid = 19,
++ SpvOpTypeBool = 20,
++ SpvOpTypeInt = 21,
++ SpvOpTypeFloat = 22,
++ SpvOpTypeVector = 23,
++ SpvOpTypeMatrix = 24,
++ SpvOpTypeImage = 25,
++ SpvOpTypeSampler = 26,
++ SpvOpTypeSampledImage = 27,
++ SpvOpTypeArray = 28,
++ SpvOpTypeRuntimeArray = 29,
++ SpvOpTypeStruct = 30,
++ SpvOpTypeOpaque = 31,
++ SpvOpTypePointer = 32,
++ SpvOpTypeFunction = 33,
++ SpvOpTypeEvent = 34,
++ SpvOpTypeDeviceEvent = 35,
++ SpvOpTypeReserveId = 36,
++ SpvOpTypeQueue = 37,
++ SpvOpTypePipe = 38,
++ SpvOpTypeForwardPointer = 39,
++ SpvOpConstantTrue = 41,
++ SpvOpConstantFalse = 42,
++ SpvOpConstant = 43,
++ SpvOpConstantComposite = 44,
++ SpvOpConstantSampler = 45,
++ SpvOpConstantNull = 46,
++ SpvOpSpecConstantTrue = 48,
++ SpvOpSpecConstantFalse = 49,
++ SpvOpSpecConstant = 50,
++ SpvOpSpecConstantComposite = 51,
++ SpvOpSpecConstantOp = 52,
++ SpvOpFunction = 54,
++ SpvOpFunctionParameter = 55,
++ SpvOpFunctionEnd = 56,
++ SpvOpFunctionCall = 57,
++ SpvOpVariable = 59,
++ SpvOpImageTexelPointer = 60,
++ SpvOpLoad = 61,
++ SpvOpStore = 62,
++ SpvOpCopyMemory = 63,
++ SpvOpCopyMemorySized = 64,
++ SpvOpAccessChain = 65,
++ SpvOpInBoundsAccessChain = 66,
++ SpvOpPtrAccessChain = 67,
++ SpvOpArrayLength = 68,
++ SpvOpGenericPtrMemSemantics = 69,
++ SpvOpInBoundsPtrAccessChain = 70,
++ SpvOpDecorate = 71,
++ SpvOpMemberDecorate = 72,
++ SpvOpDecorationGroup = 73,
++ SpvOpGroupDecorate = 74,
++ SpvOpGroupMemberDecorate = 75,
++ SpvOpVectorExtractDynamic = 77,
++ SpvOpVectorInsertDynamic = 78,
++ SpvOpVectorShuffle = 79,
++ SpvOpCompositeConstruct = 80,
++ SpvOpCompositeExtract = 81,
++ SpvOpCompositeInsert = 82,
++ SpvOpCopyObject = 83,
++ SpvOpTranspose = 84,
++ SpvOpSampledImage = 86,
++ SpvOpImageSampleImplicitLod = 87,
++ SpvOpImageSampleExplicitLod = 88,
++ SpvOpImageSampleDrefImplicitLod = 89,
++ SpvOpImageSampleDrefExplicitLod = 90,
++ SpvOpImageSampleProjImplicitLod = 91,
++ SpvOpImageSampleProjExplicitLod = 92,
++ SpvOpImageSampleProjDrefImplicitLod = 93,
++ SpvOpImageSampleProjDrefExplicitLod = 94,
++ SpvOpImageFetch = 95,
++ SpvOpImageGather = 96,
++ SpvOpImageDrefGather = 97,
++ SpvOpImageRead = 98,
++ SpvOpImageWrite = 99,
++ SpvOpImage = 100,
++ SpvOpImageQueryFormat = 101,
++ SpvOpImageQueryOrder = 102,
++ SpvOpImageQuerySizeLod = 103,
++ SpvOpImageQuerySize = 104,
++ SpvOpImageQueryLod = 105,
++ SpvOpImageQueryLevels = 106,
++ SpvOpImageQuerySamples = 107,
++ SpvOpConvertFToU = 109,
++ SpvOpConvertFToS = 110,
++ SpvOpConvertSToF = 111,
++ SpvOpConvertUToF = 112,
++ SpvOpUConvert = 113,
++ SpvOpSConvert = 114,
++ SpvOpFConvert = 115,
++ SpvOpQuantizeToF16 = 116,
++ SpvOpConvertPtrToU = 117,
++ SpvOpSatConvertSToU = 118,
++ SpvOpSatConvertUToS = 119,
++ SpvOpConvertUToPtr = 120,
++ SpvOpPtrCastToGeneric = 121,
++ SpvOpGenericCastToPtr = 122,
++ SpvOpGenericCastToPtrExplicit = 123,
++ SpvOpBitcast = 124,
++ SpvOpSNegate = 126,
++ SpvOpFNegate = 127,
++ SpvOpIAdd = 128,
++ SpvOpFAdd = 129,
++ SpvOpISub = 130,
++ SpvOpFSub = 131,
++ SpvOpIMul = 132,
++ SpvOpFMul = 133,
++ SpvOpUDiv = 134,
++ SpvOpSDiv = 135,
++ SpvOpFDiv = 136,
++ SpvOpUMod = 137,
++ SpvOpSRem = 138,
++ SpvOpSMod = 139,
++ SpvOpFRem = 140,
++ SpvOpFMod = 141,
++ SpvOpVectorTimesScalar = 142,
++ SpvOpMatrixTimesScalar = 143,
++ SpvOpVectorTimesMatrix = 144,
++ SpvOpMatrixTimesVector = 145,
++ SpvOpMatrixTimesMatrix = 146,
++ SpvOpOuterProduct = 147,
++ SpvOpDot = 148,
++ SpvOpIAddCarry = 149,
++ SpvOpISubBorrow = 150,
++ SpvOpUMulExtended = 151,
++ SpvOpSMulExtended = 152,
++ SpvOpAny = 154,
++ SpvOpAll = 155,
++ SpvOpIsNan = 156,
++ SpvOpIsInf = 157,
++ SpvOpIsFinite = 158,
++ SpvOpIsNormal = 159,
++ SpvOpSignBitSet = 160,
++ SpvOpLessOrGreater = 161,
++ SpvOpOrdered = 162,
++ SpvOpUnordered = 163,
++ SpvOpLogicalEqual = 164,
++ SpvOpLogicalNotEqual = 165,
++ SpvOpLogicalOr = 166,
++ SpvOpLogicalAnd = 167,
++ SpvOpLogicalNot = 168,
++ SpvOpSelect = 169,
++ SpvOpIEqual = 170,
++ SpvOpINotEqual = 171,
++ SpvOpUGreaterThan = 172,
++ SpvOpSGreaterThan = 173,
++ SpvOpUGreaterThanEqual = 174,
++ SpvOpSGreaterThanEqual = 175,
++ SpvOpULessThan = 176,
++ SpvOpSLessThan = 177,
++ SpvOpULessThanEqual = 178,
++ SpvOpSLessThanEqual = 179,
++ SpvOpFOrdEqual = 180,
++ SpvOpFUnordEqual = 181,
++ SpvOpFOrdNotEqual = 182,
++ SpvOpFUnordNotEqual = 183,
++ SpvOpFOrdLessThan = 184,
++ SpvOpFUnordLessThan = 185,
++ SpvOpFOrdGreaterThan = 186,
++ SpvOpFUnordGreaterThan = 187,
++ SpvOpFOrdLessThanEqual = 188,
++ SpvOpFUnordLessThanEqual = 189,
++ SpvOpFOrdGreaterThanEqual = 190,
++ SpvOpFUnordGreaterThanEqual = 191,
++ SpvOpShiftRightLogical = 194,
++ SpvOpShiftRightArithmetic = 195,
++ SpvOpShiftLeftLogical = 196,
++ SpvOpBitwiseOr = 197,
++ SpvOpBitwiseXor = 198,
++ SpvOpBitwiseAnd = 199,
++ SpvOpNot = 200,
++ SpvOpBitFieldInsert = 201,
++ SpvOpBitFieldSExtract = 202,
++ SpvOpBitFieldUExtract = 203,
++ SpvOpBitReverse = 204,
++ SpvOpBitCount = 205,
++ SpvOpDPdx = 207,
++ SpvOpDPdy = 208,
++ SpvOpFwidth = 209,
++ SpvOpDPdxFine = 210,
++ SpvOpDPdyFine = 211,
++ SpvOpFwidthFine = 212,
++ SpvOpDPdxCoarse = 213,
++ SpvOpDPdyCoarse = 214,
++ SpvOpFwidthCoarse = 215,
++ SpvOpEmitVertex = 218,
++ SpvOpEndPrimitive = 219,
++ SpvOpEmitStreamVertex = 220,
++ SpvOpEndStreamPrimitive = 221,
++ SpvOpControlBarrier = 224,
++ SpvOpMemoryBarrier = 225,
++ SpvOpAtomicLoad = 227,
++ SpvOpAtomicStore = 228,
++ SpvOpAtomicExchange = 229,
++ SpvOpAtomicCompareExchange = 230,
++ SpvOpAtomicCompareExchangeWeak = 231,
++ SpvOpAtomicIIncrement = 232,
++ SpvOpAtomicIDecrement = 233,
++ SpvOpAtomicIAdd = 234,
++ SpvOpAtomicISub = 235,
++ SpvOpAtomicSMin = 236,
++ SpvOpAtomicUMin = 237,
++ SpvOpAtomicSMax = 238,
++ SpvOpAtomicUMax = 239,
++ SpvOpAtomicAnd = 240,
++ SpvOpAtomicOr = 241,
++ SpvOpAtomicXor = 242,
++ SpvOpPhi = 245,
++ SpvOpLoopMerge = 246,
++ SpvOpSelectionMerge = 247,
++ SpvOpLabel = 248,
++ SpvOpBranch = 249,
++ SpvOpBranchConditional = 250,
++ SpvOpSwitch = 251,
++ SpvOpKill = 252,
++ SpvOpReturn = 253,
++ SpvOpReturnValue = 254,
++ SpvOpUnreachable = 255,
++ SpvOpLifetimeStart = 256,
++ SpvOpLifetimeStop = 257,
++ SpvOpGroupAsyncCopy = 259,
++ SpvOpGroupWaitEvents = 260,
++ SpvOpGroupAll = 261,
++ SpvOpGroupAny = 262,
++ SpvOpGroupBroadcast = 263,
++ SpvOpGroupIAdd = 264,
++ SpvOpGroupFAdd = 265,
++ SpvOpGroupFMin = 266,
++ SpvOpGroupUMin = 267,
++ SpvOpGroupSMin = 268,
++ SpvOpGroupFMax = 269,
++ SpvOpGroupUMax = 270,
++ SpvOpGroupSMax = 271,
++ SpvOpReadPipe = 274,
++ SpvOpWritePipe = 275,
++ SpvOpReservedReadPipe = 276,
++ SpvOpReservedWritePipe = 277,
++ SpvOpReserveReadPipePackets = 278,
++ SpvOpReserveWritePipePackets = 279,
++ SpvOpCommitReadPipe = 280,
++ SpvOpCommitWritePipe = 281,
++ SpvOpIsValidReserveId = 282,
++ SpvOpGetNumPipePackets = 283,
++ SpvOpGetMaxPipePackets = 284,
++ SpvOpGroupReserveReadPipePackets = 285,
++ SpvOpGroupReserveWritePipePackets = 286,
++ SpvOpGroupCommitReadPipe = 287,
++ SpvOpGroupCommitWritePipe = 288,
++ SpvOpEnqueueMarker = 291,
++ SpvOpEnqueueKernel = 292,
++ SpvOpGetKernelNDrangeSubGroupCount = 293,
++ SpvOpGetKernelNDrangeMaxSubGroupSize = 294,
++ SpvOpGetKernelWorkGroupSize = 295,
++ SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296,
++ SpvOpRetainEvent = 297,
++ SpvOpReleaseEvent = 298,
++ SpvOpCreateUserEvent = 299,
++ SpvOpIsValidEvent = 300,
++ SpvOpSetUserEventStatus = 301,
++ SpvOpCaptureEventProfilingInfo = 302,
++ SpvOpGetDefaultQueue = 303,
++ SpvOpBuildNDRange = 304,
++ SpvOpImageSparseSampleImplicitLod = 305,
++ SpvOpImageSparseSampleExplicitLod = 306,
++ SpvOpImageSparseSampleDrefImplicitLod = 307,
++ SpvOpImageSparseSampleDrefExplicitLod = 308,
++ SpvOpImageSparseSampleProjImplicitLod = 309,
++ SpvOpImageSparseSampleProjExplicitLod = 310,
++ SpvOpImageSparseSampleProjDrefImplicitLod = 311,
++ SpvOpImageSparseSampleProjDrefExplicitLod = 312,
++ SpvOpImageSparseFetch = 313,
++ SpvOpImageSparseGather = 314,
++ SpvOpImageSparseDrefGather = 315,
++ SpvOpImageSparseTexelsResident = 316,
++ SpvOpNoLine = 317,
++ SpvOpAtomicFlagTestAndSet = 318,
++ SpvOpAtomicFlagClear = 319,
++} SpvOp;
++
++#endif // #ifndef spirv_H
++
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ *
++ * Authors:
++ * Jason Ekstrand (jason@jlekstrand.net)
++ *
++ */
++
++#include "vtn_private.h"
++#include "nir/nir_vla.h"
++#include "nir/nir_control_flow.h"
++#include "nir/nir_constant_expressions.h"
++
++static struct vtn_ssa_value *
++vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
++{
++ struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
++ val->type = type;
++
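++ /* Scalars and vectors get a single ssa_undef instruction; matrices,
++  * arrays, and structs are built recursively, one element at a time.
++  */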
++ if (glsl_type_is_vector_or_scalar(type)) {
++ unsigned num_components = glsl_get_vector_elements(val->type);
++ nir_ssa_undef_instr *undef =
++ nir_ssa_undef_instr_create(b->shader, num_components);
++
++ nir_instr_insert_before_cf_list(&b->impl->body, &undef->instr);
++ val->def = &undef->def;
++ } else {
++ unsigned elems = glsl_get_length(val->type);
++ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
++ if (glsl_type_is_matrix(type)) {
++ const struct glsl_type *elem_type =
++ glsl_vector_type(glsl_get_base_type(type),
++ glsl_get_vector_elements(type));
++
++ for (unsigned i = 0; i < elems; i++)
++ val->elems[i] = vtn_undef_ssa_value(b, elem_type);
++ } else if (glsl_type_is_array(type)) {
++ const struct glsl_type *elem_type = glsl_get_array_element(type);
++ for (unsigned i = 0; i < elems; i++)
++ val->elems[i] = vtn_undef_ssa_value(b, elem_type);
++ } else {
++ for (unsigned i = 0; i < elems; i++) {
++ const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
++ val->elems[i] = vtn_undef_ssa_value(b, elem_type);
++ }
++ }
++ }
++
++ return val;
++}
++
++static struct vtn_ssa_value *
++vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
++ const struct glsl_type *type)
++{
++ struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant);
++
++ if (entry)
++ return entry->data;
++
++ struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
++ val->type = type;
++
++ switch (glsl_get_base_type(type)) {
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_BOOL:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_DOUBLE:
++ if (glsl_type_is_vector_or_scalar(type)) {
++ unsigned num_components = glsl_get_vector_elements(val->type);
++ nir_load_const_instr *load =
++ nir_load_const_instr_create(b->shader, num_components);
++
++ for (unsigned i = 0; i < num_components; i++)
++ load->value.u[i] = constant->value.u[i];
++
++ nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
++ val->def = &load->def;
++ } else {
++ assert(glsl_type_is_matrix(type));
++ unsigned rows = glsl_get_vector_elements(val->type);
++ unsigned columns = glsl_get_matrix_columns(val->type);
++ val->elems = ralloc_array(b, struct vtn_ssa_value *, columns);
++
++ for (unsigned i = 0; i < columns; i++) {
++ struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value);
++ col_val->type = glsl_get_column_type(val->type);
++ nir_load_const_instr *load =
++ nir_load_const_instr_create(b->shader, rows);
++
++ for (unsigned j = 0; j < rows; j++)
++ load->value.u[j] = constant->value.u[rows * i + j];
++
++ nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
++ col_val->def = &load->def;
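++/* Converts a SPIR-V module (word_count 32-bit words starting at words) to
++ * NIR and returns the nir_function for the named entry point of the given
++ * stage, with the supplied specialization-constant overrides applied.
++ */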
++
++ val->elems[i] = col_val;
++ }
++ }
++ break;
++
++ case GLSL_TYPE_ARRAY: {
++ unsigned elems = glsl_get_length(val->type);
++ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
++ const struct glsl_type *elem_type = glsl_get_array_element(val->type);
++ for (unsigned i = 0; i < elems; i++)
++ val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
++ elem_type);
++ break;
++ }
++
++ case GLSL_TYPE_STRUCT: {
++ unsigned elems = glsl_get_length(val->type);
++ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
++ for (unsigned i = 0; i < elems; i++) {
++ const struct glsl_type *elem_type =
++ glsl_get_struct_field(val->type, i);
++ val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
++ elem_type);
++ }
++ break;
++ }
++
++ default:
++ unreachable("bad constant type");
++ }
++
++ return val;
++}
++
++struct vtn_ssa_value *
++vtn_ssa_value(struct vtn_builder *b, uint32_t value_id)
++{
++ struct vtn_value *val = vtn_untyped_value(b, value_id);
++ switch (val->value_type) {
++ case vtn_value_type_undef:
++ return vtn_undef_ssa_value(b, val->type->type);
++
++ case vtn_value_type_constant:
++ return vtn_const_ssa_value(b, val->constant, val->const_type);
++
++ case vtn_value_type_ssa:
++ return val->ssa;
++
++ case vtn_value_type_access_chain:
++ /* This is needed for function parameters */
++ return vtn_variable_load(b, val->access_chain);
++
++ default:
++ unreachable("Invalid type for an SSA value");
++ }
++}
++
++static char *
++vtn_string_literal(struct vtn_builder *b, const uint32_t *words,
++ unsigned word_count, unsigned *words_used)
++{
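++ /* SPIR-V packs the UTF-8 string, including its NUL terminator, into
++  * consecutive 32-bit words, so at most word_count * sizeof(*words) bytes
++  * are meaningful here.
++  */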
++ char *dup = ralloc_strndup(b, (char *)words, word_count * sizeof(*words));
++ if (words_used) {
++ /* Amount of space taken by the string (including the null) */
++ unsigned len = strlen(dup) + 1;
++ *words_used = DIV_ROUND_UP(len, sizeof(*words));
++ }
++ return dup;
++}
++
++const uint32_t *
++vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
++ const uint32_t *end, vtn_instruction_handler handler)
++{
++ b->file = NULL;
++ b->line = -1;
++ b->col = -1;
++
++ const uint32_t *w = start;
++ while (w < end) {
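++ /* The first word of every SPIR-V instruction packs the opcode into its
++  * low 16 bits and the total word count (including this word) into its
++  * high 16 bits.
++  */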
++ SpvOp opcode = w[0] & SpvOpCodeMask;
++ unsigned count = w[0] >> SpvWordCountShift;
++ assert(count >= 1 && w + count <= end);
++
++ switch (opcode) {
++ case SpvOpNop:
++ break; /* Do nothing */
++
++ case SpvOpLine:
++ b->file = vtn_value(b, w[1], vtn_value_type_string)->str;
++ b->line = w[2];
++ b->col = w[3];
++ break;
++
++ case SpvOpNoLine:
++ b->file = NULL;
++ b->line = -1;
++ b->col = -1;
++ break;
++
++ default:
++ if (!handler(b, opcode, w, count))
++ return w;
++ break;
++ }
++
++ w += count;
++ }
++ assert(w == end);
++ return w;
++}
++
++static void
++vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ switch (opcode) {
++ case SpvOpExtInstImport: {
++ struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension);
++ if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) {
++ val->ext_handler = vtn_handle_glsl450_instruction;
++ } else {
++ assert(!"Unsupported extension");
++ }
++ break;
++ }
++
++ case SpvOpExtInst: {
++ struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
++ bool handled = val->ext_handler(b, w[4], w, count);
++ (void)handled;
++ assert(handled);
++ break;
++ }
++
++ default:
++ unreachable("Unhandled opcode");
++ }
++}
++
++static void
++_foreach_decoration_helper(struct vtn_builder *b,
++ struct vtn_value *base_value,
++ int parent_member,
++ struct vtn_value *value,
++ vtn_decoration_foreach_cb cb, void *data)
++{
++ for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
++ int member;
++ if (dec->scope == VTN_DEC_DECORATION) {
++ member = parent_member;
++ } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) {
++ assert(parent_member == -1);
++ member = dec->scope - VTN_DEC_STRUCT_MEMBER0;
++ } else {
++ /* Not a decoration */
++ continue;
++ }
++
++ if (dec->group) {
++ assert(dec->group->value_type == vtn_value_type_decoration_group);
++ _foreach_decoration_helper(b, base_value, member, dec->group,
++ cb, data);
++ } else {
++ cb(b, base_value, member, dec, data);
++ }
++ }
++}
++
++/** Iterates (recursively if needed) over all of the decorations on a value
++ *
++ * This function iterates over all of the decorations applied to a given
++ * value. If it encounters a decoration group, it recurses into the group
++ * and iterates over all of those decorations as well.
++ */
++void
++vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
++ vtn_decoration_foreach_cb cb, void *data)
++{
++ _foreach_decoration_helper(b, value, -1, value, cb, data);
++}
++
++void
++vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
++ vtn_execution_mode_foreach_cb cb, void *data)
++{
++ for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
++ if (dec->scope != VTN_DEC_EXECUTION_MODE)
++ continue;
++
++ assert(dec->group == NULL);
++ cb(b, value, dec, data);
++ }
++}
++
++static void
++vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ const uint32_t *w_end = w + count;
++ const uint32_t target = w[1];
++ w += 2;
++
++ switch (opcode) {
++ case SpvOpDecorationGroup:
++ vtn_push_value(b, target, vtn_value_type_decoration_group);
++ break;
++
++ case SpvOpDecorate:
++ case SpvOpMemberDecorate:
++ case SpvOpExecutionMode: {
++ struct vtn_value *val = &b->values[target];
++
++ struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
++ switch (opcode) {
++ case SpvOpDecorate:
++ dec->scope = VTN_DEC_DECORATION;
++ break;
++ case SpvOpMemberDecorate:
++ dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
++ break;
++ case SpvOpExecutionMode:
++ dec->scope = VTN_DEC_EXECUTION_MODE;
++ break;
++ default:
++ unreachable("Invalid decoration opcode");
++ }
++ dec->decoration = *(w++);
++ dec->literals = w;
++
++ /* Link into the list */
++ dec->next = val->decoration;
++ val->decoration = dec;
++ break;
++ }
++
++ case SpvOpGroupMemberDecorate:
++ case SpvOpGroupDecorate: {
++ struct vtn_value *group =
++ vtn_value(b, target, vtn_value_type_decoration_group);
++
++ for (; w < w_end; w++) {
++ struct vtn_value *val = vtn_untyped_value(b, *w);
++ struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
++
++ dec->group = group;
++ if (opcode == SpvOpGroupDecorate) {
++ dec->scope = VTN_DEC_DECORATION;
++ } else {
++ dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
++ }
++
++ /* Link into the list */
++ dec->next = val->decoration;
++ val->decoration = dec;
++ }
++ break;
++ }
++
++ default:
++ unreachable("Unhandled opcode");
++ }
++}
++
++struct member_decoration_ctx {
++ struct glsl_struct_field *fields;
++ struct vtn_type *type;
++};
++
++/* does a shallow copy of a vtn_type */
++
++static struct vtn_type *
++vtn_type_copy(struct vtn_builder *b, struct vtn_type *src)
++{
++ struct vtn_type *dest = ralloc(b, struct vtn_type);
++ dest->type = src->type;
++ dest->is_builtin = src->is_builtin;
++ if (src->is_builtin)
++ dest->builtin = src->builtin;
++
++ if (!glsl_type_is_scalar(src->type)) {
++ switch (glsl_get_base_type(src->type)) {
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_BOOL:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_DOUBLE:
++ case GLSL_TYPE_ARRAY:
++ dest->row_major = src->row_major;
++ dest->stride = src->stride;
++ dest->array_element = src->array_element;
++ break;
++
++ case GLSL_TYPE_STRUCT: {
++ unsigned elems = glsl_get_length(src->type);
++
++ dest->members = ralloc_array(b, struct vtn_type *, elems);
++ memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *));
++
++ dest->offsets = ralloc_array(b, unsigned, elems);
++ memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned));
++ break;
++ }
++
++ default:
++ unreachable("unhandled type");
++ }
++ }
++
++ return dest;
++}
++
++static struct vtn_type *
++mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member)
++{
++ type->members[member] = vtn_type_copy(b, type->members[member]);
++ type = type->members[member];
++
++ /* We may have an array of matrices.... Oh, joy! */
++ while (glsl_type_is_array(type->type)) {
++ type->array_element = vtn_type_copy(b, type->array_element);
++ type = type->array_element;
++ }
++
++ assert(glsl_type_is_matrix(type->type));
++
++ return type;
++}
++
++static void
++struct_member_decoration_cb(struct vtn_builder *b,
++ struct vtn_value *val, int member,
++ const struct vtn_decoration *dec, void *void_ctx)
++{
++ struct member_decoration_ctx *ctx = void_ctx;
++
++ if (member < 0)
++ return;
++
++ switch (dec->decoration) {
++ case SpvDecorationRelaxedPrecision:
++ break; /* FIXME: Do nothing with this for now. */
++ case SpvDecorationNoPerspective:
++ ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
++ break;
++ case SpvDecorationFlat:
++ ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT;
++ break;
++ case SpvDecorationCentroid:
++ ctx->fields[member].centroid = true;
++ break;
++ case SpvDecorationSample:
++ ctx->fields[member].sample = true;
++ break;
++ case SpvDecorationLocation:
++ ctx->fields[member].location = dec->literals[0];
++ break;
++ case SpvDecorationBuiltIn:
++ ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]);
++ ctx->type->members[member]->is_builtin = true;
++ ctx->type->members[member]->builtin = dec->literals[0];
++ ctx->type->builtin_block = true;
++ break;
++ case SpvDecorationOffset:
++ ctx->type->offsets[member] = dec->literals[0];
++ break;
++ case SpvDecorationMatrixStride:
++ mutable_matrix_member(b, ctx->type, member)->stride = dec->literals[0];
++ break;
++ case SpvDecorationColMajor:
++ break; /* Nothing to do here. Column-major is the default. */
++ case SpvDecorationRowMajor:
++ mutable_matrix_member(b, ctx->type, member)->row_major = true;
++ break;
++ default:
++ unreachable("Unhandled member decoration");
++ }
++}
++
++static void
++type_decoration_cb(struct vtn_builder *b,
++ struct vtn_value *val, int member,
++ const struct vtn_decoration *dec, void *ctx)
++{
++ struct vtn_type *type = val->type;
++
++ if (member != -1)
++ return;
++
++ switch (dec->decoration) {
++ case SpvDecorationArrayStride:
++ type->stride = dec->literals[0];
++ break;
++ case SpvDecorationBlock:
++ type->block = true;
++ break;
++ case SpvDecorationBufferBlock:
++ type->buffer_block = true;
++ break;
++ case SpvDecorationGLSLShared:
++ case SpvDecorationGLSLPacked:
++ /* Ignore these, since we get explicit offsets anyways */
++ break;
++
++ case SpvDecorationStream:
++ assert(dec->literals[0] == 0);
++ break;
++
++ default:
++ unreachable("Unhandled type decoration");
++ }
++}
++
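++/* Maps a SPIR-V image format onto the numeric value of the corresponding GL
++ * internal format enum; the result is stored in vtn_type::image_format.
++ */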
++static unsigned
++translate_image_format(SpvImageFormat format)
++{
++ switch (format) {
++ case SpvImageFormatUnknown: return 0; /* GL_NONE */
++ case SpvImageFormatRgba32f: return 0x8814; /* GL_RGBA32F */
++ case SpvImageFormatRgba16f: return 0x881A; /* GL_RGBA16F */
++ case SpvImageFormatR32f: return 0x822E; /* GL_R32F */
++ case SpvImageFormatRgba8: return 0x8058; /* GL_RGBA8 */
++ case SpvImageFormatRgba8Snorm: return 0x8F97; /* GL_RGBA8_SNORM */
++ case SpvImageFormatRg32f: return 0x8230; /* GL_RG32F */
++ case SpvImageFormatRg16f: return 0x822F; /* GL_RG16F */
++ case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */
++ case SpvImageFormatR16f: return 0x822D; /* GL_R16F */
++ case SpvImageFormatRgba16: return 0x805B; /* GL_RGBA16 */
++ case SpvImageFormatRgb10A2: return 0x8059; /* GL_RGB10_A2 */
++ case SpvImageFormatRg16: return 0x822C; /* GL_RG16 */
++ case SpvImageFormatRg8: return 0x822B; /* GL_RG8 */
++ case SpvImageFormatR16: return 0x822A; /* GL_R16 */
++ case SpvImageFormatR8: return 0x8229; /* GL_R8 */
++ case SpvImageFormatRgba16Snorm: return 0x8F9B; /* GL_RGBA16_SNORM */
++ case SpvImageFormatRg16Snorm: return 0x8F99; /* GL_RG16_SNORM */
++ case SpvImageFormatRg8Snorm: return 0x8F95; /* GL_RG8_SNORM */
++ case SpvImageFormatR16Snorm: return 0x8F98; /* GL_R16_SNORM */
++ case SpvImageFormatR8Snorm: return 0x8F94; /* GL_R8_SNORM */
++ case SpvImageFormatRgba32i: return 0x8D82; /* GL_RGBA32I */
++ case SpvImageFormatRgba16i: return 0x8D88; /* GL_RGBA16I */
++ case SpvImageFormatRgba8i: return 0x8D8E; /* GL_RGBA8I */
++ case SpvImageFormatR32i: return 0x8235; /* GL_R32I */
++ case SpvImageFormatRg32i: return 0x823B; /* GL_RG32I */
++ case SpvImageFormatRg16i: return 0x8239; /* GL_RG16I */
++ case SpvImageFormatRg8i: return 0x8237; /* GL_RG8I */
++ case SpvImageFormatR16i: return 0x8233; /* GL_R16I */
++ case SpvImageFormatR8i: return 0x8231; /* GL_R8I */
++ case SpvImageFormatRgba32ui: return 0x8D70; /* GL_RGBA32UI */
++ case SpvImageFormatRgba16ui: return 0x8D76; /* GL_RGBA16UI */
++ case SpvImageFormatRgba8ui: return 0x8D7C; /* GL_RGBA8UI */
++ case SpvImageFormatR32ui: return 0x8236; /* GL_R32UI */
++ case SpvImageFormatRgb10a2ui: return 0x906F; /* GL_RGB10_A2UI */
++ case SpvImageFormatRg32ui: return 0x823C; /* GL_RG32UI */
++ case SpvImageFormatRg16ui: return 0x823A; /* GL_RG16UI */
++ case SpvImageFormatRg8ui: return 0x8238; /* GL_RG8UI */
++ case SpvImageFormatR16ui: return 0x8234; /* GL_R16UI */
++ case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */
++ default:
++ assert(!"Invalid image format");
++ return 0;
++ }
++}
++
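++/* Builds the vtn_type (and underlying glsl_type) for an OpType* instruction
++ * and then applies any type decorations via type_decoration_cb.
++ */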
++static void
++vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type);
++
++ val->type = rzalloc(b, struct vtn_type);
++ val->type->is_builtin = false;
++ val->type->val = val;
++
++ switch (opcode) {
++ case SpvOpTypeVoid:
++ val->type->type = glsl_void_type();
++ break;
++ case SpvOpTypeBool:
++ val->type->type = glsl_bool_type();
++ break;
++ case SpvOpTypeInt:
++ val->type->type = glsl_int_type();
++ break;
++ case SpvOpTypeFloat:
++ val->type->type = glsl_float_type();
++ break;
++
++ case SpvOpTypeVector: {
++ struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type;
++ unsigned elems = w[3];
++
++ assert(glsl_type_is_scalar(base->type));
++ val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems);
++
++ /* Vectors implicitly have sizeof(base_type) stride. For now, this
++ * is always 4 bytes. This will have to change if we want to start
++ * supporting doubles or half-floats.
++ */
++ val->type->stride = 4;
++ val->type->array_element = base;
++ break;
++ }
++
++ case SpvOpTypeMatrix: {
++ struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type;
++ unsigned columns = w[3];
++
++ assert(glsl_type_is_vector(base->type));
++ val->type->type = glsl_matrix_type(glsl_get_base_type(base->type),
++ glsl_get_vector_elements(base->type),
++ columns);
++ assert(!glsl_type_is_error(val->type->type));
++ val->type->array_element = base;
++ val->type->row_major = false;
++ val->type->stride = 0;
++ break;
++ }
++
++ case SpvOpTypeRuntimeArray:
++ case SpvOpTypeArray: {
++ struct vtn_type *array_element =
++ vtn_value(b, w[2], vtn_value_type_type)->type;
++
++ unsigned length;
++ if (opcode == SpvOpTypeRuntimeArray) {
++ /* A length of 0 is used to denote unsized arrays */
++ length = 0;
++ } else {
++ length =
++ vtn_value(b, w[3], vtn_value_type_constant)->constant->value.u[0];
++ }
++
++ val->type->type = glsl_array_type(array_element->type, length);
++ val->type->array_element = array_element;
++ val->type->stride = 0;
++ break;
++ }
++
++ case SpvOpTypeStruct: {
++ unsigned num_fields = count - 2;
++ val->type->members = ralloc_array(b, struct vtn_type *, num_fields);
++ val->type->offsets = ralloc_array(b, unsigned, num_fields);
++
++ NIR_VLA(struct glsl_struct_field, fields, count);
++ for (unsigned i = 0; i < num_fields; i++) {
++ val->type->members[i] =
++ vtn_value(b, w[i + 2], vtn_value_type_type)->type;
++ fields[i] = (struct glsl_struct_field) {
++ .type = val->type->members[i]->type,
++ .name = ralloc_asprintf(b, "field%d", i),
++ .location = -1,
++ };
++ }
++
++ struct member_decoration_ctx ctx = {
++ .fields = fields,
++ .type = val->type
++ };
++
++ vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx);
++
++ const char *name = val->name ? val->name : "struct";
++
++ val->type->type = glsl_struct_type(fields, num_fields, name);
++ break;
++ }
++
++ case SpvOpTypeFunction: {
++ const struct glsl_type *return_type =
++ vtn_value(b, w[2], vtn_value_type_type)->type->type;
++ NIR_VLA(struct glsl_function_param, params, count - 3);
++ for (unsigned i = 0; i < count - 3; i++) {
++ params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type;
++
++ /* FIXME: */
++ params[i].in = true;
++ params[i].out = true;
++ }
++ val->type->type = glsl_function_type(return_type, params, count - 3);
++ break;
++ }
++
++ case SpvOpTypePointer:
++ /* FIXME: For now, we'll just do the really lame thing and return
++ * the same type. The validator should ensure that the proper number
++ * of dereferences happen
++ */
++ val->type = vtn_value(b, w[3], vtn_value_type_type)->type;
++ break;
++
++ case SpvOpTypeImage: {
++ const struct glsl_type *sampled_type =
++ vtn_value(b, w[2], vtn_value_type_type)->type->type;
++
++ assert(glsl_type_is_vector_or_scalar(sampled_type));
++
++ enum glsl_sampler_dim dim;
++ switch ((SpvDim)w[3]) {
++ case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break;
++ case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break;
++ case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break;
++ case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break;
++ case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break;
++ case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break;
++ default:
++ unreachable("Invalid SPIR-V Sampler dimension");
++ }
++
++ bool is_shadow = w[4];
++ bool is_array = w[5];
++ bool multisampled = w[6];
++ unsigned sampled = w[7];
++ SpvImageFormat format = w[8];
++
++ if (count > 9)
++ val->type->access_qualifier = w[9];
++ else
++ val->type->access_qualifier = SpvAccessQualifierReadWrite;
++
++ assert(!multisampled && "FIXME: Handle multi-sampled textures");
++
++ val->type->image_format = translate_image_format(format);
++
++ if (sampled == 1) {
++ val->type->type = glsl_sampler_type(dim, is_shadow, is_array,
++ glsl_get_base_type(sampled_type));
++ } else if (sampled == 2) {
++ assert(format);
++ assert(!is_shadow);
++ val->type->type = glsl_image_type(dim, is_array,
++ glsl_get_base_type(sampled_type));
++ } else {
++ assert(!"We need to know if the image will be sampled");
++ }
++ break;
++ }
++
++ case SpvOpTypeSampledImage:
++ val->type = vtn_value(b, w[2], vtn_value_type_type)->type;
++ break;
++
++ case SpvOpTypeSampler:
++ /* The actual sampler type here doesn't really matter. It gets
++ * thrown away the moment you combine it with an image. What really
++ * matters is that it's a sampler type as opposed to an integer type
++ * so the backend knows what to do.
++ *
++ * TODO: Eventually we should consider adding a "bare sampler" type
++ * to glsl_types.
++ */
++ val->type->type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false,
++ GLSL_TYPE_FLOAT);
++ break;
++
++ case SpvOpTypeOpaque:
++ case SpvOpTypeEvent:
++ case SpvOpTypeDeviceEvent:
++ case SpvOpTypeReserveId:
++ case SpvOpTypeQueue:
++ case SpvOpTypePipe:
++ default:
++ unreachable("Unhandled opcode");
++ }
++
++ vtn_foreach_decoration(b, val, type_decoration_cb, NULL);
++}
++
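++/* Recursively builds a zero-initialized nir_constant for OpConstantNull. */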
++static nir_constant *
++vtn_null_constant(struct vtn_builder *b, const struct glsl_type *type)
++{
++ nir_constant *c = rzalloc(b, nir_constant);
++
++ switch (glsl_get_base_type(type)) {
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_BOOL:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_DOUBLE:
++ /* Nothing to do here. It's already initialized to zero */
++ break;
++
++ case GLSL_TYPE_ARRAY:
++ assert(glsl_get_length(type) > 0);
++ c->num_elements = glsl_get_length(type);
++ c->elements = ralloc_array(b, nir_constant *, c->num_elements);
++
++ c->elements[0] = vtn_null_constant(b, glsl_get_array_element(type));
++ for (unsigned i = 1; i < c->num_elements; i++)
++ c->elements[i] = c->elements[0];
++ break;
++
++ case GLSL_TYPE_STRUCT:
++ c->num_elements = glsl_get_length(type);
++ c->elements = ralloc_array(b, nir_constant *, c->num_elements);
++
++ for (unsigned i = 0; i < c->num_elements; i++) {
++ c->elements[i] = vtn_null_constant(b, glsl_get_struct_field(type, i));
++ }
++ break;
++
++ default:
++ unreachable("Invalid type for null constant");
++ }
++
++ return c;
++}
++
++static void
++spec_constant_decoration_cb(struct vtn_builder *b, struct vtn_value *v,
++ int member, const struct vtn_decoration *dec,
++ void *data)
++{
++ assert(member == -1);
++ if (dec->decoration != SpvDecorationSpecId)
++ return;
++
++ uint32_t *const_value = data;
++
++ for (unsigned i = 0; i < b->num_specializations; i++) {
++ if (b->specializations[i].id == dec->literals[0]) {
++ *const_value = b->specializations[i].data;
++ return;
++ }
++ }
++}
++
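++/* Returns const_value unless a SpecId decoration on the value matches one of
++ * the specializations supplied to the builder, in which case that
++ * specialization data is returned instead.
++ */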
++static uint32_t
++get_specialization(struct vtn_builder *b, struct vtn_value *val,
++ uint32_t const_value)
++{
++ vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &const_value);
++ return const_value;
++}
++
++static void
++vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant);
++ val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type;
++ val->constant = rzalloc(b, nir_constant);
++ switch (opcode) {
++ case SpvOpConstantTrue:
++ assert(val->const_type == glsl_bool_type());
++ val->constant->value.u[0] = NIR_TRUE;
++ break;
++ case SpvOpConstantFalse:
++ assert(val->const_type == glsl_bool_type());
++ val->constant->value.u[0] = NIR_FALSE;
++ break;
++
++ case SpvOpSpecConstantTrue:
++ case SpvOpSpecConstantFalse: {
++ assert(val->const_type == glsl_bool_type());
++ uint32_t int_val =
++ get_specialization(b, val, (opcode == SpvOpSpecConstantTrue));
++ val->constant->value.u[0] = int_val ? NIR_TRUE : NIR_FALSE;
++ break;
++ }
++
++ case SpvOpConstant:
++ assert(glsl_type_is_scalar(val->const_type));
++ val->constant->value.u[0] = w[3];
++ break;
++ case SpvOpSpecConstant:
++ assert(glsl_type_is_scalar(val->const_type));
++ val->constant->value.u[0] = get_specialization(b, val, w[3]);
++ break;
++ case SpvOpSpecConstantComposite:
++ case SpvOpConstantComposite: {
++ unsigned elem_count = count - 3;
++ nir_constant **elems = ralloc_array(b, nir_constant *, elem_count);
++ for (unsigned i = 0; i < elem_count; i++)
++ elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant;
++
++ switch (glsl_get_base_type(val->const_type)) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_BOOL:
++ if (glsl_type_is_matrix(val->const_type)) {
++ unsigned rows = glsl_get_vector_elements(val->const_type);
++ assert(glsl_get_matrix_columns(val->const_type) == elem_count);
++ for (unsigned i = 0; i < elem_count; i++)
++ for (unsigned j = 0; j < rows; j++)
++ val->constant->value.u[rows * i + j] = elems[i]->value.u[j];
++ } else {
++ assert(glsl_type_is_vector(val->const_type));
++ assert(glsl_get_vector_elements(val->const_type) == elem_count);
++ for (unsigned i = 0; i < elem_count; i++)
++ val->constant->value.u[i] = elems[i]->value.u[0];
++ }
++ ralloc_free(elems);
++ break;
++
++ case GLSL_TYPE_STRUCT:
++ case GLSL_TYPE_ARRAY:
++ ralloc_steal(val->constant, elems);
++ val->constant->num_elements = elem_count;
++ val->constant->elements = elems;
++ break;
++
++ default:
++ unreachable("Unsupported type for constants");
++ }
++ break;
++ }
++
++ case SpvOpSpecConstantOp: {
++ SpvOp opcode = get_specialization(b, val, w[3]);
++ switch (opcode) {
++ case SpvOpVectorShuffle: {
++ struct vtn_value *v0 = vtn_value(b, w[4], vtn_value_type_constant);
++ struct vtn_value *v1 = vtn_value(b, w[5], vtn_value_type_constant);
++ unsigned len0 = glsl_get_vector_elements(v0->const_type);
++ unsigned len1 = glsl_get_vector_elements(v1->const_type);
++
++ uint32_t u[8];
++ for (unsigned i = 0; i < len0; i++)
++ u[i] = v0->constant->value.u[i];
++ for (unsigned i = 0; i < len1; i++)
++ u[len0 + i] = v1->constant->value.u[i];
++
++ for (unsigned i = 0; i < count - 6; i++) {
++ uint32_t comp = w[i + 6];
++ if (comp == (uint32_t)-1) {
++ val->constant->value.u[i] = 0xdeadbeef;
++ } else {
++ val->constant->value.u[i] = u[comp];
++ }
++ }
++ return;
++ }
++
++ case SpvOpCompositeExtract:
++ case SpvOpCompositeInsert: {
++ struct vtn_value *comp;
++ unsigned deref_start;
++ struct nir_constant **c;
++ if (opcode == SpvOpCompositeExtract) {
++ comp = vtn_value(b, w[4], vtn_value_type_constant);
++ deref_start = 5;
++ c = &comp->constant;
++ } else {
++ comp = vtn_value(b, w[5], vtn_value_type_constant);
++ deref_start = 6;
++ val->constant = nir_constant_clone(comp->constant,
++ (nir_variable *)b);
++ c = &val->constant;
++ }
++
++ int elem = -1;
++ const struct glsl_type *type = comp->const_type;
++ for (unsigned i = deref_start; i < count; i++) {
++ switch (glsl_get_base_type(type)) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_BOOL:
++ /* If we hit this granularity, we're picking off an element */
++ if (elem < 0)
++ elem = 0;
++
++ if (glsl_type_is_matrix(type)) {
++ elem += w[i] * glsl_get_vector_elements(type);
++ type = glsl_get_column_type(type);
++ } else {
++ assert(glsl_type_is_vector(type));
++ elem += w[i];
++ type = glsl_scalar_type(glsl_get_base_type(type));
++ }
++ continue;
++
++ case GLSL_TYPE_ARRAY:
++ c = &(*c)->elements[w[i]];
++ type = glsl_get_array_element(type);
++ continue;
++
++ case GLSL_TYPE_STRUCT:
++ c = &(*c)->elements[w[i]];
++ type = glsl_get_struct_field(type, w[i]);
++ continue;
++
++ default:
++ unreachable("Invalid constant type");
++ }
++ }
++
++ if (opcode == SpvOpCompositeExtract) {
++ if (elem == -1) {
++ val->constant = *c;
++ } else {
++ unsigned num_components = glsl_get_vector_elements(type);
++ for (unsigned i = 0; i < num_components; i++)
++ val->constant->value.u[i] = (*c)->value.u[elem + i];
++ }
++ } else {
++ struct vtn_value *insert =
++ vtn_value(b, w[4], vtn_value_type_constant);
++ assert(insert->const_type == type);
++ if (elem == -1) {
++ *c = insert->constant;
++ } else {
++ unsigned num_components = glsl_get_vector_elements(type);
++ for (unsigned i = 0; i < num_components; i++)
++ (*c)->value.u[elem + i] = insert->constant->value.u[i];
++ }
++ }
++ return;
++ }
++
++ default: {
++ bool swap;
++ nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap);
++
++ unsigned num_components = glsl_get_vector_elements(val->const_type);
++
++ nir_const_value src[3];
++ assert(count <= 7);
++ for (unsigned i = 0; i < count - 4; i++) {
++ nir_constant *c =
++ vtn_value(b, w[4 + i], vtn_value_type_constant)->constant;
++
++ unsigned j = swap ? 1 - i : i;
++ for (unsigned k = 0; k < num_components; k++)
++ src[j].u[k] = c->value.u[k];
++ }
++
++ nir_const_value res = nir_eval_const_opcode(op, num_components, src);
++
++ for (unsigned k = 0; k < num_components; k++)
++ val->constant->value.u[k] = res.u[k];
++
++ return;
++ } /* default */
++ }
++ }
++
++ case SpvOpConstantNull:
++ val->constant = vtn_null_constant(b, val->const_type);
++ break;
++
++ case SpvOpConstantSampler:
++ assert(!"OpConstantSampler requires Kernel Capability");
++ break;
++
++ default:
++ unreachable("Unhandled opcode");
++ }
++}
++
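++/* Lowers OpFunctionCall to a nir_call_instr.  Pointer arguments are passed
++ * as variable dereferences, SSA arguments are spilled to local temporaries,
++ * and a non-void return value is read back out of a temporary.
++ */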
++static void
++vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ struct nir_function *callee =
++ vtn_value(b, w[3], vtn_value_type_function)->func->impl->function;
++
++ nir_call_instr *call = nir_call_instr_create(b->nb.shader, callee);
++ for (unsigned i = 0; i < call->num_params; i++) {
++ unsigned arg_id = w[4 + i];
++ struct vtn_value *arg = vtn_untyped_value(b, arg_id);
++ if (arg->value_type == vtn_value_type_access_chain) {
++ nir_deref_var *d = vtn_access_chain_to_deref(b, arg->access_chain);
++ call->params[i] = nir_deref_as_var(nir_copy_deref(call, &d->deref));
++ } else {
++ struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id);
++
++ /* Make a temporary to store the argument in */
++ nir_variable *tmp =
++ nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp");
++ call->params[i] = nir_deref_var_create(call, tmp);
++
++ vtn_local_store(b, arg_ssa, call->params[i]);
++ }
++ }
++
++ nir_variable *out_tmp = NULL;
++ if (!glsl_type_is_void(callee->return_type)) {
++ out_tmp = nir_local_variable_create(b->impl, callee->return_type,
++ "out_tmp");
++ call->return_deref = nir_deref_var_create(call, out_tmp);
++ }
++
++ nir_builder_instr_insert(&b->nb, &call->instr);
++
++ if (glsl_type_is_void(callee->return_type)) {
++ vtn_push_value(b, w[2], vtn_value_type_undef);
++ } else {
++ struct vtn_value *retval = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ retval->ssa = vtn_local_load(b, call->return_deref);
++ }
++}
++
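++/* Allocates a vtn_ssa_value tree for the given type: vectors and scalars are
++ * leaves, while matrices, arrays, and structs get one element per column,
++ * array element, or member.
++ */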
++struct vtn_ssa_value *
++vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
++{
++ struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
++ val->type = type;
++
++ if (!glsl_type_is_vector_or_scalar(type)) {
++ unsigned elems = glsl_get_length(type);
++ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
++ for (unsigned i = 0; i < elems; i++) {
++ const struct glsl_type *child_type;
++
++ switch (glsl_get_base_type(type)) {
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_BOOL:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_DOUBLE:
++ child_type = glsl_get_column_type(type);
++ break;
++ case GLSL_TYPE_ARRAY:
++ child_type = glsl_get_array_element(type);
++ break;
++ case GLSL_TYPE_STRUCT:
++ child_type = glsl_get_struct_field(type, i);
++ break;
++ default:
++ unreachable("unknown base type");
++ }
++
++ val->elems[i] = vtn_create_ssa_value(b, child_type);
++ }
++ }
++
++ return val;
++}
++
++static nir_tex_src
++vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type)
++{
++ nir_tex_src src;
++ src.src = nir_src_for_ssa(vtn_ssa_value(b, index)->def);
++ src.src_type = type;
++ return src;
++}
++
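++/* Handles OpSampledImage, OpImage, and the image sampling/query opcodes,
++ * building a nir_tex_instr from the coordinate, depth-comparison value, LOD,
++ * and any optional image operands.
++ */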
++static void
++vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ if (opcode == SpvOpSampledImage) {
++ struct vtn_value *val =
++ vtn_push_value(b, w[2], vtn_value_type_sampled_image);
++ val->sampled_image = ralloc(b, struct vtn_sampled_image);
++ val->sampled_image->image =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++ val->sampled_image->sampler =
++ vtn_value(b, w[4], vtn_value_type_access_chain)->access_chain;
++ return;
++ } else if (opcode == SpvOpImage) {
++ struct vtn_value *val =
++ vtn_push_value(b, w[2], vtn_value_type_access_chain);
++ struct vtn_value *src_val = vtn_untyped_value(b, w[3]);
++ if (src_val->value_type == vtn_value_type_sampled_image) {
++ val->access_chain = src_val->sampled_image->image;
++ } else {
++ assert(src_val->value_type == vtn_value_type_access_chain);
++ val->access_chain = src_val->access_chain;
++ }
++ return;
++ }
++
++ struct vtn_type *ret_type = vtn_value(b, w[1], vtn_value_type_type)->type;
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++
++ struct vtn_sampled_image sampled;
++ struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]);
++ if (sampled_val->value_type == vtn_value_type_sampled_image) {
++ sampled = *sampled_val->sampled_image;
++ } else {
++ assert(sampled_val->value_type == vtn_value_type_access_chain);
++ sampled.image = NULL;
++ sampled.sampler = sampled_val->access_chain;
++ }
++
++ nir_tex_src srcs[8]; /* 8 should be enough */
++ nir_tex_src *p = srcs;
++
++ unsigned idx = 4;
++
++ bool has_coord = false;
++ switch (opcode) {
++ case SpvOpImageSampleImplicitLod:
++ case SpvOpImageSampleExplicitLod:
++ case SpvOpImageSampleDrefImplicitLod:
++ case SpvOpImageSampleDrefExplicitLod:
++ case SpvOpImageSampleProjImplicitLod:
++ case SpvOpImageSampleProjExplicitLod:
++ case SpvOpImageSampleProjDrefImplicitLod:
++ case SpvOpImageSampleProjDrefExplicitLod:
++ case SpvOpImageFetch:
++ case SpvOpImageGather:
++ case SpvOpImageDrefGather:
++ case SpvOpImageQueryLod: {
++ /* All these types have the coordinate as their first real argument */
++ struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]);
++ has_coord = true;
++ p->src = nir_src_for_ssa(coord->def);
++ p->src_type = nir_tex_src_coord;
++ p++;
++ break;
++ }
++
++ default:
++ break;
++ }
++
++ /* These all have an explicit depth value as their next source */
++ switch (opcode) {
++ case SpvOpImageSampleDrefImplicitLod:
++ case SpvOpImageSampleDrefExplicitLod:
++ case SpvOpImageSampleProjDrefImplicitLod:
++ case SpvOpImageSampleProjDrefExplicitLod:
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor);
++ break;
++ default:
++ break;
++ }
++
++ /* For OpImageQuerySizeLod, we always have an LOD */
++ if (opcode == SpvOpImageQuerySizeLod)
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
++
++ /* Figure out the base texture operation */
++ nir_texop texop;
++ switch (opcode) {
++ case SpvOpImageSampleImplicitLod:
++ case SpvOpImageSampleDrefImplicitLod:
++ case SpvOpImageSampleProjImplicitLod:
++ case SpvOpImageSampleProjDrefImplicitLod:
++ texop = nir_texop_tex;
++ break;
++
++ case SpvOpImageSampleExplicitLod:
++ case SpvOpImageSampleDrefExplicitLod:
++ case SpvOpImageSampleProjExplicitLod:
++ case SpvOpImageSampleProjDrefExplicitLod:
++ texop = nir_texop_txl;
++ break;
++
++ case SpvOpImageFetch:
++ texop = nir_texop_txf;
++ break;
++
++ case SpvOpImageGather:
++ case SpvOpImageDrefGather:
++ texop = nir_texop_tg4;
++ break;
++
++ case SpvOpImageQuerySizeLod:
++ case SpvOpImageQuerySize:
++ texop = nir_texop_txs;
++ break;
++
++ case SpvOpImageQueryLod:
++ texop = nir_texop_lod;
++ break;
++
++ case SpvOpImageQueryLevels:
++ texop = nir_texop_query_levels;
++ break;
++
++ case SpvOpImageQuerySamples:
++ default:
++ unreachable("Unhandled opcode");
++ }
++
++ /* Now we need to handle some number of optional arguments */
++ if (idx < count) {
++ uint32_t operands = w[idx++];
++
++ if (operands & SpvImageOperandsBiasMask) {
++ assert(texop == nir_texop_tex);
++ texop = nir_texop_txb;
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias);
++ }
++
++ if (operands & SpvImageOperandsLodMask) {
++ assert(texop == nir_texop_txl || texop == nir_texop_txf ||
++ texop == nir_texop_txs);
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
++ }
++
++ if (operands & SpvImageOperandsGradMask) {
++ assert(texop == nir_texop_tex);
++ texop = nir_texop_txd;
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx);
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy);
++ }
++
++ if (operands & SpvImageOperandsOffsetMask ||
++ operands & SpvImageOperandsConstOffsetMask)
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset);
++
++ if (operands & SpvImageOperandsConstOffsetsMask)
++ assert(!"Constant offsets to texture gather not yet implemented");
++
++ if (operands & SpvImageOperandsSampleMask) {
++ assert(texop == nir_texop_txf);
++ texop = nir_texop_txf_ms;
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index);
++ }
++ }
++ /* We should have now consumed exactly all of the arguments */
++ assert(idx == count);
++
++ nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs);
++ instr->op = texop;
++
++ memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src));
++
++ const struct glsl_type *image_type;
++ if (sampled.image) {
++ image_type = sampled.image->var->var->interface_type;
++ } else {
++ image_type = sampled.sampler->var->var->interface_type;
++ }
++
++ instr->sampler_dim = glsl_get_sampler_dim(image_type);
++ instr->is_array = glsl_sampler_type_is_array(image_type);
++ instr->is_shadow = glsl_sampler_type_is_shadow(image_type);
++ instr->is_new_style_shadow = instr->is_shadow;
++
++ if (has_coord) {
++ switch (instr->sampler_dim) {
++ case GLSL_SAMPLER_DIM_1D:
++ case GLSL_SAMPLER_DIM_BUF:
++ instr->coord_components = 1;
++ break;
++ case GLSL_SAMPLER_DIM_2D:
++ case GLSL_SAMPLER_DIM_RECT:
++ instr->coord_components = 2;
++ break;
++ case GLSL_SAMPLER_DIM_3D:
++ case GLSL_SAMPLER_DIM_CUBE:
++ case GLSL_SAMPLER_DIM_MS:
++ instr->coord_components = 3;
++ break;
++ default:
++ unreachable("Invalid sampler type");
++ }
++
++ if (instr->is_array)
++ instr->coord_components++;
++ } else {
++ instr->coord_components = 0;
++ }
++
++ switch (glsl_get_sampler_result_type(image_type)) {
++ case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break;
++ case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break;
++ case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break;
++ case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break;
++ default:
++ unreachable("Invalid base type for sampler result");
++ }
++
++ nir_deref_var *sampler = vtn_access_chain_to_deref(b, sampled.sampler);
++ instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref));
++ if (sampled.image) {
++ nir_deref_var *image = vtn_access_chain_to_deref(b, sampled.image);
++ instr->texture = nir_deref_as_var(nir_copy_deref(instr, &image->deref));
++ } else {
++ instr->texture = NULL;
++ }
++
++ nir_ssa_dest_init(&instr->instr, &instr->dest,
++ nir_tex_instr_dest_size(instr), NULL);
++
++ assert(glsl_get_vector_elements(ret_type->type) ==
++ nir_tex_instr_dest_size(instr));
++
++ val->ssa = vtn_create_ssa_value(b, ret_type->type);
++ val->ssa->def = &instr->dest.ssa;
++
++ nir_builder_instr_insert(&b->nb, &instr->instr);
++}
++
++static nir_ssa_def *
++get_image_coord(struct vtn_builder *b, uint32_t value)
++{
++ struct vtn_ssa_value *coord = vtn_ssa_value(b, value);
++
++ /* The image_load_store intrinsics assume a 4-dim coordinate */
++ unsigned dim = glsl_get_vector_elements(coord->type);
++ unsigned swizzle[4];
++ for (unsigned i = 0; i < 4; i++)
++ swizzle[i] = MIN2(i, dim - 1);
++
++ return nir_swizzle(&b->nb, coord->def, swizzle, 4, false);
++}
++
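++/* Handles OpImageTexelPointer, image loads/stores/queries, and image
++ * atomics, lowering them to image_* intrinsics that operate on the image
++ * variable dereference.
++ */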
++static void
++vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ /* Just get this one out of the way */
++ if (opcode == SpvOpImageTexelPointer) {
++ struct vtn_value *val =
++ vtn_push_value(b, w[2], vtn_value_type_image_pointer);
++ val->image = ralloc(b, struct vtn_image_pointer);
++
++ val->image->image =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++ val->image->coord = get_image_coord(b, w[4]);
++ val->image->sample = vtn_ssa_value(b, w[5])->def;
++ return;
++ }
++
++ struct vtn_image_pointer image;
++
++ switch (opcode) {
++ case SpvOpAtomicExchange:
++ case SpvOpAtomicCompareExchange:
++ case SpvOpAtomicCompareExchangeWeak:
++ case SpvOpAtomicIIncrement:
++ case SpvOpAtomicIDecrement:
++ case SpvOpAtomicIAdd:
++ case SpvOpAtomicISub:
++ case SpvOpAtomicSMin:
++ case SpvOpAtomicUMin:
++ case SpvOpAtomicSMax:
++ case SpvOpAtomicUMax:
++ case SpvOpAtomicAnd:
++ case SpvOpAtomicOr:
++ case SpvOpAtomicXor:
++ image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image;
++ break;
++
++ case SpvOpImageQuerySize:
++ image.image =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++ image.coord = NULL;
++ image.sample = NULL;
++ break;
++
++ case SpvOpImageRead:
++ image.image =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++ image.coord = get_image_coord(b, w[4]);
++
++ if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) {
++ assert(w[5] == SpvImageOperandsSampleMask);
++ image.sample = vtn_ssa_value(b, w[6])->def;
++ } else {
++ image.sample = nir_ssa_undef(&b->nb, 1);
++ }
++ break;
++
++ case SpvOpImageWrite:
++ image.image =
++ vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain;
++ image.coord = get_image_coord(b, w[2]);
++
++ /* texel = w[3] */
++
++ if (count > 4 && (w[4] & SpvImageOperandsSampleMask)) {
++ assert(w[4] == SpvImageOperandsSampleMask);
++ image.sample = vtn_ssa_value(b, w[5])->def;
++ } else {
++ image.sample = nir_ssa_undef(&b->nb, 1);
++ }
++ break;
++
++ default:
++ unreachable("Invalid image opcode");
++ }
++
++ nir_intrinsic_op op;
++ switch (opcode) {
++#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break;
++ OP(ImageQuerySize, size)
++ OP(ImageRead, load)
++ OP(ImageWrite, store)
++ OP(AtomicExchange, atomic_exchange)
++ OP(AtomicCompareExchange, atomic_comp_swap)
++ OP(AtomicIIncrement, atomic_add)
++ OP(AtomicIDecrement, atomic_add)
++ OP(AtomicIAdd, atomic_add)
++ OP(AtomicISub, atomic_add)
++ OP(AtomicSMin, atomic_min)
++ OP(AtomicUMin, atomic_min)
++ OP(AtomicSMax, atomic_max)
++ OP(AtomicUMax, atomic_max)
++ OP(AtomicAnd, atomic_and)
++ OP(AtomicOr, atomic_or)
++ OP(AtomicXor, atomic_xor)
++#undef OP
++ default:
++ unreachable("Invalid image opcode");
++ }
++
++ nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
++
++ nir_deref_var *image_deref = vtn_access_chain_to_deref(b, image.image);
++ intrin->variables[0] =
++ nir_deref_as_var(nir_copy_deref(&intrin->instr, &image_deref->deref));
++
++ /* ImageQuerySize doesn't take any extra parameters */
++ if (opcode != SpvOpImageQuerySize) {
++ /* The image coordinate is always 4 components but we may not have that
++ * many. Swizzle to compensate.
++ */
++ unsigned swiz[4];
++ for (unsigned i = 0; i < 4; i++)
++ swiz[i] = i < image.coord->num_components ? i : 0;
++ intrin->src[0] = nir_src_for_ssa(nir_swizzle(&b->nb, image.coord,
++ swiz, 4, false));
++ intrin->src[1] = nir_src_for_ssa(image.sample);
++ }
++
++ switch (opcode) {
++ case SpvOpImageQuerySize:
++ case SpvOpImageRead:
++ break;
++ case SpvOpImageWrite:
++ intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
++ break;
++ case SpvOpAtomicIIncrement:
++ intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1));
++ break;
++ case SpvOpAtomicIDecrement:
++ intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1));
++ break;
++
++ case SpvOpAtomicExchange:
++ case SpvOpAtomicIAdd:
++ case SpvOpAtomicSMin:
++ case SpvOpAtomicUMin:
++ case SpvOpAtomicSMax:
++ case SpvOpAtomicUMax:
++ case SpvOpAtomicAnd:
++ case SpvOpAtomicOr:
++ case SpvOpAtomicXor:
++ intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
++ break;
++
++ case SpvOpAtomicCompareExchange:
++ intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
++ intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
++ break;
++
++ case SpvOpAtomicISub:
++ intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def));
++ break;
++
++ default:
++ unreachable("Invalid image opcode");
++ }
++
++ if (opcode != SpvOpImageWrite) {
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
++ nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, NULL);
++
++ nir_builder_instr_insert(&b->nb, &intrin->instr);
++
++ /* The image intrinsics always return 4 channels but we may not want
++ * that many. Emit a mov to trim it down.
++ */
++ unsigned swiz[4] = {0, 1, 2, 3};
++ val->ssa = vtn_create_ssa_value(b, type->type);
++ val->ssa->def = nir_swizzle(&b->nb, &intrin->dest.ssa, swiz,
++ glsl_get_vector_elements(type->type), false);
++ } else {
++ nir_builder_instr_insert(&b->nb, &intrin->instr);
++ }
++}
++
++static nir_intrinsic_op
++get_ssbo_nir_atomic_op(SpvOp opcode)
++{
++ switch (opcode) {
++#define OP(S, N) case SpvOp##S: return nir_intrinsic_ssbo_##N;
++ OP(AtomicExchange, atomic_exchange)
++ OP(AtomicCompareExchange, atomic_comp_swap)
++ OP(AtomicIIncrement, atomic_add)
++ OP(AtomicIDecrement, atomic_add)
++ OP(AtomicIAdd, atomic_add)
++ OP(AtomicISub, atomic_add)
++ OP(AtomicSMin, atomic_imin)
++ OP(AtomicUMin, atomic_umin)
++ OP(AtomicSMax, atomic_imax)
++ OP(AtomicUMax, atomic_umax)
++ OP(AtomicAnd, atomic_and)
++ OP(AtomicOr, atomic_or)
++ OP(AtomicXor, atomic_xor)
++#undef OP
++ default:
++ unreachable("Invalid SSBO atomic");
++ }
++}
++
++static nir_intrinsic_op
++get_shared_nir_atomic_op(SpvOp opcode)
++{
++ switch (opcode) {
++#define OP(S, N) case SpvOp##S: return nir_intrinsic_var_##N;
++ OP(AtomicExchange, atomic_exchange)
++ OP(AtomicCompareExchange, atomic_comp_swap)
++ OP(AtomicIIncrement, atomic_add)
++ OP(AtomicIDecrement, atomic_add)
++ OP(AtomicIAdd, atomic_add)
++ OP(AtomicISub, atomic_add)
++ OP(AtomicSMin, atomic_imin)
++ OP(AtomicUMin, atomic_umin)
++ OP(AtomicSMax, atomic_imax)
++ OP(AtomicUMax, atomic_umax)
++ OP(AtomicAnd, atomic_and)
++ OP(AtomicOr, atomic_or)
++ OP(AtomicXor, atomic_xor)
++#undef OP
++ default:
++ unreachable("Invalid shared atomic");
++ }
++}
++
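++/* Fills in the value operands shared by the SSBO and workgroup atomic
++ * lowering: increment/decrement become adds of +/-1 and ISub negates its
++ * operand so all three can reuse the atomic-add intrinsic.
++ */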
++static void
++fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, nir_src *src)
++{
++ switch (opcode) {
++ case SpvOpAtomicIIncrement:
++ src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, 1));
++ break;
++
++ case SpvOpAtomicIDecrement:
++ src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, -1));
++ break;
++
++ case SpvOpAtomicISub:
++ src[0] =
++ nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def));
++ break;
++
++ case SpvOpAtomicCompareExchange:
++ src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
++ src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def);
++ break;
++
++ case SpvOpAtomicExchange:
++ case SpvOpAtomicIAdd:
++ case SpvOpAtomicSMin:
++ case SpvOpAtomicUMin:
++ case SpvOpAtomicSMax:
++ case SpvOpAtomicUMax:
++ case SpvOpAtomicAnd:
++ case SpvOpAtomicOr:
++ case SpvOpAtomicXor:
++ src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
++ break;
++
++ default:
++ unreachable("Invalid SPIR-V atomic");
++ }
++}
++
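++/* Handles atomics on SSBO or workgroup storage.  Workgroup variables use the
++ * variable-based atomic intrinsics on a dereference; SSBO accesses are
++ * lowered to an (index, offset) pair for the SSBO atomic intrinsics.
++ */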
++static void
++vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ struct vtn_access_chain *chain =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++ nir_intrinsic_instr *atomic;
++
++ /*
++ SpvScope scope = w[4];
++ SpvMemorySemanticsMask semantics = w[5];
++ */
++
++ if (chain->var->mode == vtn_variable_mode_workgroup) {
++ nir_deref *deref = &vtn_access_chain_to_deref(b, chain)->deref;
++ nir_intrinsic_op op = get_shared_nir_atomic_op(opcode);
++ atomic = nir_intrinsic_instr_create(b->nb.shader, op);
++ atomic->variables[0] = nir_deref_as_var(nir_copy_deref(atomic, deref));
++ fill_common_atomic_sources(b, opcode, w, &atomic->src[0]);
++ } else {
++ assert(chain->var->mode == vtn_variable_mode_ssbo);
++ struct vtn_type *type;
++ nir_ssa_def *offset, *index;
++ offset = vtn_access_chain_to_offset(b, chain, &index, &type, NULL, false);
++
++ nir_intrinsic_op op = get_ssbo_nir_atomic_op(opcode);
++
++ atomic = nir_intrinsic_instr_create(b->nb.shader, op);
++ atomic->src[0] = nir_src_for_ssa(index);
++ atomic->src[1] = nir_src_for_ssa(offset);
++ fill_common_atomic_sources(b, opcode, w, &atomic->src[2]);
++ }
++
++ nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL);
++
++ struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ val->ssa = rzalloc(b, struct vtn_ssa_value);
++ val->ssa->def = &atomic->dest.ssa;
++ val->ssa->type = type->type;
++
++ nir_builder_instr_insert(&b->nb, &atomic->instr);
++}
++
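++/* Creates (but does not insert) a vecN ALU instruction, or an fmov for a
++ * single component, with an SSA destination of the requested width.
++ */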
++static nir_alu_instr *
++create_vec(nir_shader *shader, unsigned num_components)
++{
++ nir_op op;
++ switch (num_components) {
++ case 1: op = nir_op_fmov; break;
++ case 2: op = nir_op_vec2; break;
++ case 3: op = nir_op_vec3; break;
++ case 4: op = nir_op_vec4; break;
++ default: unreachable("bad vector size");
++ }
++
++ nir_alu_instr *vec = nir_alu_instr_create(shader, op);
++ nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL);
++ vec->dest.write_mask = (1 << num_components) - 1;
++
++ return vec;
++}
++
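++/* Transposes a matrix value by emitting one vecN per destination column,
++ * gathering component i from each source column.  The result keeps a link
++ * back to the original so transposing it again is free.
++ */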
++struct vtn_ssa_value *
++vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
++{
++ if (src->transposed)
++ return src->transposed;
++
++ struct vtn_ssa_value *dest =
++ vtn_create_ssa_value(b, glsl_transposed_type(src->type));
++
++ for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
++ nir_alu_instr *vec = create_vec(b->shader,
++ glsl_get_matrix_columns(src->type));
++ if (glsl_type_is_vector_or_scalar(src->type)) {
++ vec->src[0].src = nir_src_for_ssa(src->def);
++ vec->src[0].swizzle[0] = i;
++ } else {
++ for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) {
++ vec->src[j].src = nir_src_for_ssa(src->elems[j]->def);
++ vec->src[j].swizzle[0] = i;
++ }
++ }
++ nir_builder_instr_insert(&b->nb, &vec->instr);
++ dest->elems[i]->def = &vec->dest.dest.ssa;
++ }
++
++ dest->transposed = src;
++
++ return dest;
++}
++
++nir_ssa_def *
++vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index)
++{
++ unsigned swiz[4] = { index };
++ return nir_swizzle(&b->nb, src, swiz, 1, true);
++}
++
++nir_ssa_def *
++vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert,
++ unsigned index)
++{
++ nir_alu_instr *vec = create_vec(b->shader, src->num_components);
++
++ for (unsigned i = 0; i < src->num_components; i++) {
++ if (i == index) {
++ vec->src[i].src = nir_src_for_ssa(insert);
++ } else {
++ vec->src[i].src = nir_src_for_ssa(src);
++ vec->src[i].swizzle[0] = i;
++ }
++ }
++
++ nir_builder_instr_insert(&b->nb, &vec->instr);
++
++ return &vec->dest.dest.ssa;
++}
++
++nir_ssa_def *
++vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src,
++ nir_ssa_def *index)
++{
++ nir_ssa_def *dest = vtn_vector_extract(b, src, 0);
++ for (unsigned i = 1; i < src->num_components; i++)
++ dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)),
++ vtn_vector_extract(b, src, i), dest);
++
++ return dest;
++}
++
++nir_ssa_def *
++vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src,
++ nir_ssa_def *insert, nir_ssa_def *index)
++{
++ nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0);
++ for (unsigned i = 1; i < src->num_components; i++)
++ dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)),
++ vtn_vector_insert(b, src, insert, i), dest);
++
++ return dest;
++}
++
++static nir_ssa_def *
++vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
++ nir_ssa_def *src0, nir_ssa_def *src1,
++ const uint32_t *indices)
++{
++ nir_alu_instr *vec = create_vec(b->shader, num_components);
++
++ nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1);
++ nir_builder_instr_insert(&b->nb, &undef->instr);
++
++ for (unsigned i = 0; i < num_components; i++) {
++ uint32_t index = indices[i];
++ if (index == 0xffffffff) {
++ vec->src[i].src = nir_src_for_ssa(&undef->def);
++ } else if (index < src0->num_components) {
++ vec->src[i].src = nir_src_for_ssa(src0);
++ vec->src[i].swizzle[0] = index;
++ } else {
++ vec->src[i].src = nir_src_for_ssa(src1);
++ vec->src[i].swizzle[0] = index - src0->num_components;
++ }
++ }
++
++ nir_builder_instr_insert(&b->nb, &vec->instr);
++
++ return &vec->dest.dest.ssa;
++}
++
++/*
++ * Concatenates a number of vectors/scalars together to produce a vector
++ */
++static nir_ssa_def *
++vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
++ unsigned num_srcs, nir_ssa_def **srcs)
++{
++ nir_alu_instr *vec = create_vec(b->shader, num_components);
++
++ unsigned dest_idx = 0;
++ for (unsigned i = 0; i < num_srcs; i++) {
++ nir_ssa_def *src = srcs[i];
++ for (unsigned j = 0; j < src->num_components; j++) {
++ vec->src[dest_idx].src = nir_src_for_ssa(src);
++ vec->src[dest_idx].swizzle[0] = j;
++ dest_idx++;
++ }
++ }
++
++ nir_builder_instr_insert(&b->nb, &vec->instr);
++
++ return &vec->dest.dest.ssa;
++}
++
++static struct vtn_ssa_value *
++vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src)
++{
++ struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value);
++ dest->type = src->type;
++
++ if (glsl_type_is_vector_or_scalar(src->type)) {
++ dest->def = src->def;
++ } else {
++ unsigned elems = glsl_get_length(src->type);
++
++ dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems);
++ for (unsigned i = 0; i < elems; i++)
++ dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]);
++ }
++
++ return dest;
++}
++
++static struct vtn_ssa_value *
++vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src,
++ struct vtn_ssa_value *insert, const uint32_t *indices,
++ unsigned num_indices)
++{
++ struct vtn_ssa_value *dest = vtn_composite_copy(b, src);
++
++ struct vtn_ssa_value *cur = dest;
++ unsigned i;
++ for (i = 0; i < num_indices - 1; i++) {
++ cur = cur->elems[indices[i]];
++ }
++
++ if (glsl_type_is_vector_or_scalar(cur->type)) {
++ /* According to the SPIR-V spec, OpCompositeInsert may work down to
++ * component granularity. In that case, the last index selects the
++ * vector component at which the scalar is inserted.
++ */
++
++ cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]);
++ } else {
++ cur->elems[indices[i]] = insert;
++ }
++
++ return dest;
++}
++
++static struct vtn_ssa_value *
++vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src,
++ const uint32_t *indices, unsigned num_indices)
++{
++ struct vtn_ssa_value *cur = src;
++ for (unsigned i = 0; i < num_indices; i++) {
++ if (glsl_type_is_vector_or_scalar(cur->type)) {
++ assert(i == num_indices - 1);
++ /* According to the SPIR-V spec, OpCompositeExtract may work down to
++ * component granularity. In that case, the last index selects the
++ * component to extract from the vector.
++ */
++
++ struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value);
++ ret->type = glsl_scalar_type(glsl_get_base_type(cur->type));
++ ret->def = vtn_vector_extract(b, cur->def, indices[i]);
++ return ret;
++ } else {
++ cur = cur->elems[indices[i]];
++ }
++ }
++
++ return cur;
++}
++
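++/* Handles the vector and composite construct/extract/insert/shuffle opcodes
++ * directly on SSA values, without going through memory.
++ */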
++static void
++vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ const struct glsl_type *type =
++ vtn_value(b, w[1], vtn_value_type_type)->type->type;
++ val->ssa = vtn_create_ssa_value(b, type);
++
++ switch (opcode) {
++ case SpvOpVectorExtractDynamic:
++ val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def,
++ vtn_ssa_value(b, w[4])->def);
++ break;
++
++ case SpvOpVectorInsertDynamic:
++ val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def,
++ vtn_ssa_value(b, w[4])->def,
++ vtn_ssa_value(b, w[5])->def);
++ break;
++
++ case SpvOpVectorShuffle:
++ val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type),
++ vtn_ssa_value(b, w[3])->def,
++ vtn_ssa_value(b, w[4])->def,
++ w + 5);
++ break;
++
++ case SpvOpCompositeConstruct: {
++ unsigned elems = count - 3;
++ if (glsl_type_is_vector_or_scalar(type)) {
++ nir_ssa_def *srcs[4];
++ for (unsigned i = 0; i < elems; i++)
++ srcs[i] = vtn_ssa_value(b, w[3 + i])->def;
++ val->ssa->def =
++ vtn_vector_construct(b, glsl_get_vector_elements(type),
++ elems, srcs);
++ } else {
++ val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
++ for (unsigned i = 0; i < elems; i++)
++ val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]);
++ }
++ break;
++ }
++ case SpvOpCompositeExtract:
++ val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]),
++ w + 4, count - 4);
++ break;
++
++ case SpvOpCompositeInsert:
++ val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]),
++ vtn_ssa_value(b, w[3]),
++ w + 5, count - 5);
++ break;
++
++ case SpvOpCopyObject:
++ val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
++ break;
++
++ default:
++ unreachable("unknown composite operation");
++ }
++}
++
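++/* Maps the vertex-stream and barrier opcodes onto the corresponding NIR
++ * intrinsics; for the stream variants, the stream ID is carried in
++ * const_index[0].
++ */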
++static void
++vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ nir_intrinsic_op intrinsic_op;
++ switch (opcode) {
++ case SpvOpEmitVertex:
++ case SpvOpEmitStreamVertex:
++ intrinsic_op = nir_intrinsic_emit_vertex;
++ break;
++ case SpvOpEndPrimitive:
++ case SpvOpEndStreamPrimitive:
++ intrinsic_op = nir_intrinsic_end_primitive;
++ break;
++ case SpvOpMemoryBarrier:
++ intrinsic_op = nir_intrinsic_memory_barrier;
++ break;
++ case SpvOpControlBarrier:
++ intrinsic_op = nir_intrinsic_barrier;
++ break;
++ default:
++ unreachable("unknown barrier instruction");
++ }
++
++ nir_intrinsic_instr *intrin =
++ nir_intrinsic_instr_create(b->shader, intrinsic_op);
++
++ if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive)
++ intrin->const_index[0] = w[1];
++
++ nir_builder_instr_insert(&b->nb, &intrin->instr);
++}
++
++static unsigned
++gl_primitive_from_spv_execution_mode(SpvExecutionMode mode)
++{
++ switch (mode) {
++ case SpvExecutionModeInputPoints:
++ case SpvExecutionModeOutputPoints:
++ return 0; /* GL_POINTS */
++ case SpvExecutionModeInputLines:
++ return 1; /* GL_LINES */
++ case SpvExecutionModeInputLinesAdjacency:
++ return 0x000A; /* GL_LINES_ADJACENCY_ARB */
++ case SpvExecutionModeTriangles:
++ return 4; /* GL_TRIANGLES */
++ case SpvExecutionModeInputTrianglesAdjacency:
++ return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */
++ case SpvExecutionModeQuads:
++ return 7; /* GL_QUADS */
++ case SpvExecutionModeIsolines:
++ return 0x8E7A; /* GL_ISOLINES */
++ case SpvExecutionModeOutputLineStrip:
++ return 3; /* GL_LINE_STRIP */
++ case SpvExecutionModeOutputTriangleStrip:
++ return 5; /* GL_TRIANGLE_STRIP */
++ default:
++ assert(!"Invalid primitive type");
++ return 4;
++ }
++}
++
++static unsigned
++vertices_in_from_spv_execution_mode(SpvExecutionMode mode)
++{
++ switch (mode) {
++ case SpvExecutionModeInputPoints:
++ return 1;
++ case SpvExecutionModeInputLines:
++ return 2;
++ case SpvExecutionModeInputLinesAdjacency:
++ return 4;
++ case SpvExecutionModeTriangles:
++ return 3;
++ case SpvExecutionModeInputTrianglesAdjacency:
++ return 6;
++ default:
++ assert(!"Invalid GS input mode");
++ return 0;
++ }
++}
++
++static gl_shader_stage
++stage_for_execution_model(SpvExecutionModel model)
++{
++ switch (model) {
++ case SpvExecutionModelVertex:
++ return MESA_SHADER_VERTEX;
++ case SpvExecutionModelTessellationControl:
++ return MESA_SHADER_TESS_CTRL;
++ case SpvExecutionModelTessellationEvaluation:
++ return MESA_SHADER_TESS_EVAL;
++ case SpvExecutionModelGeometry:
++ return MESA_SHADER_GEOMETRY;
++ case SpvExecutionModelFragment:
++ return MESA_SHADER_FRAGMENT;
++ case SpvExecutionModelGLCompute:
++ return MESA_SHADER_COMPUTE;
++ default:
++ unreachable("Unsupported execution model");
++ }
++}
++
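++/* Handles the debug, capability, entry-point, and decoration instructions
++ * that make up the module preamble; returns false on the first opcode
++ * outside that set.
++ */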
++static bool
++vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ switch (opcode) {
++ case SpvOpSource:
++ case SpvOpSourceExtension:
++ case SpvOpSourceContinued:
++ case SpvOpExtension:
++ /* Unhandled, but these are for debug so that's ok. */
++ break;
++
++ case SpvOpCapability:
++ switch ((SpvCapability)w[1]) {
++ case SpvCapabilityMatrix:
++ case SpvCapabilityShader:
++ case SpvCapabilityGeometry:
++ break;
++ default:
++ assert(!"Unsupported capability");
++ }
++ break;
++
++ case SpvOpExtInstImport:
++ vtn_handle_extension(b, opcode, w, count);
++ break;
++
++ case SpvOpMemoryModel:
++ assert(w[1] == SpvAddressingModelLogical);
++ assert(w[2] == SpvMemoryModelGLSL450);
++ break;
++
++ case SpvOpEntryPoint: {
++ struct vtn_value *entry_point = &b->values[w[2]];
++ /* Let this be a name label regardless */
++ unsigned name_words;
++ entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words);
++
++ if (strcmp(entry_point->name, b->entry_point_name) != 0 ||
++ stage_for_execution_model(w[1]) != b->entry_point_stage)
++ break;
++
++ assert(b->entry_point == NULL);
++ b->entry_point = entry_point;
++ break;
++ }
++
++ case SpvOpString:
++ vtn_push_value(b, w[1], vtn_value_type_string)->str =
++ vtn_string_literal(b, &w[2], count - 2, NULL);
++ break;
++
++ case SpvOpName:
++ b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL);
++ break;
++
++ case SpvOpMemberName:
++ /* TODO */
++ break;
++
++ case SpvOpExecutionMode:
++ case SpvOpDecorationGroup:
++ case SpvOpDecorate:
++ case SpvOpMemberDecorate:
++ case SpvOpGroupDecorate:
++ case SpvOpGroupMemberDecorate:
++ vtn_handle_decoration(b, opcode, w, count);
++ break;
++
++ default:
++ return false; /* End of preamble */
++ }
++
++ return true;
++}
++
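++/* Applies the entry point's execution modes to the NIR shader_info:
++ * fragment origin and depth layout, geometry primitive types and counts,
++ * and the compute local size.
++ */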
++static void
++vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point,
++ const struct vtn_decoration *mode, void *data)
++{
++ assert(b->entry_point == entry_point);
++
++ switch(mode->exec_mode) {
++ case SpvExecutionModeOriginUpperLeft:
++ case SpvExecutionModeOriginLowerLeft:
++ b->origin_upper_left =
++ (mode->exec_mode == SpvExecutionModeOriginUpperLeft);
++ break;
++
++ case SpvExecutionModeEarlyFragmentTests:
++ assert(b->shader->stage == MESA_SHADER_FRAGMENT);
++ b->shader->info.fs.early_fragment_tests = true;
++ break;
++
++ case SpvExecutionModeInvocations:
++ assert(b->shader->stage == MESA_SHADER_GEOMETRY);
++ b->shader->info.gs.invocations = MAX2(1, mode->literals[0]);
++ break;
++
++ case SpvExecutionModeDepthReplacing:
++ assert(b->shader->stage == MESA_SHADER_FRAGMENT);
++ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
++ break;
++ case SpvExecutionModeDepthGreater:
++ assert(b->shader->stage == MESA_SHADER_FRAGMENT);
++ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER;
++ break;
++ case SpvExecutionModeDepthLess:
++ assert(b->shader->stage == MESA_SHADER_FRAGMENT);
++ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS;
++ break;
++ case SpvExecutionModeDepthUnchanged:
++ assert(b->shader->stage == MESA_SHADER_FRAGMENT);
++ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED;
++ break;
++
++ case SpvExecutionModeLocalSize:
++ assert(b->shader->stage == MESA_SHADER_COMPUTE);
++ b->shader->info.cs.local_size[0] = mode->literals[0];
++ b->shader->info.cs.local_size[1] = mode->literals[1];
++ b->shader->info.cs.local_size[2] = mode->literals[2];
++ break;
++ case SpvExecutionModeLocalSizeHint:
++ break; /* Nothing to do with this */
++
++ case SpvExecutionModeOutputVertices:
++ assert(b->shader->stage == MESA_SHADER_GEOMETRY);
++ b->shader->info.gs.vertices_out = mode->literals[0];
++ break;
++
++ case SpvExecutionModeInputPoints:
++ case SpvExecutionModeInputLines:
++ case SpvExecutionModeInputLinesAdjacency:
++ case SpvExecutionModeTriangles:
++ case SpvExecutionModeInputTrianglesAdjacency:
++ case SpvExecutionModeQuads:
++ case SpvExecutionModeIsolines:
++ if (b->shader->stage == MESA_SHADER_GEOMETRY) {
++ b->shader->info.gs.vertices_in =
++ vertices_in_from_spv_execution_mode(mode->exec_mode);
++ } else {
++ assert(!"Tessellation shaders not yet supported");
++ }
++ break;
++
++ case SpvExecutionModeOutputPoints:
++ case SpvExecutionModeOutputLineStrip:
++ case SpvExecutionModeOutputTriangleStrip:
++ assert(b->shader->stage == MESA_SHADER_GEOMETRY);
++ b->shader->info.gs.output_primitive =
++ gl_primitive_from_spv_execution_mode(mode->exec_mode);
++ break;
++
++ case SpvExecutionModeSpacingEqual:
++ case SpvExecutionModeSpacingFractionalEven:
++ case SpvExecutionModeSpacingFractionalOdd:
++ case SpvExecutionModeVertexOrderCw:
++ case SpvExecutionModeVertexOrderCcw:
++ case SpvExecutionModePointMode:
++ assert(!"TODO: Add tessellation metadata");
++ break;
++
++ case SpvExecutionModePixelCenterInteger:
++ case SpvExecutionModeXfb:
++ assert(!"Unhandled execution mode");
++ break;
++
++ case SpvExecutionModeVecTypeHint:
++ case SpvExecutionModeContractionOff:
++ break; /* OpenCL */
++ }
++}
++
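++/* Handles the types, constants, and global variables that appear after the
++ * preamble and before the function bodies; returns false on the first opcode
++ * outside that set.
++ */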
++static bool
++vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ switch (opcode) {
++ case SpvOpSource:
++ case SpvOpSourceContinued:
++ case SpvOpSourceExtension:
++ case SpvOpExtension:
++ case SpvOpCapability:
++ case SpvOpExtInstImport:
++ case SpvOpMemoryModel:
++ case SpvOpEntryPoint:
++ case SpvOpExecutionMode:
++ case SpvOpString:
++ case SpvOpName:
++ case SpvOpMemberName:
++ case SpvOpDecorationGroup:
++ case SpvOpDecorate:
++ case SpvOpMemberDecorate:
++ case SpvOpGroupDecorate:
++ case SpvOpGroupMemberDecorate:
++ assert(!"Invalid opcode in types and variables section");
++ break;
++
++ case SpvOpTypeVoid:
++ case SpvOpTypeBool:
++ case SpvOpTypeInt:
++ case SpvOpTypeFloat:
++ case SpvOpTypeVector:
++ case SpvOpTypeMatrix:
++ case SpvOpTypeImage:
++ case SpvOpTypeSampler:
++ case SpvOpTypeSampledImage:
++ case SpvOpTypeArray:
++ case SpvOpTypeRuntimeArray:
++ case SpvOpTypeStruct:
++ case SpvOpTypeOpaque:
++ case SpvOpTypePointer:
++ case SpvOpTypeFunction:
++ case SpvOpTypeEvent:
++ case SpvOpTypeDeviceEvent:
++ case SpvOpTypeReserveId:
++ case SpvOpTypeQueue:
++ case SpvOpTypePipe:
++ vtn_handle_type(b, opcode, w, count);
++ break;
++
++ case SpvOpConstantTrue:
++ case SpvOpConstantFalse:
++ case SpvOpConstant:
++ case SpvOpConstantComposite:
++ case SpvOpConstantSampler:
++ case SpvOpConstantNull:
++ case SpvOpSpecConstantTrue:
++ case SpvOpSpecConstantFalse:
++ case SpvOpSpecConstant:
++ case SpvOpSpecConstantComposite:
++ case SpvOpSpecConstantOp:
++ vtn_handle_constant(b, opcode, w, count);
++ break;
++
++ case SpvOpVariable:
++ vtn_handle_variables(b, opcode, w, count);
++ break;
++
++ default:
++ return false; /* End of preamble */
++ }
++
++ return true;
++}
++
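++/* Per-instruction handler for function bodies: dispatches each opcode to the
++ * variable, function-call, texture, image, atomic, composite, or ALU
++ * handlers.
++ */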
++static bool
++vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ switch (opcode) {
++ case SpvOpLabel:
++ break;
++
++ case SpvOpLoopMerge:
++ case SpvOpSelectionMerge:
++ /* This is handled by cfg pre-pass and walk_blocks */
++ break;
++
++ case SpvOpUndef: {
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
++ val->type = vtn_value(b, w[1], vtn_value_type_type)->type;
++ break;
++ }
++
++ case SpvOpExtInst:
++ vtn_handle_extension(b, opcode, w, count);
++ break;
++
++ case SpvOpVariable:
++ case SpvOpLoad:
++ case SpvOpStore:
++ case SpvOpCopyMemory:
++ case SpvOpCopyMemorySized:
++ case SpvOpAccessChain:
++ case SpvOpInBoundsAccessChain:
++ case SpvOpArrayLength:
++ vtn_handle_variables(b, opcode, w, count);
++ break;
++
++ case SpvOpFunctionCall:
++ vtn_handle_function_call(b, opcode, w, count);
++ break;
++
++ case SpvOpSampledImage:
++ case SpvOpImage:
++ case SpvOpImageSampleImplicitLod:
++ case SpvOpImageSampleExplicitLod:
++ case SpvOpImageSampleDrefImplicitLod:
++ case SpvOpImageSampleDrefExplicitLod:
++ case SpvOpImageSampleProjImplicitLod:
++ case SpvOpImageSampleProjExplicitLod:
++ case SpvOpImageSampleProjDrefImplicitLod:
++ case SpvOpImageSampleProjDrefExplicitLod:
++ case SpvOpImageFetch:
++ case SpvOpImageGather:
++ case SpvOpImageDrefGather:
++ case SpvOpImageQuerySizeLod:
++ case SpvOpImageQueryLod:
++ case SpvOpImageQueryLevels:
++ case SpvOpImageQuerySamples:
++ vtn_handle_texture(b, opcode, w, count);
++ break;
++
++ case SpvOpImageRead:
++ case SpvOpImageWrite:
++ case SpvOpImageTexelPointer:
++ vtn_handle_image(b, opcode, w, count);
++ break;
++
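++ /* OpImageQuerySize applies to both storage images and sampled textures;
++ * dispatch on the underlying variable's type to pick the right handler.
++ */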
++ case SpvOpImageQuerySize: {
++ struct vtn_access_chain *image =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++ if (glsl_type_is_image(image->var->var->interface_type)) {
++ vtn_handle_image(b, opcode, w, count);
++ } else {
++ vtn_handle_texture(b, opcode, w, count);
++ }
++ break;
++ }
++
++ case SpvOpAtomicExchange:
++ case SpvOpAtomicCompareExchange:
++ case SpvOpAtomicCompareExchangeWeak:
++ case SpvOpAtomicIIncrement:
++ case SpvOpAtomicIDecrement:
++ case SpvOpAtomicIAdd:
++ case SpvOpAtomicISub:
++ case SpvOpAtomicSMin:
++ case SpvOpAtomicUMin:
++ case SpvOpAtomicSMax:
++ case SpvOpAtomicUMax:
++ case SpvOpAtomicAnd:
++ case SpvOpAtomicOr:
++ case SpvOpAtomicXor: {
++ struct vtn_value *pointer = vtn_untyped_value(b, w[3]);
++ if (pointer->value_type == vtn_value_type_image_pointer) {
++ vtn_handle_image(b, opcode, w, count);
++ } else {
++ assert(pointer->value_type == vtn_value_type_access_chain);
++ vtn_handle_ssbo_or_shared_atomic(b, opcode, w, count);
++ }
++ break;
++ }
++
++ case SpvOpSNegate:
++ case SpvOpFNegate:
++ case SpvOpNot:
++ case SpvOpAny:
++ case SpvOpAll:
++ case SpvOpConvertFToU:
++ case SpvOpConvertFToS:
++ case SpvOpConvertSToF:
++ case SpvOpConvertUToF:
++ case SpvOpUConvert:
++ case SpvOpSConvert:
++ case SpvOpFConvert:
++ case SpvOpQuantizeToF16:
++ case SpvOpConvertPtrToU:
++ case SpvOpConvertUToPtr:
++ case SpvOpPtrCastToGeneric:
++ case SpvOpGenericCastToPtr:
++ case SpvOpBitcast:
++ case SpvOpIsNan:
++ case SpvOpIsInf:
++ case SpvOpIsFinite:
++ case SpvOpIsNormal:
++ case SpvOpSignBitSet:
++ case SpvOpLessOrGreater:
++ case SpvOpOrdered:
++ case SpvOpUnordered:
++ case SpvOpIAdd:
++ case SpvOpFAdd:
++ case SpvOpISub:
++ case SpvOpFSub:
++ case SpvOpIMul:
++ case SpvOpFMul:
++ case SpvOpUDiv:
++ case SpvOpSDiv:
++ case SpvOpFDiv:
++ case SpvOpUMod:
++ case SpvOpSRem:
++ case SpvOpSMod:
++ case SpvOpFRem:
++ case SpvOpFMod:
++ case SpvOpVectorTimesScalar:
++ case SpvOpDot:
++ case SpvOpIAddCarry:
++ case SpvOpISubBorrow:
++ case SpvOpUMulExtended:
++ case SpvOpSMulExtended:
++ case SpvOpShiftRightLogical:
++ case SpvOpShiftRightArithmetic:
++ case SpvOpShiftLeftLogical:
++ case SpvOpLogicalEqual:
++ case SpvOpLogicalNotEqual:
++ case SpvOpLogicalOr:
++ case SpvOpLogicalAnd:
++ case SpvOpLogicalNot:
++ case SpvOpBitwiseOr:
++ case SpvOpBitwiseXor:
++ case SpvOpBitwiseAnd:
++ case SpvOpSelect:
++ case SpvOpIEqual:
++ case SpvOpFOrdEqual:
++ case SpvOpFUnordEqual:
++ case SpvOpINotEqual:
++ case SpvOpFOrdNotEqual:
++ case SpvOpFUnordNotEqual:
++ case SpvOpULessThan:
++ case SpvOpSLessThan:
++ case SpvOpFOrdLessThan:
++ case SpvOpFUnordLessThan:
++ case SpvOpUGreaterThan:
++ case SpvOpSGreaterThan:
++ case SpvOpFOrdGreaterThan:
++ case SpvOpFUnordGreaterThan:
++ case SpvOpULessThanEqual:
++ case SpvOpSLessThanEqual:
++ case SpvOpFOrdLessThanEqual:
++ case SpvOpFUnordLessThanEqual:
++ case SpvOpUGreaterThanEqual:
++ case SpvOpSGreaterThanEqual:
++ case SpvOpFOrdGreaterThanEqual:
++ case SpvOpFUnordGreaterThanEqual:
++ case SpvOpDPdx:
++ case SpvOpDPdy:
++ case SpvOpFwidth:
++ case SpvOpDPdxFine:
++ case SpvOpDPdyFine:
++ case SpvOpFwidthFine:
++ case SpvOpDPdxCoarse:
++ case SpvOpDPdyCoarse:
++ case SpvOpFwidthCoarse:
++ case SpvOpBitFieldInsert:
++ case SpvOpBitFieldSExtract:
++ case SpvOpBitFieldUExtract:
++ case SpvOpBitReverse:
++ case SpvOpBitCount:
++ case SpvOpTranspose:
++ case SpvOpOuterProduct:
++ case SpvOpMatrixTimesScalar:
++ case SpvOpVectorTimesMatrix:
++ case SpvOpMatrixTimesVector:
++ case SpvOpMatrixTimesMatrix:
++ vtn_handle_alu(b, opcode, w, count);
++ break;
++
++ case SpvOpVectorExtractDynamic:
++ case SpvOpVectorInsertDynamic:
++ case SpvOpVectorShuffle:
++ case SpvOpCompositeConstruct:
++ case SpvOpCompositeExtract:
++ case SpvOpCompositeInsert:
++ case SpvOpCopyObject:
++ vtn_handle_composite(b, opcode, w, count);
++ break;
++
++ case SpvOpEmitVertex:
++ case SpvOpEndPrimitive:
++ case SpvOpEmitStreamVertex:
++ case SpvOpEndStreamPrimitive:
++ case SpvOpControlBarrier:
++ case SpvOpMemoryBarrier:
++ vtn_handle_barrier(b, opcode, w, count);
++ break;
++
++ default:
++ unreachable("Unhandled opcode");
++ }
++
++ return true;
++}
++
++nir_function *
++spirv_to_nir(const uint32_t *words, size_t word_count,
++ struct nir_spirv_specialization *spec, unsigned num_spec,
++ gl_shader_stage stage, const char *entry_point_name,
++ const nir_shader_compiler_options *options)
++{
++ const uint32_t *word_end = words + word_count;
++
++ /* Handle the SPIR-V header (first 5 dwords) */
++ assert(word_count > 5);
++
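++ /* Per the SPIR-V spec, the five header words are the magic number, the
++ * version, the generator's magic number, the ID bound, and a reserved
++ * schema word that must be zero.
++ */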
++ assert(words[0] == SpvMagicNumber);
++ assert(words[1] >= 0x10000);
++ /* words[2] == generator magic */
++ unsigned value_id_bound = words[3];
++ assert(words[4] == 0);
++
++ words += 5;
++
++ /* Initialize the vtn_builder object */
++ struct vtn_builder *b = rzalloc(NULL, struct vtn_builder);
++ b->value_id_bound = value_id_bound;
++ b->values = rzalloc_array(b, struct vtn_value, value_id_bound);
++ exec_list_make_empty(&b->functions);
++ b->entry_point_stage = stage;
++ b->entry_point_name = entry_point_name;
++
++ /* Handle all the preamble instructions */
++ words = vtn_foreach_instruction(b, words, word_end,
++ vtn_handle_preamble_instruction);
++
++ if (b->entry_point == NULL) {
++ assert(!"Entry point not found");
++ ralloc_free(b);
++ return NULL;
++ }
++
++ b->shader = nir_shader_create(NULL, stage, options);
++
++ /* Parse execution modes */
++ vtn_foreach_execution_mode(b, b->entry_point,
++ vtn_handle_execution_mode, NULL);
++
++ b->specializations = spec;
++ b->num_specializations = num_spec;
++
++ /* Handle all variable, type, and constant instructions */
++ words = vtn_foreach_instruction(b, words, word_end,
++ vtn_handle_variable_or_type_instruction);
++
++ vtn_build_cfg(b, words, word_end);
++
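++ /* vtn_build_cfg() creates the nir_functions and the structured
++ * control-flow lists; the loop below then emits the actual NIR
++ * instructions for each function body.
++ */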
++ foreach_list_typed(struct vtn_function, func, node, &b->functions) {
++ b->impl = func->impl;
++ b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
++ _mesa_key_pointer_equal);
++
++ vtn_function_emit(b, func, vtn_handle_body_instruction);
++ }
++
++ assert(b->entry_point->value_type == vtn_value_type_function);
++ nir_function *entry_point = b->entry_point->func->impl->function;
++ assert(entry_point);
++
++ ralloc_free(b);
++
++ return entry_point;
++}
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2016 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "vtn_private.h"
++
++/*
++ * Normally, column vectors in SPIR-V correspond to a single NIR SSA
++ * definition. But for matrix multiplies, we want to do one routine for
++ * multiplying a matrix by a matrix and then pretend that vectors are matrices
++ * with one column. So we "wrap" these things, and unwrap the result before we
++ * send it off.
++ */
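++/* For example, OpMatrixTimesVector wraps the vector operand as a one-column
++ * matrix, runs the general multiply path below, and then unwraps the
++ * single-column result back into an ordinary vector.
++ */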
++
++static struct vtn_ssa_value *
++wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val)
++{
++ if (val == NULL)
++ return NULL;
++
++ if (glsl_type_is_matrix(val->type))
++ return val;
++
++ struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value);
++ dest->type = val->type;
++ dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1);
++ dest->elems[0] = val;
++
++ return dest;
++}
++
++static struct vtn_ssa_value *
++unwrap_matrix(struct vtn_ssa_value *val)
++{
++ if (glsl_type_is_matrix(val->type))
++ return val;
++
++ return val->elems[0];
++}
++
++static struct vtn_ssa_value *
++matrix_multiply(struct vtn_builder *b,
++ struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1)
++{
++
++ struct vtn_ssa_value *src0 = wrap_matrix(b, _src0);
++ struct vtn_ssa_value *src1 = wrap_matrix(b, _src1);
++ struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed);
++ struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed);
++
++ unsigned src0_rows = glsl_get_vector_elements(src0->type);
++ unsigned src0_columns = glsl_get_matrix_columns(src0->type);
++ unsigned src1_columns = glsl_get_matrix_columns(src1->type);
++
++ const struct glsl_type *dest_type;
++ if (src1_columns > 1) {
++ dest_type = glsl_matrix_type(glsl_get_base_type(src0->type),
++ src0_rows, src1_columns);
++ } else {
++ dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows);
++ }
++ struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type);
++
++ dest = wrap_matrix(b, dest);
++
++ bool transpose_result = false;
++ if (src0_transpose && src1_transpose) {
++ /* transpose(A) * transpose(B) = transpose(B * A) */
++ src1 = src0_transpose;
++ src0 = src1_transpose;
++ src0_transpose = NULL;
++ src1_transpose = NULL;
++ transpose_result = true;
++ }
++
++ if (src0_transpose && !src1_transpose &&
++ glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) {
++ /* We already have the rows of src0 and the columns of src1 available,
++ * so we can just take the dot product of each row with each column to
++ * get the result.
++ */
++
++ for (unsigned i = 0; i < src1_columns; i++) {
++ nir_ssa_def *vec_src[4];
++ for (unsigned j = 0; j < src0_rows; j++) {
++ vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def,
++ src1->elems[i]->def);
++ }
++ dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows);
++ }
++ } else {
++ /* We don't handle the case where src1 is transposed but not src0, since
++ * the general case only uses individual components of src1 so the
++ * optimizer should chew through the transpose we emitted for src1.
++ */
++
++ for (unsigned i = 0; i < src1_columns; i++) {
++ /* dest[i] = sum(src0[j] * src1[i][j] for all j) */
++ dest->elems[i]->def =
++ nir_fmul(&b->nb, src0->elems[0]->def,
++ nir_channel(&b->nb, src1->elems[i]->def, 0));
++ for (unsigned j = 1; j < src0_columns; j++) {
++ dest->elems[i]->def =
++ nir_fadd(&b->nb, dest->elems[i]->def,
++ nir_fmul(&b->nb, src0->elems[j]->def,
++ nir_channel(&b->nb, src1->elems[i]->def, j)));
++ }
++ }
++ }
++
++ dest = unwrap_matrix(dest);
++
++ if (transpose_result)
++ dest = vtn_ssa_transpose(b, dest);
++
++ return dest;
++}
++
++static struct vtn_ssa_value *
++mat_times_scalar(struct vtn_builder *b,
++ struct vtn_ssa_value *mat,
++ nir_ssa_def *scalar)
++{
++ struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type);
++ for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) {
++ if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT)
++ dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar);
++ else
++ dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar);
++ }
++
++ return dest;
++}
++
++static void
++vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode,
++ struct vtn_value *dest,
++ struct vtn_ssa_value *src0, struct vtn_ssa_value *src1)
++{
++ switch (opcode) {
++ case SpvOpFNegate: {
++ dest->ssa = vtn_create_ssa_value(b, src0->type);
++ unsigned cols = glsl_get_matrix_columns(src0->type);
++ for (unsigned i = 0; i < cols; i++)
++ dest->ssa->elems[i]->def = nir_fneg(&b->nb, src0->elems[i]->def);
++ break;
++ }
++
++ case SpvOpFAdd: {
++ dest->ssa = vtn_create_ssa_value(b, src0->type);
++ unsigned cols = glsl_get_matrix_columns(src0->type);
++ for (unsigned i = 0; i < cols; i++)
++ dest->ssa->elems[i]->def =
++ nir_fadd(&b->nb, src0->elems[i]->def, src1->elems[i]->def);
++ break;
++ }
++
++ case SpvOpFSub: {
++ dest->ssa = vtn_create_ssa_value(b, src0->type);
++ unsigned cols = glsl_get_matrix_columns(src0->type);
++ for (unsigned i = 0; i < cols; i++)
++ dest->ssa->elems[i]->def =
++ nir_fsub(&b->nb, src0->elems[i]->def, src1->elems[i]->def);
++ break;
++ }
++
++ case SpvOpTranspose:
++ dest->ssa = vtn_ssa_transpose(b, src0);
++ break;
++
++ case SpvOpMatrixTimesScalar:
++ if (src0->transposed) {
++ dest->ssa = vtn_ssa_transpose(b, mat_times_scalar(b, src0->transposed,
++ src1->def));
++ } else {
++ dest->ssa = mat_times_scalar(b, src0, src1->def);
++ }
++ break;
++
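++ /* OpVectorTimesMatrix computes v * M, which equals transpose(M) * v, so
++ * it is handled by passing the transposed matrix as the left operand to
++ * matrix_multiply().
++ */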
++ case SpvOpVectorTimesMatrix:
++ case SpvOpMatrixTimesVector:
++ case SpvOpMatrixTimesMatrix:
++ if (opcode == SpvOpVectorTimesMatrix) {
++ dest->ssa = matrix_multiply(b, vtn_ssa_transpose(b, src1), src0);
++ } else {
++ dest->ssa = matrix_multiply(b, src0, src1);
++ }
++ break;
++
++ default: unreachable("unknown matrix opcode");
++ }
++}
++
++nir_op
++vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap)
++{
++ /* Indicates that the first two arguments should be swapped. This is
++ * used for implementing greater-than and less-than-or-equal.
++ */
++ *swap = false;
++
++ switch (opcode) {
++ case SpvOpSNegate: return nir_op_ineg;
++ case SpvOpFNegate: return nir_op_fneg;
++ case SpvOpNot: return nir_op_inot;
++ case SpvOpIAdd: return nir_op_iadd;
++ case SpvOpFAdd: return nir_op_fadd;
++ case SpvOpISub: return nir_op_isub;
++ case SpvOpFSub: return nir_op_fsub;
++ case SpvOpIMul: return nir_op_imul;
++ case SpvOpFMul: return nir_op_fmul;
++ case SpvOpUDiv: return nir_op_udiv;
++ case SpvOpSDiv: return nir_op_idiv;
++ case SpvOpFDiv: return nir_op_fdiv;
++ case SpvOpUMod: return nir_op_umod;
++ case SpvOpSMod: return nir_op_imod;
++ case SpvOpFMod: return nir_op_fmod;
++ case SpvOpSRem: return nir_op_irem;
++ case SpvOpFRem: return nir_op_frem;
++
++ case SpvOpShiftRightLogical: return nir_op_ushr;
++ case SpvOpShiftRightArithmetic: return nir_op_ishr;
++ case SpvOpShiftLeftLogical: return nir_op_ishl;
++ case SpvOpLogicalOr: return nir_op_ior;
++ case SpvOpLogicalEqual: return nir_op_ieq;
++ case SpvOpLogicalNotEqual: return nir_op_ine;
++ case SpvOpLogicalAnd: return nir_op_iand;
++ case SpvOpLogicalNot: return nir_op_inot;
++ case SpvOpBitwiseOr: return nir_op_ior;
++ case SpvOpBitwiseXor: return nir_op_ixor;
++ case SpvOpBitwiseAnd: return nir_op_iand;
++ case SpvOpSelect: return nir_op_bcsel;
++ case SpvOpIEqual: return nir_op_ieq;
++
++ case SpvOpBitFieldInsert: return nir_op_bitfield_insert;
++ case SpvOpBitFieldSExtract: return nir_op_ibitfield_extract;
++ case SpvOpBitFieldUExtract: return nir_op_ubitfield_extract;
++ case SpvOpBitReverse: return nir_op_bitfield_reverse;
++ case SpvOpBitCount: return nir_op_bit_count;
++
++ /* Comparisons: (TODO: How do we want to handle ordered/unordered?) */
++ case SpvOpFOrdEqual: return nir_op_feq;
++ case SpvOpFUnordEqual: return nir_op_feq;
++ case SpvOpINotEqual: return nir_op_ine;
++ case SpvOpFOrdNotEqual: return nir_op_fne;
++ case SpvOpFUnordNotEqual: return nir_op_fne;
++ case SpvOpULessThan: return nir_op_ult;
++ case SpvOpSLessThan: return nir_op_ilt;
++ case SpvOpFOrdLessThan: return nir_op_flt;
++ case SpvOpFUnordLessThan: return nir_op_flt;
++ case SpvOpUGreaterThan: *swap = true; return nir_op_ult;
++ case SpvOpSGreaterThan: *swap = true; return nir_op_ilt;
++ case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt;
++ case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt;
++ case SpvOpULessThanEqual: *swap = true; return nir_op_uge;
++ case SpvOpSLessThanEqual: *swap = true; return nir_op_ige;
++ case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge;
++ case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge;
++ case SpvOpUGreaterThanEqual: return nir_op_uge;
++ case SpvOpSGreaterThanEqual: return nir_op_ige;
++ case SpvOpFOrdGreaterThanEqual: return nir_op_fge;
++ case SpvOpFUnordGreaterThanEqual: return nir_op_fge;
++
++ /* Conversions: */
++ case SpvOpConvertFToU: return nir_op_f2u;
++ case SpvOpConvertFToS: return nir_op_f2i;
++ case SpvOpConvertSToF: return nir_op_i2f;
++ case SpvOpConvertUToF: return nir_op_u2f;
++ case SpvOpBitcast: return nir_op_imov;
++ case SpvOpQuantizeToF16: return nir_op_fquantize2f16;
++ /* TODO: NIR is 32-bit only; these are no-ops. */
++ case SpvOpUConvert: return nir_op_imov;
++ case SpvOpSConvert: return nir_op_imov;
++ case SpvOpFConvert: return nir_op_fmov;
++
++ /* Derivatives: */
++ case SpvOpDPdx: return nir_op_fddx;
++ case SpvOpDPdy: return nir_op_fddy;
++ case SpvOpDPdxFine: return nir_op_fddx_fine;
++ case SpvOpDPdyFine: return nir_op_fddy_fine;
++ case SpvOpDPdxCoarse: return nir_op_fddx_coarse;
++ case SpvOpDPdyCoarse: return nir_op_fddy_coarse;
++
++ default:
++ unreachable("No NIR equivalent");
++ }
++}
++
++void
++vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ const struct glsl_type *type =
++ vtn_value(b, w[1], vtn_value_type_type)->type->type;
++
++ /* Collect the various SSA sources */
++ const unsigned num_inputs = count - 3;
++ struct vtn_ssa_value *vtn_src[4] = { NULL, };
++ for (unsigned i = 0; i < num_inputs; i++)
++ vtn_src[i] = vtn_ssa_value(b, w[i + 3]);
++
++ if (glsl_type_is_matrix(vtn_src[0]->type) ||
++ (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) {
++ vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]);
++ return;
++ }
++
++ val->ssa = vtn_create_ssa_value(b, type);
++ nir_ssa_def *src[4] = { NULL, };
++ for (unsigned i = 0; i < num_inputs; i++) {
++ assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type));
++ src[i] = vtn_src[i]->def;
++ }
++
++ switch (opcode) {
++ case SpvOpAny:
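++ /* Any(v): true if any component of v is true. For vectors this maps to
++ * bany_inequalN(v, false), i.e. "some component differs from false".
++ */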
++ if (src[0]->num_components == 1) {
++ val->ssa->def = nir_imov(&b->nb, src[0]);
++ } else {
++ nir_op op;
++ switch (src[0]->num_components) {
++ case 2: op = nir_op_bany_inequal2; break;
++ case 3: op = nir_op_bany_inequal3; break;
++ case 4: op = nir_op_bany_inequal4; break;
++ }
++ val->ssa->def = nir_build_alu(&b->nb, op, src[0],
++ nir_imm_int(&b->nb, NIR_FALSE),
++ NULL, NULL);
++ }
++ return;
++
++ case SpvOpAll:
++ if (src[0]->num_components == 1) {
++ val->ssa->def = nir_imov(&b->nb, src[0]);
++ } else {
++ nir_op op;
++ switch (src[0]->num_components) {
++ case 2: op = nir_op_ball_iequal2; break;
++ case 3: op = nir_op_ball_iequal3; break;
++ case 4: op = nir_op_ball_iequal4; break;
++ }
++ val->ssa->def = nir_build_alu(&b->nb, op, src[0],
++ nir_imm_int(&b->nb, NIR_TRUE),
++ NULL, NULL);
++ }
++ return;
++
++ case SpvOpOuterProduct: {
++ for (unsigned i = 0; i < src[1]->num_components; i++) {
++ val->ssa->elems[i]->def =
++ nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i));
++ }
++ return;
++ }
++
++ case SpvOpDot:
++ val->ssa->def = nir_fdot(&b->nb, src[0], src[1]);
++ return;
++
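++ /* The extended-arithmetic opcodes return a two-member struct: member 0 is
++ * the low 32 bits of the result and member 1 the carry/borrow or high
++ * bits, hence the elems[0]/elems[1] stores below.
++ */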
++ case SpvOpIAddCarry:
++ assert(glsl_type_is_struct(val->ssa->type));
++ val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]);
++ val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]);
++ return;
++
++ case SpvOpISubBorrow:
++ assert(glsl_type_is_struct(val->ssa->type));
++ val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]);
++ val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]);
++ return;
++
++ case SpvOpUMulExtended:
++ assert(glsl_type_is_struct(val->ssa->type));
++ val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]);
++ val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]);
++ return;
++
++ case SpvOpSMulExtended:
++ assert(glsl_type_is_struct(val->ssa->type));
++ val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]);
++ val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]);
++ return;
++
++ case SpvOpFwidth:
++ val->ssa->def = nir_fadd(&b->nb,
++ nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])),
++ nir_fabs(&b->nb, nir_fddx(&b->nb, src[1])));
++ return;
++ case SpvOpFwidthFine:
++ val->ssa->def = nir_fadd(&b->nb,
++ nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])),
++ nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1])));
++ return;
++ case SpvOpFwidthCoarse:
++ val->ssa->def = nir_fadd(&b->nb,
++ nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])),
++ nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1])));
++ return;
++
++ case SpvOpVectorTimesScalar:
++ /* The builder will take care of splatting for us. */
++ val->ssa->def = nir_fmul(&b->nb, src[0], src[1]);
++ return;
++
++ case SpvOpIsNan:
++ val->ssa->def = nir_fne(&b->nb, src[0], src[0]);
++ return;
++
++ case SpvOpIsInf:
++ val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]),
++ nir_imm_float(&b->nb, INFINITY));
++ return;
++
++ default: {
++ bool swap;
++ nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap);
++
++ if (swap) {
++ nir_ssa_def *tmp = src[0];
++ src[0] = src[1];
++ src[1] = tmp;
++ }
++
++ val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]);
++ return;
++ } /* default */
++ }
++}
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "vtn_private.h"
++#include "nir/nir_vla.h"
++
++static bool
++vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ switch (opcode) {
++ case SpvOpFunction: {
++ assert(b->func == NULL);
++ b->func = rzalloc(b, struct vtn_function);
++
++ list_inithead(&b->func->body);
++ b->func->control = w[3];
++
++ const struct glsl_type *result_type =
++ vtn_value(b, w[1], vtn_value_type_type)->type->type;
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function);
++ val->func = b->func;
++
++ const struct glsl_type *func_type =
++ vtn_value(b, w[4], vtn_value_type_type)->type->type;
++
++ assert(glsl_get_function_return_type(func_type) == result_type);
++
++ nir_function *func =
++ nir_function_create(b->shader, ralloc_strdup(b->shader, val->name));
++
++ func->num_params = glsl_get_length(func_type);
++ func->params = ralloc_array(b->shader, nir_parameter, func->num_params);
++ for (unsigned i = 0; i < func->num_params; i++) {
++ const struct glsl_function_param *param =
++ glsl_get_function_param(func_type, i);
++ func->params[i].type = param->type;
++ if (param->in) {
++ if (param->out) {
++ func->params[i].param_type = nir_parameter_inout;
++ } else {
++ func->params[i].param_type = nir_parameter_in;
++ }
++ } else {
++ if (param->out) {
++ func->params[i].param_type = nir_parameter_out;
++ } else {
++ assert(!"Parameter is neither in nor out");
++ }
++ }
++ }
++
++ func->return_type = glsl_get_function_return_type(func_type);
++
++ b->func->impl = nir_function_impl_create(func);
++ if (!glsl_type_is_void(func->return_type)) {
++ b->func->impl->return_var =
++ nir_local_variable_create(b->func->impl, func->return_type, "ret");
++ }
++
++ b->func_param_idx = 0;
++ break;
++ }
++
++ case SpvOpFunctionEnd:
++ b->func->end = w;
++ b->func = NULL;
++ break;
++
++ case SpvOpFunctionParameter: {
++ struct vtn_value *val =
++ vtn_push_value(b, w[2], vtn_value_type_access_chain);
++
++ assert(b->func_param_idx < b->func->impl->num_params);
++ unsigned idx = b->func_param_idx++;
++
++ nir_variable *param =
++ nir_local_variable_create(b->func->impl,
++ b->func->impl->function->params[idx].type,
++ val->name);
++ b->func->impl->params[idx] = param;
++
++ struct vtn_variable *vtn_var = rzalloc(b, struct vtn_variable);
++ vtn_var->mode = vtn_variable_mode_param;
++ vtn_var->type = vtn_value(b, w[1], vtn_value_type_type)->type;
++ vtn_var->var = param;
++ vtn_var->chain.var = vtn_var;
++ vtn_var->chain.length = 0;
++
++ val->access_chain = &vtn_var->chain;
++ break;
++ }
++
++ case SpvOpLabel: {
++ assert(b->block == NULL);
++ b->block = rzalloc(b, struct vtn_block);
++ b->block->node.type = vtn_cf_node_type_block;
++ b->block->label = w;
++ vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block;
++
++ if (b->func->start_block == NULL) {
++ /* This is the first block encountered for this function. In this
++ * case, we set the start block and add it to the list of
++ * implemented functions that we'll walk later.
++ */
++ b->func->start_block = b->block;
++ exec_list_push_tail(&b->functions, &b->func->node);
++ }
++ break;
++ }
++
++ case SpvOpSelectionMerge:
++ case SpvOpLoopMerge:
++ assert(b->block && b->block->merge == NULL);
++ b->block->merge = w;
++ break;
++
++ case SpvOpBranch:
++ case SpvOpBranchConditional:
++ case SpvOpSwitch:
++ case SpvOpKill:
++ case SpvOpReturn:
++ case SpvOpReturnValue:
++ case SpvOpUnreachable:
++ assert(b->block && b->block->branch == NULL);
++ b->block->branch = w;
++ b->block = NULL;
++ break;
++
++ default:
++ /* Continue on as per normal */
++ return true;
++ }
++
++ return true;
++}
++
++static void
++vtn_add_case(struct vtn_builder *b, struct vtn_switch *swtch,
++ struct vtn_block *break_block,
++ uint32_t block_id, uint32_t val, bool is_default)
++{
++ struct vtn_block *case_block =
++ vtn_value(b, block_id, vtn_value_type_block)->block;
++
++ /* Don't create dummy cases that just break */
++ if (case_block == break_block)
++ return;
++
++ if (case_block->switch_case == NULL) {
++ struct vtn_case *c = ralloc(b, struct vtn_case);
++
++ list_inithead(&c->body);
++ c->start_block = case_block;
++ c->fallthrough = NULL;
++ nir_array_init(&c->values, b);
++ c->is_default = false;
++ c->visited = false;
++
++ list_addtail(&c->link, &swtch->cases);
++
++ case_block->switch_case = c;
++ }
++
++ if (is_default) {
++ case_block->switch_case->is_default = true;
++ } else {
++ nir_array_add(&case_block->switch_case->values, uint32_t, val);
++ }
++}
++
++/* This function performs a depth-first search of the cases and puts them
++ * in fall-through order.
++ */
++static void
++vtn_order_case(struct vtn_switch *swtch, struct vtn_case *cse)
++{
++ if (cse->visited)
++ return;
++
++ cse->visited = true;
++
++ list_del(&cse->link);
++
++ if (cse->fallthrough) {
++ vtn_order_case(swtch, cse->fallthrough);
++
++ /* If we have a fall-through, place this case right before the case it
++ * falls through to. This ensures that fallthroughs come one after
++ * the other. These two can never get separated because that would
++ * imply something else falling through to the same case. Also, this
++ * can't break ordering because the DFS ensures that this case is
++ * visited before anything that falls through to it.
++ */
++ list_addtail(&cse->link, &cse->fallthrough->link);
++ } else {
++ list_add(&cse->link, &swtch->cases);
++ }
++}
++
++static enum vtn_branch_type
++vtn_get_branch_type(struct vtn_block *block,
++ struct vtn_case *swcase, struct vtn_block *switch_break,
++ struct vtn_block *loop_break, struct vtn_block *loop_cont)
++{
++ if (block->switch_case) {
++ /* This branch is actually a fallthrough */
++ assert(swcase->fallthrough == NULL ||
++ swcase->fallthrough == block->switch_case);
++ swcase->fallthrough = block->switch_case;
++ return vtn_branch_type_switch_fallthrough;
++ } else if (block == switch_break) {
++ return vtn_branch_type_switch_break;
++ } else if (block == loop_break) {
++ return vtn_branch_type_loop_break;
++ } else if (block == loop_cont) {
++ return vtn_branch_type_loop_continue;
++ } else {
++ return vtn_branch_type_none;
++ }
++}
++
++static void
++vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list,
++ struct vtn_block *start, struct vtn_case *switch_case,
++ struct vtn_block *switch_break,
++ struct vtn_block *loop_break, struct vtn_block *loop_cont,
++ struct vtn_block *end)
++{
++ struct vtn_block *block = start;
++ while (block != end) {
++ if (block->merge && (*block->merge & SpvOpCodeMask) == SpvOpLoopMerge &&
++ !block->loop) {
++ struct vtn_loop *loop = ralloc(b, struct vtn_loop);
++
++ loop->node.type = vtn_cf_node_type_loop;
++ list_inithead(&loop->body);
++ list_inithead(&loop->cont_body);
++ loop->control = block->merge[3];
++
++ list_addtail(&loop->node.link, cf_list);
++ block->loop = loop;
++
++ struct vtn_block *new_loop_break =
++ vtn_value(b, block->merge[1], vtn_value_type_block)->block;
++ struct vtn_block *new_loop_cont =
++ vtn_value(b, block->merge[2], vtn_value_type_block)->block;
++
++ /* Note: This recursive call will start with the current block as
++ * its start block. If we weren't careful, we would get here
++ * again and end up in infinite recursion. This is why we set
++ * block->loop above and check for it before creating one. This
++ * way, we only create the loop once and the second call that
++ * tries to handle this loop goes to the cases below and gets
++ * handled as a regular block.
++ *
++ * Note: When we make the recursive walk calls, we pass NULL for
++ * the switch break since you have to break out of the loop first.
++ * We do, however, still pass the current switch case because it's
++ * possible that the merge block for the loop is the start of
++ * another case.
++ */
++ vtn_cfg_walk_blocks(b, &loop->body, block, switch_case, NULL,
++ new_loop_break, new_loop_cont, NULL);
++ vtn_cfg_walk_blocks(b, &loop->cont_body, new_loop_cont, NULL, NULL,
++ new_loop_break, NULL, block);
++
++ block = new_loop_break;
++ continue;
++ }
++
++ assert(block->node.link.next == NULL);
++ list_addtail(&block->node.link, cf_list);
++
++ switch (*block->branch & SpvOpCodeMask) {
++ case SpvOpBranch: {
++ struct vtn_block *branch_block =
++ vtn_value(b, block->branch[1], vtn_value_type_block)->block;
++
++ block->branch_type = vtn_get_branch_type(branch_block,
++ switch_case, switch_break,
++ loop_break, loop_cont);
++
++ if (block->branch_type != vtn_branch_type_none)
++ return;
++
++ block = branch_block;
++ continue;
++ }
++
++ case SpvOpReturn:
++ case SpvOpReturnValue:
++ block->branch_type = vtn_branch_type_return;
++ return;
++
++ case SpvOpKill:
++ block->branch_type = vtn_branch_type_discard;
++ return;
++
++ case SpvOpBranchConditional: {
++ struct vtn_block *then_block =
++ vtn_value(b, block->branch[2], vtn_value_type_block)->block;
++ struct vtn_block *else_block =
++ vtn_value(b, block->branch[3], vtn_value_type_block)->block;
++
++ struct vtn_if *if_stmt = ralloc(b, struct vtn_if);
++
++ if_stmt->node.type = vtn_cf_node_type_if;
++ if_stmt->condition = block->branch[1];
++ list_inithead(&if_stmt->then_body);
++ list_inithead(&if_stmt->else_body);
++
++ list_addtail(&if_stmt->node.link, cf_list);
++
++ if (block->merge &&
++ (*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge) {
++ if_stmt->control = block->merge[2];
++ }
++
++ if_stmt->then_type = vtn_get_branch_type(then_block,
++ switch_case, switch_break,
++ loop_break, loop_cont);
++ if_stmt->else_type = vtn_get_branch_type(else_block,
++ switch_case, switch_break,
++ loop_break, loop_cont);
++
++ if (if_stmt->then_type == vtn_branch_type_none &&
++ if_stmt->else_type == vtn_branch_type_none) {
++ /* Neither side of the if is something we can short-circuit. */
++ assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge);
++ struct vtn_block *merge_block =
++ vtn_value(b, block->merge[1], vtn_value_type_block)->block;
++
++ vtn_cfg_walk_blocks(b, &if_stmt->then_body, then_block,
++ switch_case, switch_break,
++ loop_break, loop_cont, merge_block);
++ vtn_cfg_walk_blocks(b, &if_stmt->else_body, else_block,
++ switch_case, switch_break,
++ loop_break, loop_cont, merge_block);
++
++ enum vtn_branch_type merge_type =
++ vtn_get_branch_type(merge_block, switch_case, switch_break,
++ loop_break, loop_cont);
++ if (merge_type == vtn_branch_type_none) {
++ block = merge_block;
++ continue;
++ } else {
++ return;
++ }
++ } else if (if_stmt->then_type != vtn_branch_type_none &&
++ if_stmt->else_type != vtn_branch_type_none) {
++ /* Both sides were short-circuited. We're done here. */
++ return;
++ } else {
++ /* Exactly one side of the branch could be short-circuited.
++ * We set the branch up as a predicated break/continue and we
++ * continue on with the other side as if it were what comes
++ * after the if.
++ */
++ if (if_stmt->then_type == vtn_branch_type_none) {
++ block = then_block;
++ } else {
++ block = else_block;
++ }
++ continue;
++ }
++ unreachable("Should have returned or continued");
++ }
++
++ case SpvOpSwitch: {
++ assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge);
++ struct vtn_block *break_block =
++ vtn_value(b, block->merge[1], vtn_value_type_block)->block;
++
++ struct vtn_switch *swtch = ralloc(b, struct vtn_switch);
++
++ swtch->node.type = vtn_cf_node_type_switch;
++ swtch->selector = block->branch[1];
++ list_inithead(&swtch->cases);
++
++ list_addtail(&swtch->node.link, cf_list);
++
++ /* First, we go through and record all of the cases. */
++ const uint32_t *branch_end =
++ block->branch + (block->branch[0] >> SpvWordCountShift);
++
++ vtn_add_case(b, swtch, break_block, block->branch[2], 0, true);
++ for (const uint32_t *w = block->branch + 3; w < branch_end; w += 2)
++ vtn_add_case(b, swtch, break_block, w[1], w[0], false);
++
++ /* Now, we go through and walk the blocks. While we walk through
++ * the blocks, we also gather the much-needed fall-through
++ * information.
++ */
++ list_for_each_entry(struct vtn_case, cse, &swtch->cases, link) {
++ assert(cse->start_block != break_block);
++ vtn_cfg_walk_blocks(b, &cse->body, cse->start_block, cse,
++ break_block, NULL, loop_cont, NULL);
++ }
++
++ /* Finally, we walk over all of the cases one more time and put
++ * them in fall-through order.
++ */
++ for (const uint32_t *w = block->branch + 2; w < branch_end; w += 2) {
++ struct vtn_block *case_block =
++ vtn_value(b, *w, vtn_value_type_block)->block;
++
++ if (case_block == break_block)
++ continue;
++
++ assert(case_block->switch_case);
++
++ vtn_order_case(swtch, case_block->switch_case);
++ }
++
++ block = break_block;
++ continue;
++ }
++
++ case SpvOpUnreachable:
++ return;
++
++ default:
++ unreachable("Unhandled opcode");
++ }
++ }
++}
++
++void
++vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, const uint32_t *end)
++{
++ vtn_foreach_instruction(b, words, end,
++ vtn_cfg_handle_prepass_instruction);
++
++ foreach_list_typed(struct vtn_function, func, node, &b->functions) {
++ vtn_cfg_walk_blocks(b, &func->body, func->start_block,
++ NULL, NULL, NULL, NULL, NULL);
++ }
++}
++
++static bool
++vtn_handle_phis_first_pass(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ if (opcode == SpvOpLabel)
++ return true; /* Nothing to do */
++
++ /* If this isn't a phi node, stop. */
++ if (opcode != SpvOpPhi)
++ return false;
++
++ /* For handling phi nodes, we do a poor-man's out-of-ssa on the spot.
++ * For each phi, we create a variable with the appropriate type and
++ * do a load from that variable. Then, in a second pass, we add
++ * stores to that variable to each of the predecessor blocks.
++ *
++ * We could do something more intelligent here. However, in order to
++ * handle loops and things properly, we really need dominance
++ * information. It would end up basically being the into-SSA
++ * algorithm all over again. It's easier if we just let
++ * lower_vars_to_ssa do that for us instead of repeating it here.
++ */
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++
++ struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
++ nir_variable *phi_var =
++ nir_local_variable_create(b->nb.impl, type->type, "phi");
++ _mesa_hash_table_insert(b->phi_table, w, phi_var);
++
++ val->ssa = vtn_local_load(b, nir_deref_var_create(b, phi_var));
++
++ return true;
++}
++
++static bool
++vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ if (opcode != SpvOpPhi)
++ return true;
++
++ struct hash_entry *phi_entry = _mesa_hash_table_search(b->phi_table, w);
++ assert(phi_entry);
++ nir_variable *phi_var = phi_entry->data;
++
++ for (unsigned i = 3; i < count; i += 2) {
++ struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]);
++ struct vtn_block *pred =
++ vtn_value(b, w[i + 1], vtn_value_type_block)->block;
++
++ b->nb.cursor = nir_after_block_before_jump(pred->end_block);
++
++ vtn_local_store(b, src, nir_deref_var_create(b, phi_var));
++ }
++
++ return true;
++}
++
++static void
++vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type,
++ nir_variable *switch_fall_var, bool *has_switch_break)
++{
++ switch (branch_type) {
++ case vtn_branch_type_switch_break:
++ nir_store_var(&b->nb, switch_fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1);
++ *has_switch_break = true;
++ break;
++ case vtn_branch_type_switch_fallthrough:
++ break; /* Nothing to do */
++ case vtn_branch_type_loop_break:
++ nir_jump(&b->nb, nir_jump_break);
++ break;
++ case vtn_branch_type_loop_continue:
++ nir_jump(&b->nb, nir_jump_continue);
++ break;
++ case vtn_branch_type_return:
++ nir_jump(&b->nb, nir_jump_return);
++ break;
++ case vtn_branch_type_discard: {
++ nir_intrinsic_instr *discard =
++ nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_discard);
++ nir_builder_instr_insert(&b->nb, &discard->instr);
++ break;
++ }
++ default:
++ unreachable("Invalid branch type");
++ }
++}
++
++static void
++vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list,
++ nir_variable *switch_fall_var, bool *has_switch_break,
++ vtn_instruction_handler handler)
++{
++ list_for_each_entry(struct vtn_cf_node, node, cf_list, link) {
++ switch (node->type) {
++ case vtn_cf_node_type_block: {
++ struct vtn_block *block = (struct vtn_block *)node;
++
++ const uint32_t *block_start = block->label;
++ const uint32_t *block_end = block->merge ? block->merge :
++ block->branch;
++
++ block_start = vtn_foreach_instruction(b, block_start, block_end,
++ vtn_handle_phis_first_pass);
++
++ vtn_foreach_instruction(b, block_start, block_end, handler);
++
++ block->end_block = nir_cursor_current_block(b->nb.cursor);
++
++ if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) {
++ struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]);
++ vtn_local_store(b, src,
++ nir_deref_var_create(b, b->impl->return_var));
++ }
++
++ if (block->branch_type != vtn_branch_type_none) {
++ vtn_emit_branch(b, block->branch_type,
++ switch_fall_var, has_switch_break);
++ }
++
++ break;
++ }
++
++ case vtn_cf_node_type_if: {
++ struct vtn_if *vtn_if = (struct vtn_if *)node;
++
++ nir_if *if_stmt = nir_if_create(b->shader);
++ if_stmt->condition =
++ nir_src_for_ssa(vtn_ssa_value(b, vtn_if->condition)->def);
++ nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node);
++
++ bool sw_break = false;
++
++ b->nb.cursor = nir_after_cf_list(&if_stmt->then_list);
++ if (vtn_if->then_type == vtn_branch_type_none) {
++ vtn_emit_cf_list(b, &vtn_if->then_body,
++ switch_fall_var, &sw_break, handler);
++ } else {
++ vtn_emit_branch(b, vtn_if->then_type, switch_fall_var, &sw_break);
++ }
++
++ b->nb.cursor = nir_after_cf_list(&if_stmt->else_list);
++ if (vtn_if->else_type == vtn_branch_type_none) {
++ vtn_emit_cf_list(b, &vtn_if->else_body,
++ switch_fall_var, &sw_break, handler);
++ } else {
++ vtn_emit_branch(b, vtn_if->else_type, switch_fall_var, &sw_break);
++ }
++
++ b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node);
++
++ /* If we encountered a switch break somewhere inside of the if,
++ * then it would have been handled correctly by calling
++ * emit_cf_list or emit_branch for the interior. However, we
++ * need to predicate everything following on whether or not we're
++ * still going.
++ */
++ if (sw_break) {
++ *has_switch_break = true;
++
++ nir_if *switch_if = nir_if_create(b->shader);
++ switch_if->condition =
++ nir_src_for_ssa(nir_load_var(&b->nb, switch_fall_var));
++ nir_cf_node_insert(b->nb.cursor, &switch_if->cf_node);
++
++ b->nb.cursor = nir_after_cf_list(&if_stmt->then_list);
++ }
++ break;
++ }
++
++ case vtn_cf_node_type_loop: {
++ struct vtn_loop *vtn_loop = (struct vtn_loop *)node;
++
++ nir_loop *loop = nir_loop_create(b->shader);
++ nir_cf_node_insert(b->nb.cursor, &loop->cf_node);
++
++ b->nb.cursor = nir_after_cf_list(&loop->body);
++ vtn_emit_cf_list(b, &vtn_loop->body, NULL, NULL, handler);
++
++ if (!list_empty(&vtn_loop->cont_body)) {
++ /* If we have a non-trivial continue body then we need to put
++ * it at the beginning of the loop with a flag to ensure that
++ * it doesn't get executed in the first iteration.
++ */
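++ /* The emitted structure is roughly:
++ * cont = false;
++ * loop {
++ * if (cont) { <continue body> }
++ * cont = true;
++ * <loop body>
++ * }
++ */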
++ nir_variable *do_cont =
++ nir_local_variable_create(b->nb.impl, glsl_bool_type(), "cont");
++
++ b->nb.cursor = nir_before_cf_node(&loop->cf_node);
++ nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_FALSE), 1);
++
++ b->nb.cursor = nir_before_cf_list(&loop->body);
++ nir_if *cont_if = nir_if_create(b->shader);
++ cont_if->condition = nir_src_for_ssa(nir_load_var(&b->nb, do_cont));
++ nir_cf_node_insert(b->nb.cursor, &cont_if->cf_node);
++
++ b->nb.cursor = nir_after_cf_list(&cont_if->then_list);
++ vtn_emit_cf_list(b, &vtn_loop->cont_body, NULL, NULL, handler);
++
++ b->nb.cursor = nir_after_cf_node(&cont_if->cf_node);
++ nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1);
++
++ b->has_loop_continue = true;
++ }
++
++ b->nb.cursor = nir_after_cf_node(&loop->cf_node);
++ break;
++ }
++
++ case vtn_cf_node_type_switch: {
++ struct vtn_switch *vtn_switch = (struct vtn_switch *)node;
++
++ /* First, we create a variable to keep track of whether or not the
++ * switch is still going at any given point. Any switch breaks
++ * will set this variable to false.
++ */
++ nir_variable *fall_var =
++ nir_local_variable_create(b->nb.impl, glsl_bool_type(), "fall");
++ nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1);
++
++ /* Next, we gather up all of the conditions. We have to do this
++ * up-front because we also need to build an "any" condition so
++ * that we can use !any for default.
++ */
++ const int num_cases = list_length(&vtn_switch->cases);
++ NIR_VLA(nir_ssa_def *, conditions, num_cases);
++
++ nir_ssa_def *sel = vtn_ssa_value(b, vtn_switch->selector)->def;
++ /* An accumulation of all conditions. Used for the default */
++ nir_ssa_def *any = NULL;
++
++ int i = 0;
++ list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) {
++ if (cse->is_default) {
++ conditions[i++] = NULL;
++ continue;
++ }
++
++ nir_ssa_def *cond = NULL;
++ nir_array_foreach(&cse->values, uint32_t, val) {
++ nir_ssa_def *is_val =
++ nir_ieq(&b->nb, sel, nir_imm_int(&b->nb, *val));
++
++ cond = cond ? nir_ior(&b->nb, cond, is_val) : is_val;
++ }
++
++ any = any ? nir_ior(&b->nb, any, cond) : cond;
++ conditions[i++] = cond;
++ }
++ assert(i == num_cases);
++
++ /* Now we can walk the list of cases and actually emit code */
++ i = 0;
++ list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) {
++ /* Figure out the condition */
++ nir_ssa_def *cond = conditions[i++];
++ if (cse->is_default) {
++ assert(cond == NULL);
++ cond = nir_inot(&b->nb, any);
++ }
++ /* Take fallthrough into account */
++ cond = nir_ior(&b->nb, cond, nir_load_var(&b->nb, fall_var));
++
++ nir_if *case_if = nir_if_create(b->nb.shader);
++ case_if->condition = nir_src_for_ssa(cond);
++ nir_cf_node_insert(b->nb.cursor, &case_if->cf_node);
++
++ bool has_break = false;
++ b->nb.cursor = nir_after_cf_list(&case_if->then_list);
++ nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_TRUE), 1);
++ vtn_emit_cf_list(b, &cse->body, fall_var, &has_break, handler);
++ (void)has_break; /* We don't care */
++
++ b->nb.cursor = nir_after_cf_node(&case_if->cf_node);
++ }
++ assert(i == num_cases);
++
++ break;
++ }
++
++ default:
++ unreachable("Invalid CF node type");
++ }
++ }
++}
++
++void
++vtn_function_emit(struct vtn_builder *b, struct vtn_function *func,
++ vtn_instruction_handler instruction_handler)
++{
++ nir_builder_init(&b->nb, func->impl);
++ b->nb.cursor = nir_after_cf_list(&func->impl->body);
++ b->has_loop_continue = false;
++ b->phi_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
++ _mesa_key_pointer_equal);
++
++ vtn_emit_cf_list(b, &func->body, NULL, NULL, instruction_handler);
++
++ vtn_foreach_instruction(b, func->start_block->label, func->end,
++ vtn_handle_phi_second_pass);
++
++ /* Continue blocks for loops get inserted before the body of the loop
++ * but instructions in the continue may use SSA defs in the loop body.
++ * Therefore, we need to repair SSA to insert the needed phi nodes.
++ */
++ if (b->has_loop_continue)
++ nir_repair_ssa_impl(func->impl);
++}
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ *
++ * Authors:
++ * Jason Ekstrand (jason@jlekstrand.net)
++ *
++ */
++
++#include "vtn_private.h"
++#include "GLSL.std.450.h"
++
++#define M_PIf ((float) M_PI)
++#define M_PI_2f ((float) M_PI_2)
++#define M_PI_4f ((float) M_PI_4)
++
++static nir_ssa_def *
++build_mat2_det(nir_builder *b, nir_ssa_def *col[2])
++{
++ unsigned swiz[4] = {1, 0, 0, 0};
++ nir_ssa_def *p = nir_fmul(b, col[0], nir_swizzle(b, col[1], swiz, 2, true));
++ return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1));
++}
++
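++/* The 3x3 determinant is the scalar triple product dot(col0, cross(col1, col2)),
++ * written out with yzx/zxy swizzles so it maps directly onto vector ALU ops.
++ */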
++static nir_ssa_def *
++build_mat3_det(nir_builder *b, nir_ssa_def *col[3])
++{
++ unsigned yzx[4] = {1, 2, 0, 0};
++ unsigned zxy[4] = {2, 0, 1, 0};
++
++ nir_ssa_def *prod0 =
++ nir_fmul(b, col[0],
++ nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true),
++ nir_swizzle(b, col[2], zxy, 3, true)));
++ nir_ssa_def *prod1 =
++ nir_fmul(b, col[0],
++ nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true),
++ nir_swizzle(b, col[2], yzx, 3, true)));
++
++ nir_ssa_def *diff = nir_fsub(b, prod0, prod1);
++
++ return nir_fadd(b, nir_channel(b, diff, 0),
++ nir_fadd(b, nir_channel(b, diff, 1),
++ nir_channel(b, diff, 2)));
++}
++
++static nir_ssa_def *
++build_mat4_det(nir_builder *b, nir_ssa_def **col)
++{
++ nir_ssa_def *subdet[4];
++ for (unsigned i = 0; i < 4; i++) {
++ unsigned swiz[3];
++ for (unsigned j = 0, k = 0; j < 3; j++, k++) {
++ if (k == i)
++ k++; /* skip column */
++ swiz[j] = k;
++ }
++
++ nir_ssa_def *subcol[3];
++ subcol[0] = nir_swizzle(b, col[1], swiz, 3, true);
++ subcol[1] = nir_swizzle(b, col[2], swiz, 3, true);
++ subcol[2] = nir_swizzle(b, col[3], swiz, 3, true);
++
++ subdet[i] = build_mat3_det(b, subcol);
++ }
++
++ nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4));
++
++ return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0),
++ nir_channel(b, prod, 1)),
++ nir_fsub(b, nir_channel(b, prod, 2),
++ nir_channel(b, prod, 3)));
++}
++
++static nir_ssa_def *
++build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src)
++{
++ unsigned size = glsl_get_vector_elements(src->type);
++
++ nir_ssa_def *cols[4];
++ for (unsigned i = 0; i < size; i++)
++ cols[i] = src->elems[i]->def;
++
++ switch(size) {
++ case 2: return build_mat2_det(&b->nb, cols);
++ case 3: return build_mat3_det(&b->nb, cols);
++ case 4: return build_mat4_det(&b->nb, cols);
++ default:
++ unreachable("Invalid matrix size");
++ }
++}
++
++/* Computes the determinant of the submatrix given by taking src and
++ * removing the specified row and column.
++ */
++static nir_ssa_def *
++build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src,
++ unsigned size, unsigned row, unsigned col)
++{
++ assert(row < size && col < size);
++ if (size == 2) {
++ return nir_channel(b, src->elems[1 - col]->def, 1 - row);
++ } else {
++ /* Swizzle to get all but the specified row */
++ unsigned swiz[3];
++ for (unsigned j = 0; j < 4; j++)
++ swiz[j - (j > row)] = j;
++
++ /* Grab all but the specified column */
++ nir_ssa_def *subcol[3];
++ for (unsigned j = 0; j < size; j++) {
++ if (j != col) {
++ subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def,
++ swiz, size - 1, true);
++ }
++ }
++
++ if (size == 3) {
++ return build_mat2_det(b, subcol);
++ } else {
++ assert(size == 4);
++ return build_mat3_det(b, subcol);
++ }
++ }
++}
++
++static struct vtn_ssa_value *
++matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src)
++{
++ nir_ssa_def *adj_col[4];
++ unsigned size = glsl_get_vector_elements(src->type);
++
++ /* Build up an adjugate matrix */
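++ /* adj(M) is the transpose of the cofactor matrix, which is why the row and
++ * column passed to build_mat_subdet() are swapped relative to the element
++ * being written; the inverse is then adj(M) * (1 / det(M)).
++ */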
++ for (unsigned c = 0; c < size; c++) {
++ nir_ssa_def *elem[4];
++ for (unsigned r = 0; r < size; r++) {
++ elem[r] = build_mat_subdet(&b->nb, src, size, c, r);
++
++ if ((r + c) % 2)
++ elem[r] = nir_fneg(&b->nb, elem[r]);
++ }
++
++ adj_col[c] = nir_vec(&b->nb, elem, size);
++ }
++
++ nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src));
++
++ struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type);
++ for (unsigned i = 0; i < size; i++)
++ val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv);
++
++ return val;
++}
++
++static nir_ssa_def*
++build_length(nir_builder *b, nir_ssa_def *vec)
++{
++ switch (vec->num_components) {
++ case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec));
++ case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec));
++ case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec));
++ case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec));
++ default:
++ unreachable("Invalid number of components");
++ }
++}
++
++static inline nir_ssa_def *
++build_fclamp(nir_builder *b,
++ nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
++{
++ return nir_fmin(b, nir_fmax(b, x, min_val), max_val);
++}
++
++/**
++ * Return e^x.
++ */
++static nir_ssa_def *
++build_exp(nir_builder *b, nir_ssa_def *x)
++{
++ return nir_fexp2(b, nir_fmul(b, x, nir_imm_float(b, M_LOG2E)));
++}
++
++/**
++ * Return ln(x) - the natural logarithm of x.
++ */
++static nir_ssa_def *
++build_log(nir_builder *b, nir_ssa_def *x)
++{
++ return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E));
++}
++
++static nir_ssa_def *
++build_asin(nir_builder *b, nir_ssa_def *x)
++{
++ /*
++ * asin(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x| * (pi/4 - 1 + |x| * (0.086566724 + |x| * -0.03102955))))
++ */
++ nir_ssa_def *abs_x = nir_fabs(b, x);
++ return nir_fmul(b, nir_fsign(b, x),
++ nir_fsub(b, nir_imm_float(b, M_PI_2f),
++ nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)),
++ nir_fadd(b, nir_imm_float(b, M_PI_2f),
++ nir_fmul(b, abs_x,
++ nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f),
++ nir_fmul(b, abs_x,
++ nir_fadd(b, nir_imm_float(b, 0.086566724f),
++ nir_fmul(b, abs_x,
++ nir_imm_float(b, -0.03102955f))))))))));
++}
++
++static nir_ssa_def *
++build_acos(nir_builder *b, nir_ssa_def *x)
++{
++ /*
++ * poly(x) = sqrt(1 - |x|) * (pi/2 + |x| * (pi/4 - 1 + |x| * (0.08132463 + |x| * -0.02363318)))
++ * acos(x) = x < 0 ? pi - poly(x) : poly(x)
++ */
++ nir_ssa_def *abs_x = nir_fabs(b, x);
++ nir_ssa_def *poly = nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)),
++ nir_fadd(b, nir_imm_float(b, M_PI_2f),
++ nir_fmul(b, abs_x,
++ nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f),
++ nir_fmul(b, abs_x,
++ nir_fadd(b, nir_imm_float(b, 0.08132463f),
++ nir_fmul(b, abs_x,
++ nir_imm_float(b, -0.02363318f))))))));
++ return nir_bcsel(b, nir_flt(b, x, nir_imm_float(b, 0)),
++ nir_fsub(b, nir_imm_float(b, M_PI), poly),
++ poly);
++}
++
++/**
++ * Compute xs[0] + xs[1] + xs[2] + ... using fadd.
++ */
++static nir_ssa_def *
++build_fsum(nir_builder *b, nir_ssa_def **xs, int terms)
++{
++ nir_ssa_def *accum = xs[0];
++
++ for (int i = 1; i < terms; i++)
++ accum = nir_fadd(b, accum, xs[i]);
++
++ return accum;
++}
++
++static nir_ssa_def *
++build_atan(nir_builder *b, nir_ssa_def *y_over_x)
++{
++ nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x);
++ nir_ssa_def *one = nir_imm_float(b, 1.0f);
++
++ /*
++ * range-reduction, first step:
++ *
++ * / y_over_x if |y_over_x| <= 1.0;
++ * x = <
++ * \ 1.0 / y_over_x otherwise
++ */
++ nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one),
++ nir_fmax(b, abs_y_over_x, one));
++
++ /*
++ * approximate atan by evaluating polynomial:
++ *
++ * x * 0.9999793128310355 - x^3 * 0.3326756418091246 +
++ * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 +
++ * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444
++ */
++ nir_ssa_def *x_2 = nir_fmul(b, x, x);
++ nir_ssa_def *x_3 = nir_fmul(b, x_2, x);
++ nir_ssa_def *x_5 = nir_fmul(b, x_3, x_2);
++ nir_ssa_def *x_7 = nir_fmul(b, x_5, x_2);
++ nir_ssa_def *x_9 = nir_fmul(b, x_7, x_2);
++ nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2);
++
++ nir_ssa_def *polynomial_terms[] = {
++ nir_fmul(b, x, nir_imm_float(b, 0.9999793128310355f)),
++ nir_fmul(b, x_3, nir_imm_float(b, -0.3326756418091246f)),
++ nir_fmul(b, x_5, nir_imm_float(b, 0.1938924977115610f)),
++ nir_fmul(b, x_7, nir_imm_float(b, -0.1173503194786851f)),
++ nir_fmul(b, x_9, nir_imm_float(b, 0.0536813784310406f)),
++ nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)),
++ };
++
++ nir_ssa_def *tmp =
++ build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms));
++
++ /* range-reduction fixup */
++ tmp = nir_fadd(b, tmp,
++ nir_fmul(b,
++ nir_b2f(b, nir_flt(b, one, abs_y_over_x)),
++ nir_fadd(b, nir_fmul(b, tmp,
++ nir_imm_float(b, -2.0f)),
++ nir_imm_float(b, M_PI_2f))));
++
++ /* sign fixup */
++ return nir_fmul(b, tmp, nir_fsign(b, y_over_x));
++}
++
++static nir_ssa_def *
++build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x)
++{
++ nir_ssa_def *zero = nir_imm_float(b, 0.0f);
++
++ /* If |x| >= 1.0e-8 * |y|: */
++ nir_ssa_def *condition =
++ nir_fge(b, nir_fabs(b, x),
++ nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y)));
++
++ /* Then...call atan(y/x) and fix it up: */
++ nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x));
++ nir_ssa_def *r_then =
++ nir_bcsel(b, nir_flt(b, x, zero),
++ nir_fadd(b, atan1,
++ nir_bcsel(b, nir_fge(b, y, zero),
++ nir_imm_float(b, M_PIf),
++ nir_imm_float(b, -M_PIf))),
++ atan1);
++
++ /* Else... */
++ nir_ssa_def *r_else =
++ nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f));
++
++ return nir_bcsel(b, condition, r_then, r_else);
++}
++
++static nir_ssa_def *
++build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent)
++{
++ nir_ssa_def *abs_x = nir_fabs(b, x);
++ nir_ssa_def *zero = nir_imm_float(b, 0.0f);
++
++ /* Single-precision floating-point values are stored as
++ * 1 sign bit;
++ * 8 exponent bits;
++ * 23 mantissa bits.
++ *
++ * An exponent shift of 23 will shift the mantissa out, leaving only the
++ * exponent and sign bit (which itself may be zero, if the absolute value
++ * was taken before the bitcast and shift).
++ */
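++ /* Worked example (illustrative): frexp(8.0) yields a mantissa of 0.5 and
++ * an exponent of 4, since 8.0 = 0.5 * 2^4 and the mantissa lies in
++ * [0.5, 1.0).
++ */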
++ nir_ssa_def *exponent_shift = nir_imm_int(b, 23);
++ nir_ssa_def *exponent_bias = nir_imm_int(b, -126);
++
++ nir_ssa_def *sign_mantissa_mask = nir_imm_int(b, 0x807fffffu);
++
++ /* Exponent of floating-point values in the range [0.5, 1.0). */
++ nir_ssa_def *exponent_value = nir_imm_int(b, 0x3f000000u);
++
++ nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero);
++
++ *exponent =
++ nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
++ nir_bcsel(b, is_not_zero, exponent_bias, zero));
++
++ return nir_ior(b, nir_iand(b, x, sign_mantissa_mask),
++ nir_bcsel(b, is_not_zero, exponent_value, zero));
++}
++
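++ /* Handles the GLSL.std.450 instructions that map to a plain NIR ALU op or
++ * to a small expression built with nir_builder. For OpExtInst, w[1] is the
++ * result type, w[2] the result id, w[3] the extended instruction set, w[4]
++ * the instruction within that set, and the operand ids start at w[5].
++ */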
++static void
++handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
++ const uint32_t *w, unsigned count)
++{
++ struct nir_builder *nb = &b->nb;
++ const struct glsl_type *dest_type =
++ vtn_value(b, w[1], vtn_value_type_type)->type->type;
++
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ val->ssa = vtn_create_ssa_value(b, dest_type);
++
++ /* Collect the various SSA sources */
++ unsigned num_inputs = count - 5;
++ nir_ssa_def *src[3];
++ for (unsigned i = 0; i < num_inputs; i++)
++ src[i] = vtn_ssa_value(b, w[i + 5])->def;
++
++ nir_op op;
++ switch (entrypoint) {
++ case GLSLstd450Round: op = nir_op_fround_even; break; /* TODO */
++ case GLSLstd450RoundEven: op = nir_op_fround_even; break;
++ case GLSLstd450Trunc: op = nir_op_ftrunc; break;
++ case GLSLstd450FAbs: op = nir_op_fabs; break;
++ case GLSLstd450SAbs: op = nir_op_iabs; break;
++ case GLSLstd450FSign: op = nir_op_fsign; break;
++ case GLSLstd450SSign: op = nir_op_isign; break;
++ case GLSLstd450Floor: op = nir_op_ffloor; break;
++ case GLSLstd450Ceil: op = nir_op_fceil; break;
++ case GLSLstd450Fract: op = nir_op_ffract; break;
++ case GLSLstd450Radians:
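++ /* radians(x) = x * pi / 180; 0.01745329251 ~= pi / 180 */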
++ val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251));
++ return;
++ case GLSLstd450Degrees:
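++ /* degrees(x) = x * 180 / pi; 57.2957795131 ~= 180 / pi */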
++ val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131));
++ return;
++ case GLSLstd450Sin: op = nir_op_fsin; break;
++ case GLSLstd450Cos: op = nir_op_fcos; break;
++ case GLSLstd450Tan:
++ val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]),
++ nir_fcos(nb, src[0]));
++ return;
++ case GLSLstd450Pow: op = nir_op_fpow; break;
++ case GLSLstd450Exp2: op = nir_op_fexp2; break;
++ case GLSLstd450Log2: op = nir_op_flog2; break;
++ case GLSLstd450Sqrt: op = nir_op_fsqrt; break;
++ case GLSLstd450InverseSqrt: op = nir_op_frsq; break;
++
++ case GLSLstd450Modf: {
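++ /* modf returns the (signed) fractional part and stores the (signed)
++ * whole-number part through the pointer operand in w[6].
++ */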
++ nir_ssa_def *sign = nir_fsign(nb, src[0]);
++ nir_ssa_def *abs = nir_fabs(nb, src[0]);
++ val->ssa->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
++ nir_store_deref_var(nb, vtn_nir_deref(b, w[6]),
++ nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf);
++ return;
++ }
++
++ case GLSLstd450ModfStruct: {
++ nir_ssa_def *sign = nir_fsign(nb, src[0]);
++ nir_ssa_def *abs = nir_fabs(nb, src[0]);
++ assert(glsl_type_is_struct(val->ssa->type));
++ val->ssa->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
++ val->ssa->elems[1]->def = nir_fmul(nb, sign, nir_ffloor(nb, abs));
++ return;
++ }
++
++ case GLSLstd450FMin: op = nir_op_fmin; break;
++ case GLSLstd450UMin: op = nir_op_umin; break;
++ case GLSLstd450SMin: op = nir_op_imin; break;
++ case GLSLstd450FMax: op = nir_op_fmax; break;
++ case GLSLstd450UMax: op = nir_op_umax; break;
++ case GLSLstd450SMax: op = nir_op_imax; break;
++ case GLSLstd450FMix: op = nir_op_flrp; break;
++ case GLSLstd450Step:
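++ /* step(edge, x) = 1.0 if x >= edge, else 0.0 */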
++ val->ssa->def = nir_sge(nb, src[1], src[0]);
++ return;
++
++ case GLSLstd450Fma: op = nir_op_ffma; break;
++ case GLSLstd450Ldexp: op = nir_op_ldexp; break;
++
++ /* Packing/Unpacking functions */
++ case GLSLstd450PackSnorm4x8: op = nir_op_pack_snorm_4x8; break;
++ case GLSLstd450PackUnorm4x8: op = nir_op_pack_unorm_4x8; break;
++ case GLSLstd450PackSnorm2x16: op = nir_op_pack_snorm_2x16; break;
++ case GLSLstd450PackUnorm2x16: op = nir_op_pack_unorm_2x16; break;
++ case GLSLstd450PackHalf2x16: op = nir_op_pack_half_2x16; break;
++ case GLSLstd450UnpackSnorm4x8: op = nir_op_unpack_snorm_4x8; break;
++ case GLSLstd450UnpackUnorm4x8: op = nir_op_unpack_unorm_4x8; break;
++ case GLSLstd450UnpackSnorm2x16: op = nir_op_unpack_snorm_2x16; break;
++ case GLSLstd450UnpackUnorm2x16: op = nir_op_unpack_unorm_2x16; break;
++ case GLSLstd450UnpackHalf2x16: op = nir_op_unpack_half_2x16; break;
++
++ case GLSLstd450Length:
++ val->ssa->def = build_length(nb, src[0]);
++ return;
++ case GLSLstd450Distance:
++ val->ssa->def = build_length(nb, nir_fsub(nb, src[0], src[1]));
++ return;
++ case GLSLstd450Normalize:
++ val->ssa->def = nir_fdiv(nb, src[0], build_length(nb, src[0]));
++ return;
++
++ case GLSLstd450Exp:
++ val->ssa->def = build_exp(nb, src[0]);
++ return;
++
++ case GLSLstd450Log:
++ val->ssa->def = build_log(nb, src[0]);
++ return;
++
++ case GLSLstd450FClamp:
++ val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]);
++ return;
++ case GLSLstd450UClamp:
++ val->ssa->def = nir_umin(nb, nir_umax(nb, src[0], src[1]), src[2]);
++ return;
++ case GLSLstd450SClamp:
++ val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]);
++ return;
++
++ case GLSLstd450Cross: {
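++ /* cross(a, b) = a.yzx * b.zxy - a.zxy * b.yzx */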
++ unsigned yzx[4] = { 1, 2, 0, 0 };
++ unsigned zxy[4] = { 2, 0, 1, 0 };
++ val->ssa->def =
++ nir_fsub(nb, nir_fmul(nb, nir_swizzle(nb, src[0], yzx, 3, true),
++ nir_swizzle(nb, src[1], zxy, 3, true)),
++ nir_fmul(nb, nir_swizzle(nb, src[0], zxy, 3, true),
++ nir_swizzle(nb, src[1], yzx, 3, true)));
++ return;
++ }
++
++ case GLSLstd450SmoothStep: {
++ /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */
++ nir_ssa_def *t =
++ build_fclamp(nb, nir_fdiv(nb, nir_fsub(nb, src[2], src[0]),
++ nir_fsub(nb, src[1], src[0])),
++ nir_imm_float(nb, 0.0), nir_imm_float(nb, 1.0));
++ /* result = t * t * (3 - 2 * t) */
++ val->ssa->def =
++ nir_fmul(nb, t, nir_fmul(nb, t,
++ nir_fsub(nb, nir_imm_float(nb, 3.0),
++ nir_fmul(nb, nir_imm_float(nb, 2.0), t))));
++ return;
++ }
++
++ case GLSLstd450FaceForward:
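++ /* faceforward(N, I, Nref) = N if dot(Nref, I) < 0, otherwise -N */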
++ val->ssa->def =
++ nir_bcsel(nb, nir_flt(nb, nir_fdot(nb, src[2], src[1]),
++ nir_imm_float(nb, 0.0)),
++ src[0], nir_fneg(nb, src[0]));
++ return;
++
++ case GLSLstd450Reflect:
++ /* I - 2 * dot(N, I) * N */
++ val->ssa->def =
++ nir_fsub(nb, src[0], nir_fmul(nb, nir_imm_float(nb, 2.0),
++ nir_fmul(nb, nir_fdot(nb, src[0], src[1]),
++ src[1])));
++ return;
++
++ case GLSLstd450Refract: {
++ nir_ssa_def *I = src[0];
++ nir_ssa_def *N = src[1];
++ nir_ssa_def *eta = src[2];
++ nir_ssa_def *n_dot_i = nir_fdot(nb, N, I);
++ nir_ssa_def *one = nir_imm_float(nb, 1.0);
++ nir_ssa_def *zero = nir_imm_float(nb, 0.0);
++ /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */
++ nir_ssa_def *k =
++ nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta,
++ nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i)))));
++ nir_ssa_def *result =
++ nir_fsub(nb, nir_fmul(nb, eta, I),
++ nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i),
++ nir_fsqrt(nb, k)), N));
++ /* XXX: bcsel, or if statement? */
++ val->ssa->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result);
++ return;
++ }
++
++ case GLSLstd450Sinh:
++ /* 0.5 * (e^x - e^(-x)) */
++ val->ssa->def =
++ nir_fmul(nb, nir_imm_float(nb, 0.5f),
++ nir_fsub(nb, build_exp(nb, src[0]),
++ build_exp(nb, nir_fneg(nb, src[0]))));
++ return;
++
++ case GLSLstd450Cosh:
++ /* 0.5 * (e^x + e^(-x)) */
++ val->ssa->def =
++ nir_fmul(nb, nir_imm_float(nb, 0.5f),
++ nir_fadd(nb, build_exp(nb, src[0]),
++ build_exp(nb, nir_fneg(nb, src[0]))));
++ return;
++
++ case GLSLstd450Tanh:
++ /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */
++ val->ssa->def =
++ nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f),
++ nir_fsub(nb, build_exp(nb, src[0]),
++ build_exp(nb, nir_fneg(nb, src[0])))),
++ nir_fmul(nb, nir_imm_float(nb, 0.5f),
++ nir_fadd(nb, build_exp(nb, src[0]),
++ build_exp(nb, nir_fneg(nb, src[0])))));
++ return;
++
++ case GLSLstd450Asinh:
++ val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),
++ build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]),
++ nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]),
++ nir_imm_float(nb, 1.0f))))));
++ return;
++ case GLSLstd450Acosh:
++ val->ssa->def = build_log(nb, nir_fadd(nb, src[0],
++ nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]),
++ nir_imm_float(nb, 1.0f)))));
++ return;
++ case GLSLstd450Atanh: {
++ nir_ssa_def *one = nir_imm_float(nb, 1.0);
++ val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f),
++ build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]),
++ nir_fsub(nb, one, src[0]))));
++ return;
++ }
++
++ case GLSLstd450FindILsb: op = nir_op_find_lsb; break;
++ case GLSLstd450FindSMsb: op = nir_op_ifind_msb; break;
++ case GLSLstd450FindUMsb: op = nir_op_ufind_msb; break;
++
++ case GLSLstd450Asin:
++ val->ssa->def = build_asin(nb, src[0]);
++ return;
++
++ case GLSLstd450Acos:
++ val->ssa->def = build_acos(nb, src[0]);
++ return;
++
++ case GLSLstd450Atan:
++ val->ssa->def = build_atan(nb, src[0]);
++ return;
++
++ case GLSLstd450Atan2:
++ val->ssa->def = build_atan2(nb, src[0], src[1]);
++ return;
++
++ case GLSLstd450Frexp: {
++ nir_ssa_def *exponent;
++ val->ssa->def = build_frexp(nb, src[0], &exponent);
++ nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), exponent, 0xf);
++ return;
++ }
++
++ case GLSLstd450FrexpStruct: {
++ assert(glsl_type_is_struct(val->ssa->type));
++ val->ssa->elems[0]->def = build_frexp(nb, src[0],
++ &val->ssa->elems[1]->def);
++ return;
++ }
++
++ case GLSLstd450PackDouble2x32:
++ case GLSLstd450UnpackDouble2x32:
++ default:
++ unreachable("Unhandled opcode");
++ }
++
++ nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
++ nir_ssa_dest_init(&instr->instr, &instr->dest.dest,
++ glsl_get_vector_elements(val->ssa->type), val->name);
++ instr->dest.write_mask = (1 << instr->dest.dest.ssa.num_components) - 1;
++ val->ssa->def = &instr->dest.dest.ssa;
++
++ for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++)
++ instr->src[i].src = nir_src_for_ssa(src[i]);
++
++ nir_builder_instr_insert(nb, &instr->instr);
++}
++
++bool
++vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
++ const uint32_t *w, unsigned count)
++{
++ switch ((enum GLSLstd450)ext_opcode) {
++ case GLSLstd450Determinant: {
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ val->ssa = rzalloc(b, struct vtn_ssa_value);
++ val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type;
++ val->ssa->def = build_mat_det(b, vtn_ssa_value(b, w[5]));
++ break;
++ }
++
++ case GLSLstd450MatrixInverse: {
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ val->ssa = matrix_inverse(b, vtn_ssa_value(b, w[5]));
++ break;
++ }
++
++ case GLSLstd450InterpolateAtCentroid:
++ case GLSLstd450InterpolateAtSample:
++ case GLSLstd450InterpolateAtOffset:
++ unreachable("Unhandled opcode");
++
++ default:
++ handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, w, count);
++ }
++
++ return true;
++}
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ *
++ * Authors:
++ * Jason Ekstrand (jason@jlekstrand.net)
++ *
++ */
++
++#include "nir/nir.h"
++#include "nir/nir_builder.h"
++#include "nir/nir_array.h"
++#include "nir_spirv.h"
++#include "spirv.h"
++
++struct vtn_builder;
++struct vtn_decoration;
++
++enum vtn_value_type {
++ vtn_value_type_invalid = 0,
++ vtn_value_type_undef,
++ vtn_value_type_string,
++ vtn_value_type_decoration_group,
++ vtn_value_type_type,
++ vtn_value_type_constant,
++ vtn_value_type_access_chain,
++ vtn_value_type_function,
++ vtn_value_type_block,
++ vtn_value_type_ssa,
++ vtn_value_type_extension,
++ vtn_value_type_image_pointer,
++ vtn_value_type_sampled_image,
++};
++
++enum vtn_branch_type {
++ vtn_branch_type_none,
++ vtn_branch_type_switch_break,
++ vtn_branch_type_switch_fallthrough,
++ vtn_branch_type_loop_break,
++ vtn_branch_type_loop_continue,
++ vtn_branch_type_discard,
++ vtn_branch_type_return,
++};
++
++enum vtn_cf_node_type {
++ vtn_cf_node_type_block,
++ vtn_cf_node_type_if,
++ vtn_cf_node_type_loop,
++ vtn_cf_node_type_switch,
++};
++
++struct vtn_cf_node {
++ struct list_head link;
++ enum vtn_cf_node_type type;
++};
++
++struct vtn_loop {
++ struct vtn_cf_node node;
++
++ /* The main body of the loop */
++ struct list_head body;
++
++ /* The "continue" part of the loop. This gets executed after the body
++ * and is where you go when you hit a continue.
++ */
++ struct list_head cont_body;
++
++ SpvLoopControlMask control;
++};
++
++struct vtn_if {
++ struct vtn_cf_node node;
++
++ uint32_t condition;
++
++ enum vtn_branch_type then_type;
++ struct list_head then_body;
++
++ enum vtn_branch_type else_type;
++ struct list_head else_body;
++
++ SpvSelectionControlMask control;
++};
++
++struct vtn_case {
++ struct list_head link;
++
++ struct list_head body;
++
++ /* The block that starts this case */
++ struct vtn_block *start_block;
++
++ /* The fallthrough case, if any */
++ struct vtn_case *fallthrough;
++
++ /* The uint32_t values that map to this case */
++ nir_array values;
++
++ /* True if this is the default case */
++ bool is_default;
++
++ /* Initialized to false; used when sorting the list of cases */
++ bool visited;
++};
++
++struct vtn_switch {
++ struct vtn_cf_node node;
++
++ uint32_t selector;
++
++ struct list_head cases;
++};
++
++struct vtn_block {
++ struct vtn_cf_node node;
++
++ /** A pointer to the label instruction */
++ const uint32_t *label;
++
++ /** A pointer to the merge instruction (or NULL if none exists) */
++ const uint32_t *merge;
++
++ /** A pointer to the branch instruction that ends this block */
++ const uint32_t *branch;
++
++ enum vtn_branch_type branch_type;
++
++ /** Points to the loop that this block starts (if it starts a loop) */
++ struct vtn_loop *loop;
++
++ /** Points to the switch case started by this block (if any) */
++ struct vtn_case *switch_case;
++
++ /** The last NIR block generated for this SPIR-V block. */
++ nir_block *end_block;
++};
++
++struct vtn_function {
++ struct exec_node node;
++
++ nir_function_impl *impl;
++ struct vtn_block *start_block;
++
++ struct list_head body;
++
++ const uint32_t *end;
++
++ SpvFunctionControlMask control;
++};
++
++typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t,
++ const uint32_t *, unsigned);
++
++void vtn_build_cfg(struct vtn_builder *b, const uint32_t *words,
++ const uint32_t *end);
++void vtn_function_emit(struct vtn_builder *b, struct vtn_function *func,
++ vtn_instruction_handler instruction_handler);
++
++const uint32_t *
++vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
++ const uint32_t *end, vtn_instruction_handler handler);
++
++struct vtn_ssa_value {
++ union {
++ nir_ssa_def *def;
++ struct vtn_ssa_value **elems;
++ };
++
++ /* For matrices, if this is non-NULL, then this value is actually the
++ * transpose of some other value. The value that `transposed` points to
++ * always dominates this value.
++ */
++ struct vtn_ssa_value *transposed;
++
++ const struct glsl_type *type;
++};
++
++struct vtn_type {
++ const struct glsl_type *type;
++
++ /* The value that declares this type. Used for finding decorations */
++ struct vtn_value *val;
++
++ /* for matrices, whether the matrix is stored row-major */
++ bool row_major;
++
++ /* for structs, the offset of each member */
++ unsigned *offsets;
++
++ /* for structs, whether it was decorated as a "non-SSBO-like" block */
++ bool block;
++
++ /* for structs, whether it was decorated as an "SSBO-like" block */
++ bool buffer_block;
++
++ /* for structs with block == true, whether this is a builtin block (i.e. a
++ * block that contains only builtins).
++ */
++ bool builtin_block;
++
++ /* Image format for image_load_store type images */
++ unsigned image_format;
++
++ /* Access qualifier for storage images */
++ SpvAccessQualifier access_qualifier;
++
++ /* for arrays and matrices, the array stride */
++ unsigned stride;
++
++ /* for arrays, the vtn_type for the elements of the array */
++ struct vtn_type *array_element;
++
++ /* for structures, the vtn_type for each member */
++ struct vtn_type **members;
++
++ /* Whether this type, or a parent type, has been decorated as a builtin */
++ bool is_builtin;
++
++ SpvBuiltIn builtin;
++};
++
++struct vtn_variable;
++
++enum vtn_access_mode {
++ vtn_access_mode_id,
++ vtn_access_mode_literal,
++};
++
++struct vtn_access_link {
++ enum vtn_access_mode mode;
++ uint32_t id;
++};
++
++struct vtn_access_chain {
++ struct vtn_variable *var;
++
++ uint32_t length;
++
++ /* Struct elements and array offsets */
++ struct vtn_access_link link[0];
++};
++
++enum vtn_variable_mode {
++ vtn_variable_mode_local,
++ vtn_variable_mode_global,
++ vtn_variable_mode_param,
++ vtn_variable_mode_ubo,
++ vtn_variable_mode_ssbo,
++ vtn_variable_mode_push_constant,
++ vtn_variable_mode_image,
++ vtn_variable_mode_sampler,
++ vtn_variable_mode_workgroup,
++ vtn_variable_mode_input,
++ vtn_variable_mode_output,
++};
++
++struct vtn_variable {
++ enum vtn_variable_mode mode;
++
++ struct vtn_type *type;
++
++ unsigned descriptor_set;
++ unsigned binding;
++
++ nir_variable *var;
++ nir_variable **members;
++
++ struct vtn_access_chain chain;
++};
++
++struct vtn_image_pointer {
++ struct vtn_access_chain *image;
++ nir_ssa_def *coord;
++ nir_ssa_def *sample;
++};
++
++struct vtn_sampled_image {
++ struct vtn_access_chain *image; /* Image or array of images */
++ struct vtn_access_chain *sampler; /* Sampler */
++};
++
++struct vtn_value {
++ enum vtn_value_type value_type;
++ const char *name;
++ struct vtn_decoration *decoration;
++ union {
++ void *ptr;
++ char *str;
++ struct vtn_type *type;
++ struct {
++ nir_constant *constant;
++ const struct glsl_type *const_type;
++ };
++ struct vtn_access_chain *access_chain;
++ struct vtn_image_pointer *image;
++ struct vtn_sampled_image *sampled_image;
++ struct vtn_function *func;
++ struct vtn_block *block;
++ struct vtn_ssa_value *ssa;
++ vtn_instruction_handler ext_handler;
++ };
++};
++
++#define VTN_DEC_DECORATION -1
++#define VTN_DEC_EXECUTION_MODE -2
++#define VTN_DEC_STRUCT_MEMBER0 0
++
++struct vtn_decoration {
++ struct vtn_decoration *next;
++
++ /* Specifies how to apply this decoration. Negative values represent a
++ * decoration or execution mode. (See the VTN_DEC_ #defines above.)
++ * Non-negative values specify that it applies to a structure member.
++ */
++ int scope;
++
++ const uint32_t *literals;
++ struct vtn_value *group;
++
++ union {
++ SpvDecoration decoration;
++ SpvExecutionMode exec_mode;
++ };
++};
++
++struct vtn_builder {
++ nir_builder nb;
++
++ nir_shader *shader;
++ nir_function_impl *impl;
++ struct vtn_block *block;
++
++ /* Current file, line, and column. Useful for debugging. Set
++ * automatically by vtn_foreach_instruction.
++ */
++ char *file;
++ int line, col;
++
++ /*
++ * In SPIR-V, constants are global, whereas in NIR, the load_const
++ * instruction we use is per-function. So while we parse each function, we
++ * keep a hash table of constants we've resolved to SSA values so
++ * far, and we lazily resolve them when we see them used in a function.
++ */
++ struct hash_table *const_table;
++
++ /*
++ * Map from phi instructions (pointer to the start of the instruction)
++ * to the variable corresponding to it.
++ */
++ struct hash_table *phi_table;
++
++ unsigned num_specializations;
++ struct nir_spirv_specialization *specializations;
++
++ unsigned value_id_bound;
++ struct vtn_value *values;
++
++ gl_shader_stage entry_point_stage;
++ const char *entry_point_name;
++ struct vtn_value *entry_point;
++ bool origin_upper_left;
++
++ struct vtn_function *func;
++ struct exec_list functions;
++
++ /* Current function parameter index */
++ unsigned func_param_idx;
++
++ bool has_loop_continue;
++};
++
++static inline struct vtn_value *
++vtn_push_value(struct vtn_builder *b, uint32_t value_id,
++ enum vtn_value_type value_type)
++{
++ assert(value_id < b->value_id_bound);
++ assert(b->values[value_id].value_type == vtn_value_type_invalid);
++
++ b->values[value_id].value_type = value_type;
++
++ return &b->values[value_id];
++}
++
++static inline struct vtn_value *
++vtn_untyped_value(struct vtn_builder *b, uint32_t value_id)
++{
++ assert(value_id < b->value_id_bound);
++ return &b->values[value_id];
++}
++
++static inline struct vtn_value *
++vtn_value(struct vtn_builder *b, uint32_t value_id,
++ enum vtn_value_type value_type)
++{
++ struct vtn_value *val = vtn_untyped_value(b, value_id);
++ assert(val->value_type == value_type);
++ return val;
++}
++
++struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id);
++
++struct vtn_ssa_value *vtn_create_ssa_value(struct vtn_builder *b,
++ const struct glsl_type *type);
++
++struct vtn_ssa_value *vtn_ssa_transpose(struct vtn_builder *b,
++ struct vtn_ssa_value *src);
++
++nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src,
++ unsigned index);
++nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src,
++ nir_ssa_def *index);
++nir_ssa_def *vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src,
++ nir_ssa_def *insert, unsigned index);
++nir_ssa_def *vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src,
++ nir_ssa_def *insert, nir_ssa_def *index);
++
++nir_deref_var *vtn_nir_deref(struct vtn_builder *b, uint32_t id);
++
++nir_deref_var *vtn_access_chain_to_deref(struct vtn_builder *b,
++ struct vtn_access_chain *chain);
++nir_ssa_def *
++vtn_access_chain_to_offset(struct vtn_builder *b,
++ struct vtn_access_chain *chain,
++ nir_ssa_def **index_out, struct vtn_type **type_out,
++ unsigned *end_idx_out, bool stop_at_matrix);
++
++struct vtn_ssa_value *vtn_local_load(struct vtn_builder *b, nir_deref_var *src);
++
++void vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src,
++ nir_deref_var *dest);
++
++struct vtn_ssa_value *
++vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src);
++
++void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
++ struct vtn_access_chain *dest);
++
++void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count);
++
++
++typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *,
++ struct vtn_value *,
++ int member,
++ const struct vtn_decoration *,
++ void *);
++
++void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
++ vtn_decoration_foreach_cb cb, void *data);
++
++typedef void (*vtn_execution_mode_foreach_cb)(struct vtn_builder *,
++ struct vtn_value *,
++ const struct vtn_decoration *,
++ void *);
++
++void vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
++ vtn_execution_mode_foreach_cb cb, void *data);
++
++nir_op vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap);
++
++void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count);
++
++bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
++ const uint32_t *words, unsigned count);
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ *
++ * Authors:
++ * Jason Ekstrand (jason@jlekstrand.net)
++ *
++ */
++
++#include "vtn_private.h"
++
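++ /* Returns a copy of "old" with room for "new_ids" additional links at the
++ * end; the caller is expected to fill in the new links.
++ */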
++static struct vtn_access_chain *
++vtn_access_chain_extend(struct vtn_builder *b, struct vtn_access_chain *old,
++ unsigned new_ids)
++{
++ struct vtn_access_chain *chain;
++
++ unsigned new_len = old->length + new_ids;
++ chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->link[0]));
++
++ chain->var = old->var;
++ chain->length = new_len;
++
++ for (unsigned i = 0; i < old->length; i++)
++ chain->link[i] = old->link[i];
++
++ return chain;
++}
++
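++ /* Converts one access-chain link into an SSA offset scaled by "stride":
++ * literal links become immediate constants, id links are loaded and
++ * multiplied by the stride (the multiply is skipped when stride == 1).
++ */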
++static nir_ssa_def *
++vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link,
++ unsigned stride)
++{
++ assert(stride > 0);
++ if (link.mode == vtn_access_mode_literal) {
++ return nir_imm_int(&b->nb, link.id * stride);
++ } else if (stride == 1) {
++ return vtn_ssa_value(b, link.id)->def;
++ } else {
++ return nir_imul(&b->nb, vtn_ssa_value(b, link.id)->def,
++ nir_imm_int(&b->nb, stride));
++ }
++}
++
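++ /* Walks an access chain and returns the vtn_type of the value it finally
++ * selects.
++ */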
++static struct vtn_type *
++vtn_access_chain_tail_type(struct vtn_builder *b,
++ struct vtn_access_chain *chain)
++{
++ struct vtn_type *type = chain->var->type;
++ for (unsigned i = 0; i < chain->length; i++) {
++ if (glsl_type_is_struct(type->type)) {
++ assert(chain->link[i].mode == vtn_access_mode_literal);
++ type = type->members[chain->link[i].id];
++ } else {
++ type = type->array_element;
++ }
++ }
++ return type;
++}
++
++/* Crawls a chain of array derefs and rewrites the types so that the
++ * lengths stay the same but the terminal type is the one given by
++ * tail_type. This is useful for split structures.
++ */
++static void
++rewrite_deref_types(nir_deref *deref, const struct glsl_type *type)
++{
++ deref->type = type;
++ if (deref->child) {
++ assert(deref->child->deref_type == nir_deref_type_array);
++ assert(glsl_type_is_array(deref->type));
++ rewrite_deref_types(deref->child, glsl_get_array_element(type));
++ }
++}
++
++nir_deref_var *
++vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain)
++{
++ nir_deref_var *deref_var;
++ if (chain->var->var) {
++ deref_var = nir_deref_var_create(b, chain->var->var);
++ } else {
++ assert(chain->var->members);
++ /* Create the deref_var manually. It will get filled out later. */
++ deref_var = rzalloc(b, nir_deref_var);
++ deref_var->deref.deref_type = nir_deref_type_var;
++ }
++
++ struct vtn_type *deref_type = chain->var->type;
++ nir_deref *tail = &deref_var->deref;
++ nir_variable **members = chain->var->members;
++
++ for (unsigned i = 0; i < chain->length; i++) {
++ enum glsl_base_type base_type = glsl_get_base_type(deref_type->type);
++ switch (base_type) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_DOUBLE:
++ case GLSL_TYPE_BOOL:
++ case GLSL_TYPE_ARRAY: {
++ deref_type = deref_type->array_element;
++
++ nir_deref_array *deref_arr = nir_deref_array_create(b);
++ deref_arr->deref.type = deref_type->type;
++
++ if (chain->link[i].mode == vtn_access_mode_literal) {
++ deref_arr->deref_array_type = nir_deref_array_type_direct;
++ deref_arr->base_offset = chain->link[i].id;
++ } else {
++ assert(chain->link[i].mode == vtn_access_mode_id);
++ deref_arr->deref_array_type = nir_deref_array_type_indirect;
++ deref_arr->base_offset = 0;
++ deref_arr->indirect =
++ nir_src_for_ssa(vtn_ssa_value(b, chain->link[i].id)->def);
++ }
++ tail->child = &deref_arr->deref;
++ tail = tail->child;
++ break;
++ }
++
++ case GLSL_TYPE_STRUCT: {
++ assert(chain->link[i].mode == vtn_access_mode_literal);
++ unsigned idx = chain->link[i].id;
++ deref_type = deref_type->members[idx];
++ if (members) {
++ /* This is a pre-split structure. */
++ deref_var->var = members[idx];
++ rewrite_deref_types(&deref_var->deref, members[idx]->type);
++ assert(tail->type == deref_type->type);
++ members = NULL;
++ } else {
++ nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx);
++ deref_struct->deref.type = deref_type->type;
++ tail->child = &deref_struct->deref;
++ tail = tail->child;
++ }
++ break;
++ }
++ default:
++ unreachable("Invalid type for deref");
++ }
++ }
++
++ assert(members == NULL);
++ return deref_var;
++}
++
++static void
++_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref,
++ nir_deref *tail, struct vtn_ssa_value *inout)
++{
++ /* The deref tail may contain a deref to select a component of a vector (in
++ * other words, it might not be an actual tail) so we have to save it away
++ * here since we overwrite it later.
++ */
++ nir_deref *old_child = tail->child;
++
++ if (glsl_type_is_vector_or_scalar(tail->type)) {
++ /* Terminate the deref chain in case there is one more link to pick
++ * off a component of the vector.
++ */
++ tail->child = NULL;
++
++ nir_intrinsic_op op = load ? nir_intrinsic_load_var :
++ nir_intrinsic_store_var;
++
++ nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
++ intrin->variables[0] =
++ nir_deref_as_var(nir_copy_deref(intrin, &deref->deref));
++ intrin->num_components = glsl_get_vector_elements(tail->type);
++
++ if (load) {
++ nir_ssa_dest_init(&intrin->instr, &intrin->dest,
++ intrin->num_components, NULL);
++ inout->def = &intrin->dest.ssa;
++ } else {
++ intrin->const_index[0] = (1 << intrin->num_components) - 1;
++ intrin->src[0] = nir_src_for_ssa(inout->def);
++ }
++
++ nir_builder_instr_insert(&b->nb, &intrin->instr);
++ } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY ||
++ glsl_type_is_matrix(tail->type)) {
++ unsigned elems = glsl_get_length(tail->type);
++ nir_deref_array *deref_arr = nir_deref_array_create(b);
++ deref_arr->deref_array_type = nir_deref_array_type_direct;
++ deref_arr->deref.type = glsl_get_array_element(tail->type);
++ tail->child = &deref_arr->deref;
++ for (unsigned i = 0; i < elems; i++) {
++ deref_arr->base_offset = i;
++ _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
++ }
++ } else {
++ assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT);
++ unsigned elems = glsl_get_length(tail->type);
++ nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0);
++ tail->child = &deref_struct->deref;
++ for (unsigned i = 0; i < elems; i++) {
++ deref_struct->index = i;
++ deref_struct->deref.type = glsl_get_struct_field(tail->type, i);
++ _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
++ }
++ }
++
++ tail->child = old_child;
++}
++
++nir_deref_var *
++vtn_nir_deref(struct vtn_builder *b, uint32_t id)
++{
++ struct vtn_access_chain *chain =
++ vtn_value(b, id, vtn_value_type_access_chain)->access_chain;
++
++ return vtn_access_chain_to_deref(b, chain);
++}
++
++/*
++ * Gets the NIR-level deref tail, which may have as a child an array deref
++ * selecting which component due to OpAccessChain supporting per-component
++ * indexing in SPIR-V.
++ */
++static nir_deref *
++get_deref_tail(nir_deref_var *deref)
++{
++ nir_deref *cur = &deref->deref;
++ while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child)
++ cur = cur->child;
++
++ return cur;
++}
++
++struct vtn_ssa_value *
++vtn_local_load(struct vtn_builder *b, nir_deref_var *src)
++{
++ nir_deref *src_tail = get_deref_tail(src);
++ struct vtn_ssa_value *val = vtn_create_ssa_value(b, src_tail->type);
++ _vtn_local_load_store(b, true, src, src_tail, val);
++
++ if (src_tail->child) {
++ nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child);
++ assert(vec_deref->deref.child == NULL);
++ val->type = vec_deref->deref.type;
++ if (vec_deref->deref_array_type == nir_deref_array_type_direct)
++ val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset);
++ else
++ val->def = vtn_vector_extract_dynamic(b, val->def,
++ vec_deref->indirect.ssa);
++ }
++
++ return val;
++}
++
++void
++vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src,
++ nir_deref_var *dest)
++{
++ nir_deref *dest_tail = get_deref_tail(dest);
++
++ if (dest_tail->child) {
++ struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type);
++ _vtn_local_load_store(b, true, dest, dest_tail, val);
++ nir_deref_array *deref = nir_deref_as_array(dest_tail->child);
++ assert(deref->deref.child == NULL);
++ if (deref->deref_array_type == nir_deref_array_type_direct)
++ val->def = vtn_vector_insert(b, val->def, src->def,
++ deref->base_offset);
++ else
++ val->def = vtn_vector_insert_dynamic(b, val->def, src->def,
++ deref->indirect.ssa);
++ _vtn_local_load_store(b, false, dest, dest_tail, val);
++ } else {
++ _vtn_local_load_store(b, false, dest, dest_tail, src);
++ }
++}
++
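++ /* Emits a vulkan_resource_index intrinsic for the variable's descriptor
++ * set and binding (consuming a leading array index for arrayed resources).
++ * Push constants have no binding, so NULL is returned for them.
++ */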
++static nir_ssa_def *
++get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain,
++ struct vtn_type **type, unsigned *chain_idx)
++{
++ /* Push constants have no explicit binding */
++ if (chain->var->mode == vtn_variable_mode_push_constant) {
++ *chain_idx = 0;
++ *type = chain->var->type;
++ return NULL;
++ }
++
++ nir_ssa_def *array_index;
++ if (glsl_type_is_array(chain->var->type->type)) {
++ assert(chain->length > 0);
++ array_index = vtn_access_link_as_ssa(b, chain->link[0], 1);
++ *chain_idx = 1;
++ *type = chain->var->type->array_element;
++ } else {
++ array_index = nir_imm_int(&b->nb, 0);
++ *chain_idx = 0;
++ *type = chain->var->type;
++ }
++
++ nir_intrinsic_instr *instr =
++ nir_intrinsic_instr_create(b->nb.shader,
++ nir_intrinsic_vulkan_resource_index);
++ instr->src[0] = nir_src_for_ssa(array_index);
++ instr->const_index[0] = chain->var->descriptor_set;
++ instr->const_index[1] = chain->var->binding;
++
++ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
++ nir_builder_instr_insert(&b->nb, &instr->instr);
++
++ return &instr->dest.ssa;
++}
++
++nir_ssa_def *
++vtn_access_chain_to_offset(struct vtn_builder *b,
++ struct vtn_access_chain *chain,
++ nir_ssa_def **index_out, struct vtn_type **type_out,
++ unsigned *end_idx_out, bool stop_at_matrix)
++{
++ unsigned idx = 0;
++ struct vtn_type *type;
++ *index_out = get_vulkan_resource_index(b, chain, &type, &idx);
++
++ nir_ssa_def *offset = nir_imm_int(&b->nb, 0);
++ for (; idx < chain->length; idx++) {
++ enum glsl_base_type base_type = glsl_get_base_type(type->type);
++ switch (base_type) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_DOUBLE:
++ case GLSL_TYPE_BOOL:
++ /* Some users may not want matrix or vector derefs */
++ if (stop_at_matrix)
++ goto end;
++ /* Fall through */
++
++ case GLSL_TYPE_ARRAY:
++ offset = nir_iadd(&b->nb, offset,
++ vtn_access_link_as_ssa(b, chain->link[idx],
++ type->stride));
++
++ type = type->array_element;
++ break;
++
++ case GLSL_TYPE_STRUCT: {
++ assert(chain->link[idx].mode == vtn_access_mode_literal);
++ unsigned member = chain->link[idx].id;
++ offset = nir_iadd(&b->nb, offset,
++ nir_imm_int(&b->nb, type->offsets[member]));
++ type = type->members[member];
++ break;
++ }
++
++ default:
++ unreachable("Invalid type for deref");
++ }
++ }
++
++end:
++ *type_out = type;
++ if (end_idx_out)
++ *end_idx_out = idx;
++
++ return offset;
++}
++
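++ /* Emits a single load/store intrinsic for one scalar or vector worth of
++ * data at the given index/offset. Loaded booleans are converted to NIR
++ * booleans (nonzero -> true) by the ine against zero below.
++ */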
++static void
++_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
++ nir_ssa_def *index, nir_ssa_def *offset,
++ struct vtn_ssa_value **inout, const struct glsl_type *type)
++{
++ nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op);
++ instr->num_components = glsl_get_vector_elements(type);
++
++ int src = 0;
++ if (!load) {
++ instr->const_index[0] = (1 << instr->num_components) - 1; /* write mask */
++ instr->src[src++] = nir_src_for_ssa((*inout)->def);
++ }
++
++ /* We set the base and size for push constant load to the entire push
++ * constant block for now.
++ */
++ if (op == nir_intrinsic_load_push_constant) {
++ instr->const_index[0] = 0;
++ instr->const_index[1] = 128;
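++ /* 128 bytes is the minimum maxPushConstantsSize guaranteed by Vulkan. */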
++ }
++
++ if (index)
++ instr->src[src++] = nir_src_for_ssa(index);
++
++ instr->src[src++] = nir_src_for_ssa(offset);
++
++ if (load) {
++ nir_ssa_dest_init(&instr->instr, &instr->dest,
++ instr->num_components, NULL);
++ (*inout)->def = &instr->dest.ssa;
++ }
++
++ nir_builder_instr_insert(&b->nb, &instr->instr);
++
++ if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL)
++ (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0));
++}
++
++static void
++_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load,
++ nir_ssa_def *index, nir_ssa_def *offset,
++ struct vtn_access_chain *chain, unsigned chain_idx,
++ struct vtn_type *type, struct vtn_ssa_value **inout)
++{
++ if (chain && chain_idx >= chain->length)
++ chain = NULL;
++
++ if (load && chain == NULL && *inout == NULL)
++ *inout = vtn_create_ssa_value(b, type->type);
++
++ enum glsl_base_type base_type = glsl_get_base_type(type->type);
++ switch (base_type) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_BOOL:
++ /* This is where things get interesting. At this point, we've hit
++ * a vector, a scalar, or a matrix.
++ */
++ if (glsl_type_is_matrix(type->type)) {
++ if (chain == NULL) {
++ /* Loading the whole matrix */
++ struct vtn_ssa_value *transpose;
++ unsigned num_ops, vec_width;
++ if (type->row_major) {
++ num_ops = glsl_get_vector_elements(type->type);
++ vec_width = glsl_get_matrix_columns(type->type);
++ if (load) {
++ const struct glsl_type *transpose_type =
++ glsl_matrix_type(base_type, vec_width, num_ops);
++ *inout = vtn_create_ssa_value(b, transpose_type);
++ } else {
++ transpose = vtn_ssa_transpose(b, *inout);
++ inout = &transpose;
++ }
++ } else {
++ num_ops = glsl_get_matrix_columns(type->type);
++ vec_width = glsl_get_vector_elements(type->type);
++ }
++
++ for (unsigned i = 0; i < num_ops; i++) {
++ nir_ssa_def *elem_offset =
++ nir_iadd(&b->nb, offset,
++ nir_imm_int(&b->nb, i * type->stride));
++ _vtn_load_store_tail(b, op, load, index, elem_offset,
++ &(*inout)->elems[i],
++ glsl_vector_type(base_type, vec_width));
++ }
++
++ if (load && type->row_major)
++ *inout = vtn_ssa_transpose(b, *inout);
++ } else if (type->row_major) {
++ /* Row-major but with an access chain. */
++ nir_ssa_def *col_offset =
++ vtn_access_link_as_ssa(b, chain->link[chain_idx],
++ type->array_element->stride);
++ offset = nir_iadd(&b->nb, offset, col_offset);
++
++ if (chain_idx + 1 < chain->length) {
++ /* Picking off a single element */
++ nir_ssa_def *row_offset =
++ vtn_access_link_as_ssa(b, chain->link[chain_idx + 1],
++ type->stride);
++ offset = nir_iadd(&b->nb, offset, row_offset);
++ if (load)
++ *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
++ _vtn_load_store_tail(b, op, load, index, offset, inout,
++ glsl_scalar_type(base_type));
++ } else {
++ /* Grabbing a column; picking one element off each row */
++ unsigned num_comps = glsl_get_vector_elements(type->type);
++ const struct glsl_type *column_type =
++ glsl_get_column_type(type->type);
++
++ nir_ssa_def *comps[4];
++ for (unsigned i = 0; i < num_comps; i++) {
++ nir_ssa_def *elem_offset =
++ nir_iadd(&b->nb, offset,
++ nir_imm_int(&b->nb, i * type->stride));
++
++ struct vtn_ssa_value *comp, temp_val;
++ if (!load) {
++ temp_val.def = nir_channel(&b->nb, (*inout)->def, i);
++ temp_val.type = glsl_scalar_type(base_type);
++ }
++ comp = &temp_val;
++ _vtn_load_store_tail(b, op, load, index, elem_offset,
++ &comp, glsl_scalar_type(base_type));
++ comps[i] = comp->def;
++ }
++
++ if (load) {
++ if (*inout == NULL)
++ *inout = vtn_create_ssa_value(b, column_type);
++
++ (*inout)->def = nir_vec(&b->nb, comps, num_comps);
++ }
++ }
++ } else {
++ /* Column-major with a deref. Fall through to array case. */
++ nir_ssa_def *col_offset =
++ vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
++ offset = nir_iadd(&b->nb, offset, col_offset);
++
++ _vtn_block_load_store(b, op, load, index, offset,
++ chain, chain_idx + 1,
++ type->array_element, inout);
++ }
++ } else if (chain == NULL) {
++ /* Single whole vector */
++ assert(glsl_type_is_vector_or_scalar(type->type));
++ _vtn_load_store_tail(b, op, load, index, offset, inout, type->type);
++ } else {
++ /* Single component of a vector. Fall through to array case. */
++ nir_ssa_def *elem_offset =
++ vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
++ offset = nir_iadd(&b->nb, offset, elem_offset);
++
++ _vtn_block_load_store(b, op, load, index, offset, NULL, 0,
++ type->array_element, inout);
++ }
++ return;
++
++ case GLSL_TYPE_ARRAY: {
++ unsigned elems = glsl_get_length(type->type);
++ for (unsigned i = 0; i < elems; i++) {
++ nir_ssa_def *elem_off =
++ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride));
++ _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
++ type->array_element, &(*inout)->elems[i]);
++ }
++ return;
++ }
++
++ case GLSL_TYPE_STRUCT: {
++ unsigned elems = glsl_get_length(type->type);
++ for (unsigned i = 0; i < elems; i++) {
++ nir_ssa_def *elem_off =
++ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i]));
++ _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
++ type->members[i], &(*inout)->elems[i]);
++ }
++ return;
++ }
++
++ default:
++ unreachable("Invalid block member type");
++ }
++}
++
++static struct vtn_ssa_value *
++vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src)
++{
++ nir_intrinsic_op op;
++ switch (src->var->mode) {
++ case vtn_variable_mode_ubo:
++ op = nir_intrinsic_load_ubo;
++ break;
++ case vtn_variable_mode_ssbo:
++ op = nir_intrinsic_load_ssbo;
++ break;
++ case vtn_variable_mode_push_constant:
++ op = nir_intrinsic_load_push_constant;
++ break;
++ default:
++ assert(!"Invalid block variable mode");
++ }
++
++ nir_ssa_def *offset, *index = NULL;
++ struct vtn_type *type;
++ unsigned chain_idx;
++ offset = vtn_access_chain_to_offset(b, src, &index, &type, &chain_idx, true);
++
++ struct vtn_ssa_value *value = NULL;
++ _vtn_block_load_store(b, op, true, index, offset,
++ src, chain_idx, type, &value);
++ return value;
++}
++
++static void
++vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src,
++ struct vtn_access_chain *dst)
++{
++ nir_ssa_def *offset, *index = NULL;
++ struct vtn_type *type;
++ unsigned chain_idx;
++ offset = vtn_access_chain_to_offset(b, dst, &index, &type, &chain_idx, true);
++
++ _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, offset,
++ dst, chain_idx, type, &src);
++}
++
++static bool
++vtn_variable_is_external_block(struct vtn_variable *var)
++{
++ return var->mode == vtn_variable_mode_ssbo ||
++ var->mode == vtn_variable_mode_ubo ||
++ var->mode == vtn_variable_mode_push_constant;
++}
++
++static void
++_vtn_variable_load_store(struct vtn_builder *b, bool load,
++ struct vtn_access_chain *chain,
++ struct vtn_type *tail_type,
++ struct vtn_ssa_value **inout)
++{
++ enum glsl_base_type base_type = glsl_get_base_type(tail_type->type);
++ switch (base_type) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_BOOL:
++ /* At this point, we have a scalar, vector, or matrix so we know that
++ * there cannot be any structure splitting still in the way. By
++ * stopping at the matrix level rather than the vector level, we
++ * ensure that matrices get loaded in the optimal way even if they
++ * are stored row-major in a UBO.
++ */
++ if (load) {
++ *inout = vtn_local_load(b, vtn_access_chain_to_deref(b, chain));
++ } else {
++ vtn_local_store(b, *inout, vtn_access_chain_to_deref(b, chain));
++ }
++ return;
++
++ case GLSL_TYPE_ARRAY:
++ case GLSL_TYPE_STRUCT: {
++ struct vtn_access_chain *new_chain =
++ vtn_access_chain_extend(b, chain, 1);
++ new_chain->link[chain->length].mode = vtn_access_mode_literal;
++ unsigned elems = glsl_get_length(tail_type->type);
++ if (load) {
++ assert(*inout == NULL);
++ *inout = rzalloc(b, struct vtn_ssa_value);
++ (*inout)->type = tail_type->type;
++ (*inout)->elems = rzalloc_array(b, struct vtn_ssa_value *, elems);
++ }
++ for (unsigned i = 0; i < elems; i++) {
++ new_chain->link[chain->length].id = i;
++ struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ?
++ tail_type->array_element : tail_type->members[i];
++ _vtn_variable_load_store(b, load, new_chain, elem_type,
++ &(*inout)->elems[i]);
++ }
++ return;
++ }
++
++ default:
++ unreachable("Invalid access chain type");
++ }
++}
++
++struct vtn_ssa_value *
++vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src)
++{
++ if (vtn_variable_is_external_block(src->var)) {
++ return vtn_block_load(b, src);
++ } else {
++ struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src);
++ struct vtn_ssa_value *val = NULL;
++ _vtn_variable_load_store(b, true, src, tail_type, &val);
++ return val;
++ }
++}
++
++void
++vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
++ struct vtn_access_chain *dest)
++{
++ if (vtn_variable_is_external_block(dest->var)) {
++ assert(dest->var->mode == vtn_variable_mode_ssbo);
++ vtn_block_store(b, src, dest);
++ } else {
++ struct vtn_type *tail_type = vtn_access_chain_tail_type(b, dest);
++ _vtn_variable_load_store(b, false, dest, tail_type, &src);
++ }
++}
++
++static void
++_vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest,
++ struct vtn_access_chain *src, struct vtn_type *tail_type)
++{
++ enum glsl_base_type base_type = glsl_get_base_type(tail_type->type);
++ switch (base_type) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_BOOL:
++ /* At this point, we have a scalar, vector, or matrix so we know that
++ * there cannot be any structure splitting still in the way. By
++ * stopping at the matrix level rather than the vector level, we
++ * ensure that matrices get loaded in the optimal way even if they
++ * are stored row-major in a UBO.
++ */
++ vtn_variable_store(b, vtn_variable_load(b, src), dest);
++ return;
++
++ case GLSL_TYPE_ARRAY:
++ case GLSL_TYPE_STRUCT: {
++ struct vtn_access_chain *new_src, *new_dest;
++ new_src = vtn_access_chain_extend(b, src, 1);
++ new_dest = vtn_access_chain_extend(b, dest, 1);
++ new_src->link[src->length].mode = vtn_access_mode_literal;
++ new_dest->link[dest->length].mode = vtn_access_mode_literal;
++ unsigned elems = glsl_get_length(tail_type->type);
++ for (unsigned i = 0; i < elems; i++) {
++ new_src->link[src->length].id = i;
++ new_dest->link[dest->length].id = i;
++ struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ?
++ tail_type->array_element : tail_type->members[i];
++ _vtn_variable_copy(b, new_dest, new_src, elem_type);
++ }
++ return;
++ }
++
++ default:
++ unreachable("Invalid access chain type");
++ }
++}
++
++static void
++vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest,
++ struct vtn_access_chain *src)
++{
++ struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src);
++ assert(vtn_access_chain_tail_type(b, dest)->type == tail_type->type);
++
++ /* TODO: At some point, we should add a special-case for when we can
++ * just emit a copy_var intrinsic.
++ */
++ _vtn_variable_copy(b, dest, src, tail_type);
++}
++
++static void
++set_mode_system_value(nir_variable_mode *mode)
++{
++ assert(*mode == nir_var_system_value || *mode == nir_var_shader_in);
++ *mode = nir_var_system_value;
++}
++
++static void
++vtn_get_builtin_location(struct vtn_builder *b,
++ SpvBuiltIn builtin, int *location,
++ nir_variable_mode *mode)
++{
++ switch (builtin) {
++ case SpvBuiltInPosition:
++ *location = VARYING_SLOT_POS;
++ break;
++ case SpvBuiltInPointSize:
++ *location = VARYING_SLOT_PSIZ;
++ break;
++ case SpvBuiltInClipDistance:
++ *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */
++ break;
++ case SpvBuiltInCullDistance:
++ /* XXX figure this out */
++ break;
++ case SpvBuiltInVertexIndex:
++ *location = SYSTEM_VALUE_VERTEX_ID;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInVertexId:
++ /* Vulkan defines VertexID to be zero-based and reserves the new
++ * builtin keyword VertexIndex to indicate the non-zero-based value.
++ */
++ *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInInstanceIndex:
++ *location = SYSTEM_VALUE_INSTANCE_INDEX;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInInstanceId:
++ *location = SYSTEM_VALUE_INSTANCE_ID;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInPrimitiveId:
++ *location = VARYING_SLOT_PRIMITIVE_ID;
++ *mode = nir_var_shader_out;
++ break;
++ case SpvBuiltInInvocationId:
++ *location = SYSTEM_VALUE_INVOCATION_ID;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInLayer:
++ *location = VARYING_SLOT_LAYER;
++ *mode = nir_var_shader_out;
++ break;
++ case SpvBuiltInViewportIndex:
++ *location = VARYING_SLOT_VIEWPORT;
++ if (b->shader->stage == MESA_SHADER_GEOMETRY)
++ *mode = nir_var_shader_out;
++ else if (b->shader->stage == MESA_SHADER_FRAGMENT)
++ *mode = nir_var_shader_in;
++ else
++ unreachable("invalid stage for SpvBuiltInViewportIndex");
++ break;
++ case SpvBuiltInTessLevelOuter:
++ case SpvBuiltInTessLevelInner:
++ case SpvBuiltInTessCoord:
++ case SpvBuiltInPatchVertices:
++ unreachable("no tessellation support");
++ case SpvBuiltInFragCoord:
++ *location = VARYING_SLOT_POS;
++ assert(*mode == nir_var_shader_in);
++ break;
++ case SpvBuiltInPointCoord:
++ *location = VARYING_SLOT_PNTC;
++ assert(*mode == nir_var_shader_in);
++ break;
++ case SpvBuiltInFrontFacing:
++ *location = VARYING_SLOT_FACE;
++ assert(*mode == nir_var_shader_in);
++ break;
++ case SpvBuiltInSampleId:
++ *location = SYSTEM_VALUE_SAMPLE_ID;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInSamplePosition:
++ *location = SYSTEM_VALUE_SAMPLE_POS;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInSampleMask:
++ *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? */
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInFragDepth:
++ *location = FRAG_RESULT_DEPTH;
++ assert(*mode == nir_var_shader_out);
++ break;
++ case SpvBuiltInNumWorkgroups:
++ *location = SYSTEM_VALUE_NUM_WORK_GROUPS;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInWorkgroupSize:
++ /* This should already be handled */
++ unreachable("unsupported builtin");
++ break;
++ case SpvBuiltInWorkgroupId:
++ *location = SYSTEM_VALUE_WORK_GROUP_ID;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInLocalInvocationId:
++ *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInLocalInvocationIndex:
++ *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInGlobalInvocationId:
++ *location = SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInHelperInvocation:
++ default:
++ unreachable("unsupported builtin");
++ }
++}
++
++static void
++var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member,
++ const struct vtn_decoration *dec, void *void_var)
++{
++ struct vtn_variable *vtn_var = void_var;
++
++ /* Handle decorations that apply to a vtn_variable as a whole */
++ switch (dec->decoration) {
++ case SpvDecorationBinding:
++ vtn_var->binding = dec->literals[0];
++ return;
++ case SpvDecorationDescriptorSet:
++ vtn_var->descriptor_set = dec->literals[0];
++ return;
++
++ case SpvDecorationLocation: {
++ unsigned location = dec->literals[0];
++ bool is_vertex_input;
++ if (b->shader->stage == MESA_SHADER_FRAGMENT &&
++ vtn_var->mode == vtn_variable_mode_output) {
++ is_vertex_input = false;
++ location += FRAG_RESULT_DATA0;
++ } else if (b->shader->stage == MESA_SHADER_VERTEX &&
++ vtn_var->mode == vtn_variable_mode_input) {
++ is_vertex_input = true;
++ location += VERT_ATTRIB_GENERIC0;
++ } else if (vtn_var->mode == vtn_variable_mode_input ||
++ vtn_var->mode == vtn_variable_mode_output) {
++ is_vertex_input = false;
++ location += VARYING_SLOT_VAR0;
++ } else {
++ assert(!"Location must be on input or output variable");
++ }
++
++ if (vtn_var->var) {
++ vtn_var->var->data.location = location;
++ vtn_var->var->data.explicit_location = true;
++ } else {
++ assert(vtn_var->members);
++ unsigned length = glsl_get_length(vtn_var->type->type);
++ for (unsigned i = 0; i < length; i++) {
++ vtn_var->members[i]->data.location = location;
++ vtn_var->members[i]->data.explicit_location = true;
++ location +=
++ glsl_count_attribute_slots(vtn_var->members[i]->interface_type,
++ is_vertex_input);
++ }
++ }
++ return;
++ }
++
++ default:
++ break;
++ }
++
++ /* Now we handle decorations that apply to a particular nir_variable */
++ nir_variable *nir_var = vtn_var->var;
++ if (val->value_type == vtn_value_type_access_chain) {
++ assert(val->access_chain->length == 0);
++ assert(val->access_chain->var == void_var);
++ assert(member == -1);
++ } else {
++ assert(val->value_type == vtn_value_type_type);
++ if (member != -1)
++ nir_var = vtn_var->members[member];
++ }
++
++ if (nir_var == NULL)
++ return;
++
++ switch (dec->decoration) {
++ case SpvDecorationRelaxedPrecision:
++ break; /* FIXME: Do nothing with this for now. */
++ case SpvDecorationNoPerspective:
++ nir_var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
++ break;
++ case SpvDecorationFlat:
++ nir_var->data.interpolation = INTERP_QUALIFIER_FLAT;
++ break;
++ case SpvDecorationCentroid:
++ nir_var->data.centroid = true;
++ break;
++ case SpvDecorationSample:
++ nir_var->data.sample = true;
++ break;
++ case SpvDecorationInvariant:
++ nir_var->data.invariant = true;
++ break;
++ case SpvDecorationConstant:
++ assert(nir_var->constant_initializer != NULL);
++ nir_var->data.read_only = true;
++ break;
++ case SpvDecorationNonWritable:
++ nir_var->data.read_only = true;
++ break;
++ case SpvDecorationComponent:
++ nir_var->data.location_frac = dec->literals[0];
++ break;
++ case SpvDecorationIndex:
++ nir_var->data.explicit_index = true;
++ nir_var->data.index = dec->literals[0];
++ break;
++ case SpvDecorationBuiltIn: {
++ SpvBuiltIn builtin = dec->literals[0];
++
++ if (builtin == SpvBuiltInWorkgroupSize) {
++ /* This shouldn't be a builtin. It's actually a constant. */
++ nir_var->data.mode = nir_var_global;
++ nir_var->data.read_only = true;
++
++ nir_constant *c = rzalloc(nir_var, nir_constant);
++ c->value.u[0] = b->shader->info.cs.local_size[0];
++ c->value.u[1] = b->shader->info.cs.local_size[1];
++ c->value.u[2] = b->shader->info.cs.local_size[2];
++ nir_var->constant_initializer = c;
++ break;
++ }
++
++ nir_variable_mode mode = nir_var->data.mode;
++ vtn_get_builtin_location(b, builtin, &nir_var->data.location, &mode);
++ nir_var->data.explicit_location = true;
++ nir_var->data.mode = mode;
++
++ if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition)
++ nir_var->data.origin_upper_left = b->origin_upper_left;
++ break;
++ }
++ case SpvDecorationRowMajor:
++ case SpvDecorationColMajor:
++ case SpvDecorationGLSLShared:
++ case SpvDecorationPatch:
++ case SpvDecorationRestrict:
++ case SpvDecorationAliased:
++ case SpvDecorationVolatile:
++ case SpvDecorationCoherent:
++ case SpvDecorationNonReadable:
++ case SpvDecorationUniform:
++ /* This is really nice but we have no use for it right now. */
++ case SpvDecorationCPacked:
++ case SpvDecorationSaturatedConversion:
++ case SpvDecorationStream:
++ case SpvDecorationOffset:
++ case SpvDecorationXfbBuffer:
++ case SpvDecorationFuncParamAttr:
++ case SpvDecorationFPRoundingMode:
++ case SpvDecorationFPFastMathMode:
++ case SpvDecorationLinkageAttributes:
++ case SpvDecorationSpecId:
++ break;
++ default:
++ unreachable("Unhandled variable decoration");
++ }
++}
++
++/* Tries to compute the size of an interface block based on the strides and
++ * offsets that are provided to us in the SPIR-V source.
++ */
++static unsigned
++vtn_type_block_size(struct vtn_type *type)
++{
++ enum glsl_base_type base_type = glsl_get_base_type(type->type);
++ switch (base_type) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_BOOL:
++ case GLSL_TYPE_DOUBLE: {
++ unsigned cols = type->row_major ? glsl_get_vector_elements(type->type) :
++ glsl_get_matrix_columns(type->type);
++ if (cols > 1) {
++ assert(type->stride > 0);
++ return type->stride * cols;
++ } else if (base_type == GLSL_TYPE_DOUBLE) {
++ return glsl_get_vector_elements(type->type) * 8;
++ } else {
++ return glsl_get_vector_elements(type->type) * 4;
++ }
++ }
++
++ case GLSL_TYPE_STRUCT:
++ case GLSL_TYPE_INTERFACE: {
++ unsigned size = 0;
++ unsigned num_fields = glsl_get_length(type->type);
++ for (unsigned f = 0; f < num_fields; f++) {
++ unsigned field_end = type->offsets[f] +
++ vtn_type_block_size(type->members[f]);
++ size = MAX2(size, field_end);
++ }
++ return size;
++ }
++
++ case GLSL_TYPE_ARRAY:
++ assert(type->stride > 0);
++ assert(glsl_get_length(type->type) > 0);
++ return type->stride * glsl_get_length(type->type);
++
++ default:
++ assert(!"Invalid block type");
++ return 0;
++ }
++}
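++
++/* A rough worked example for the helper above (layout values are
++ * hypothetical, not taken from any real module): a block containing
++ * "vec4 a; float b[3];" with member offsets {0, 16} and an array stride
++ * of 16 for b gives field ends of 0 + 16 = 16 and 16 + 3 * 16 = 64, so
++ * vtn_type_block_size() reports 64 bytes.
++ */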
++
++void
++vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ switch (opcode) {
++ case SpvOpVariable: {
++ struct vtn_variable *var = rzalloc(b, struct vtn_variable);
++ var->type = vtn_value(b, w[1], vtn_value_type_type)->type;
++
++ var->chain.var = var;
++ var->chain.length = 0;
++
++ struct vtn_value *val =
++ vtn_push_value(b, w[2], vtn_value_type_access_chain);
++ val->access_chain = &var->chain;
++
++ struct vtn_type *without_array = var->type;
++ while(glsl_type_is_array(without_array->type))
++ without_array = without_array->array_element;
++
++ nir_variable_mode nir_mode;
++ switch ((SpvStorageClass)w[3]) {
++ case SpvStorageClassUniform:
++ case SpvStorageClassUniformConstant:
++ if (without_array->block) {
++ var->mode = vtn_variable_mode_ubo;
++ b->shader->info.num_ubos++;
++ } else if (without_array->buffer_block) {
++ var->mode = vtn_variable_mode_ssbo;
++ b->shader->info.num_ssbos++;
++ } else if (glsl_type_is_image(without_array->type)) {
++ var->mode = vtn_variable_mode_image;
++ nir_mode = nir_var_uniform;
++ b->shader->info.num_images++;
++ } else if (glsl_type_is_sampler(without_array->type)) {
++ var->mode = vtn_variable_mode_sampler;
++ nir_mode = nir_var_uniform;
++ b->shader->info.num_textures++;
++ } else {
++ assert(!"Invalid uniform variable type");
++ }
++ break;
++ case SpvStorageClassPushConstant:
++ var->mode = vtn_variable_mode_push_constant;
++ assert(b->shader->num_uniforms == 0);
++ b->shader->num_uniforms = vtn_type_block_size(var->type) * 4;
++ break;
++ case SpvStorageClassInput:
++ var->mode = vtn_variable_mode_input;
++ nir_mode = nir_var_shader_in;
++ break;
++ case SpvStorageClassOutput:
++ var->mode = vtn_variable_mode_output;
++ nir_mode = nir_var_shader_out;
++ break;
++ case SpvStorageClassPrivate:
++ var->mode = vtn_variable_mode_global;
++ nir_mode = nir_var_global;
++ break;
++ case SpvStorageClassFunction:
++ var->mode = vtn_variable_mode_local;
++ nir_mode = nir_var_local;
++ break;
++ case SpvStorageClassWorkgroup:
++ var->mode = vtn_variable_mode_workgroup;
++ nir_mode = nir_var_shared;
++ break;
++ case SpvStorageClassCrossWorkgroup:
++ case SpvStorageClassGeneric:
++ case SpvStorageClassAtomicCounter:
++ default:
++ unreachable("Unhandled variable storage class");
++ }
++
++ switch (var->mode) {
++ case vtn_variable_mode_local:
++ case vtn_variable_mode_global:
++ case vtn_variable_mode_image:
++ case vtn_variable_mode_sampler:
++ case vtn_variable_mode_workgroup:
++ /* For these, we create the variable normally */
++ var->var = rzalloc(b->shader, nir_variable);
++ var->var->name = ralloc_strdup(var->var, val->name);
++ var->var->type = var->type->type;
++ var->var->data.mode = nir_mode;
++
++ switch (var->mode) {
++ case vtn_variable_mode_image:
++ case vtn_variable_mode_sampler:
++ var->var->interface_type = without_array->type;
++ break;
++ default:
++ var->var->interface_type = NULL;
++ break;
++ }
++ break;
++
++ case vtn_variable_mode_input:
++ case vtn_variable_mode_output: {
++ /* For inputs and outputs, we immediately split structures. This
++ * is for a couple of reasons. For one, builtins may all come in
++ * a struct and we really want those split out into separate
++ * variables. For another, interpolation qualifiers can be
++ * applied to members of the top-level struct and we need to be
++ * able to preserve that information.
++ */
++
++ int array_length = -1;
++ struct vtn_type *interface_type = var->type;
++ if (b->shader->stage == MESA_SHADER_GEOMETRY &&
++ glsl_type_is_array(var->type->type)) {
++ /* In Geometry shaders (and some tessellation), inputs come
++ * in per-vertex arrays. However, some builtins come in
++ * non-per-vertex, hence the need for the is_array check. In
++ * any case, there are no non-builtin arrays allowed so this
++ * check should be sufficient.
++ */
++ interface_type = var->type->array_element;
++ array_length = glsl_get_length(var->type->type);
++ }
++
++ if (glsl_type_is_struct(interface_type->type)) {
++ /* It's a struct. Split it. */
++ unsigned num_members = glsl_get_length(interface_type->type);
++ var->members = ralloc_array(b, nir_variable *, num_members);
++
++ for (unsigned i = 0; i < num_members; i++) {
++ const struct glsl_type *mtype = interface_type->members[i]->type;
++ if (array_length >= 0)
++ mtype = glsl_array_type(mtype, array_length);
++
++ var->members[i] = rzalloc(b->shader, nir_variable);
++ var->members[i]->name =
++ ralloc_asprintf(var->members[i], "%s.%d", val->name, i);
++ var->members[i]->type = mtype;
++ var->members[i]->interface_type =
++ interface_type->members[i]->type;
++ var->members[i]->data.mode = nir_mode;
++ }
++ } else {
++ var->var = rzalloc(b->shader, nir_variable);
++ var->var->name = ralloc_strdup(var->var, val->name);
++ var->var->type = var->type->type;
++ var->var->interface_type = interface_type->type;
++ var->var->data.mode = nir_mode;
++ }
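++
++ /* As an example of the splitting above (names are illustrative): an
++ * input block "blk" with three members becomes separate variables
++ * "blk.0", "blk.1" and "blk.2"; in a geometry shader each of those is
++ * additionally wrapped in a per-vertex array of length array_length.
++ */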
++
++ /* For inputs and outputs, we need to grab locations and builtin
++ * information from the interface type.
++ */
++ vtn_foreach_decoration(b, interface_type->val, var_decoration_cb, var);
++ break;
++ }
++
++ case vtn_variable_mode_param:
++ unreachable("Not created through OpVariable");
++
++ case vtn_variable_mode_ubo:
++ case vtn_variable_mode_ssbo:
++ case vtn_variable_mode_push_constant:
++ /* These don't need actual variables. */
++ break;
++ }
++
++ if (count > 4) {
++ assert(count == 5);
++ nir_constant *constant =
++ vtn_value(b, w[4], vtn_value_type_constant)->constant;
++ var->var->constant_initializer =
++ nir_constant_clone(constant, var->var);
++ }
++
++ vtn_foreach_decoration(b, val, var_decoration_cb, var);
++
++ if (var->mode == vtn_variable_mode_image ||
++ var->mode == vtn_variable_mode_sampler) {
++ /* XXX: We still need the binding information in the nir_variable
++ * for these. We should fix that.
++ */
++ var->var->data.binding = var->binding;
++ var->var->data.descriptor_set = var->descriptor_set;
++
++ if (var->mode == vtn_variable_mode_image)
++ var->var->data.image.format = without_array->image_format;
++ }
++
++ if (var->mode == vtn_variable_mode_local) {
++ assert(var->members == NULL && var->var != NULL);
++ nir_function_impl_add_variable(b->impl, var->var);
++ } else if (var->var) {
++ nir_shader_add_variable(b->shader, var->var);
++ } else if (var->members) {
++ unsigned count = glsl_get_length(without_array->type);
++ for (unsigned i = 0; i < count; i++) {
++ assert(var->members[i]->data.mode != nir_var_local);
++ nir_shader_add_variable(b->shader, var->members[i]);
++ }
++ } else {
++ assert(var->mode == vtn_variable_mode_ubo ||
++ var->mode == vtn_variable_mode_ssbo ||
++ var->mode == vtn_variable_mode_push_constant);
++ }
++ break;
++ }
++
++ case SpvOpAccessChain:
++ case SpvOpInBoundsAccessChain: {
++ struct vtn_access_chain *base, *chain;
++ struct vtn_value *base_val = vtn_untyped_value(b, w[3]);
++ if (base_val->value_type == vtn_value_type_sampled_image) {
++ /* This is rather insane. SPIR-V allows you to use OpSampledImage
++ * to combine an array of images with a single sampler to get an
++ * array of sampled images that all share the same sampler.
++ * Fortunately, this means that we can more-or-less ignore the
++ * sampler when crawling the access chain, but it does leave us
++ * with this rather awkward little special-case.
++ */
++ base = base_val->sampled_image->image;
++ } else {
++ assert(base_val->value_type == vtn_value_type_access_chain);
++ base = base_val->access_chain;
++ }
++
++ chain = vtn_access_chain_extend(b, base, count - 4);
++
++ unsigned idx = base->length;
++ for (int i = 4; i < count; i++) {
++ struct vtn_value *link_val = vtn_untyped_value(b, w[i]);
++ if (link_val->value_type == vtn_value_type_constant) {
++ chain->link[idx].mode = vtn_access_mode_literal;
++ chain->link[idx].id = link_val->constant->value.u[0];
++ } else {
++ chain->link[idx].mode = vtn_access_mode_id;
++ chain->link[idx].id = w[i];
++ }
++ idx++;
++ }
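++
++ /* For illustration (hypothetical SPIR-V, not from the module being
++ * parsed): "%p = OpAccessChain %ptr_type %base %c1 %i", where %c1 is a
++ * constant 1 and %i is a dynamic value, appends a literal link with
++ * id == 1 followed by an id-mode link holding the word for %i.
++ */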
++
++ if (base_val->value_type == vtn_value_type_sampled_image) {
++ struct vtn_value *val =
++ vtn_push_value(b, w[2], vtn_value_type_sampled_image);
++ val->sampled_image = ralloc(b, struct vtn_sampled_image);
++ val->sampled_image->image = chain;
++ val->sampled_image->sampler = base_val->sampled_image->sampler;
++ } else {
++ struct vtn_value *val =
++ vtn_push_value(b, w[2], vtn_value_type_access_chain);
++ val->access_chain = chain;
++ }
++ break;
++ }
++
++ case SpvOpCopyMemory: {
++ struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_access_chain);
++ struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_access_chain);
++
++ vtn_variable_copy(b, dest->access_chain, src->access_chain);
++ break;
++ }
++
++ case SpvOpLoad: {
++ struct vtn_access_chain *src =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++
++ if (src->var->mode == vtn_variable_mode_image ||
++ src->var->mode == vtn_variable_mode_sampler) {
++ vtn_push_value(b, w[2], vtn_value_type_access_chain)->access_chain = src;
++ return;
++ }
++
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ val->ssa = vtn_variable_load(b, src);
++ break;
++ }
++
++ case SpvOpStore: {
++ struct vtn_access_chain *dest =
++ vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain;
++ struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]);
++ vtn_variable_store(b, src, dest);
++ break;
++ }
++
++ case SpvOpArrayLength: {
++ struct vtn_access_chain *chain =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++
++ const uint32_t offset = chain->var->type->offsets[w[4]];
++ const uint32_t stride = chain->var->type->members[w[4]]->stride;
++
++ unsigned chain_idx;
++ struct vtn_type *type;
++ nir_ssa_def *index =
++ get_vulkan_resource_index(b, chain, &type, &chain_idx);
++
++ nir_intrinsic_instr *instr =
++ nir_intrinsic_instr_create(b->nb.shader,
++ nir_intrinsic_get_buffer_size);
++ instr->src[0] = nir_src_for_ssa(index);
++ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
++ nir_builder_instr_insert(&b->nb, &instr->instr);
++ nir_ssa_def *buf_size = &instr->dest.ssa;
++
++ /* array_length = max(buffer_size - offset, 0) / stride */
++ nir_ssa_def *array_length =
++ nir_idiv(&b->nb,
++ nir_imax(&b->nb,
++ nir_isub(&b->nb,
++ buf_size,
++ nir_imm_int(&b->nb, offset)),
++ nir_imm_int(&b->nb, 0u)),
++ nir_imm_int(&b->nb, stride));
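++
++ /* Numerical sanity check (made-up values): with buf_size = 256,
++ * offset = 16 and stride = 4, the expression above yields
++ * max(256 - 16, 0) / 4 = 60 array elements.
++ */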
++
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ val->ssa = vtn_create_ssa_value(b, glsl_uint_type());
++ val->ssa->def = array_length;
++ break;
++ }
++
++ case SpvOpCopyMemorySized:
++ default:
++ unreachable("Unhandled opcode");
++ }
++}
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ *
++ * Authors:
++ * Jason Ekstrand (jason@jlekstrand.net)
++ *
++ */
++
++/*
++ * A simple executable that opens a SPIR-V shader, converts it to NIR, and
++ * dumps out the result. This should be useful for testing the
++ * spirv_to_nir code.
++ */
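++
++/* Typical invocation (assuming the tool is built as "spirv2nir", as the
++ * build files in this series suggest):
++ *
++ *    spirv2nir frag.spv
++ *
++ * which parses the module as a fragment shader with entry point "main"
++ * and prints the resulting NIR to stderr.
++ */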
++
++#include "spirv/nir_spirv.h"
++
++#include <assert.h>
++#include <stdio.h>
++#include <sys/mman.h>
++#include <sys/types.h>
++#include <fcntl.h>
++#include <unistd.h>
++
++int main(int argc, char **argv)
++{
++ int fd = open(argv[1], O_RDONLY);
++ off_t len = lseek(fd, 0, SEEK_END);
++
++ assert(len % 4 == 0);
++ size_t word_count = len / 4;
++
++ const void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
++ assert(map != MAP_FAILED);
++
++ nir_function *func = spirv_to_nir(map, word_count, NULL, 0,
++ MESA_SHADER_FRAGMENT, "main", NULL);
++ nir_print_shader(func->shader, stderr);
++}
--- /dev/null
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir_types.h"
+ #include "compiler/glsl/ir.h"
+
+ void
+ glsl_print_type(const glsl_type *type, FILE *fp)
+ {
+ if (type->base_type == GLSL_TYPE_ARRAY) {
+ glsl_print_type(type->fields.array, fp);
+ fprintf(fp, "[%u]", type->length);
+ } else if ((type->base_type == GLSL_TYPE_STRUCT)
+ && !is_gl_identifier(type->name)) {
+ fprintf(fp, "%s@%p", type->name, (void *) type);
+ } else {
+ fprintf(fp, "%s", type->name);
+ }
+ }
+
+ void
+ glsl_print_struct(const glsl_type *type, FILE *fp)
+ {
+ assert(type->base_type == GLSL_TYPE_STRUCT);
+
+ fprintf(fp, "struct {\n");
+ for (unsigned i = 0; i < type->length; i++) {
+ fprintf(fp, "\t");
+ glsl_print_type(type->fields.structure[i].type, fp);
+ fprintf(fp, " %s;\n", type->fields.structure[i].name);
+ }
+ fprintf(fp, "}\n");
+ }
+
+ const glsl_type *
+ glsl_get_array_element(const glsl_type* type)
+ {
+ if (type->is_matrix())
+ return type->column_type();
+ return type->fields.array;
+ }
+
+ const glsl_type *
+ glsl_get_struct_field(const glsl_type *type, unsigned index)
+ {
+ return type->fields.structure[index].type;
+ }
+
++const glsl_type *
++glsl_get_function_return_type(const glsl_type *type)
++{
++ return type->fields.parameters[0].type;
++}
++
++const glsl_function_param *
++glsl_get_function_param(const glsl_type *type, unsigned index)
++{
++ return &type->fields.parameters[index + 1];
++}
++
+ const struct glsl_type *
+ glsl_get_column_type(const struct glsl_type *type)
+ {
+ return type->column_type();
+ }
+
+ enum glsl_base_type
+ glsl_get_base_type(const struct glsl_type *type)
+ {
+ return type->base_type;
+ }
+
+ unsigned
+ glsl_get_vector_elements(const struct glsl_type *type)
+ {
+ return type->vector_elements;
+ }
+
+ unsigned
+ glsl_get_components(const struct glsl_type *type)
+ {
+ return type->components();
+ }
+
+ unsigned
+ glsl_get_matrix_columns(const struct glsl_type *type)
+ {
+ return type->matrix_columns;
+ }
+
+ unsigned
+ glsl_get_length(const struct glsl_type *type)
+ {
+ return type->is_matrix() ? type->matrix_columns : type->length;
+ }
+
+ unsigned
+ glsl_get_aoa_size(const struct glsl_type *type)
+ {
+ return type->arrays_of_arrays_size();
+ }
+
++unsigned
++glsl_count_attribute_slots(const struct glsl_type *type,
++ bool vertex_input_slots)
++{
++ return type->count_attribute_slots(vertex_input_slots);
++}
++
+ const char *
+ glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index)
+ {
+ return type->fields.structure[index].name;
+ }
+
++glsl_sampler_dim
++glsl_get_sampler_dim(const struct glsl_type *type)
++{
++ assert(glsl_type_is_sampler(type) || glsl_type_is_image(type));
++ return (glsl_sampler_dim)type->sampler_dimensionality;
++}
++
++glsl_base_type
++glsl_get_sampler_result_type(const struct glsl_type *type)
++{
++ assert(glsl_type_is_sampler(type) || glsl_type_is_image(type));
++ return (glsl_base_type)type->sampler_type;
++}
++
+ unsigned
+ glsl_get_record_location_offset(const struct glsl_type *type,
+ unsigned length)
+ {
+ return type->record_location_offset(length);
+ }
+
+ bool
+ glsl_type_is_void(const glsl_type *type)
+ {
+ return type->is_void();
+ }
+
++bool
++glsl_type_is_error(const glsl_type *type)
++{
++ return type->is_error();
++}
++
+ bool
+ glsl_type_is_vector(const struct glsl_type *type)
+ {
+ return type->is_vector();
+ }
+
+ bool
+ glsl_type_is_scalar(const struct glsl_type *type)
+ {
+ return type->is_scalar();
+ }
+
+ bool
+ glsl_type_is_vector_or_scalar(const struct glsl_type *type)
+ {
+ return type->is_vector() || type->is_scalar();
+ }
+
+ bool
+ glsl_type_is_matrix(const struct glsl_type *type)
+ {
+ return type->is_matrix();
+ }
+
++bool
++glsl_type_is_array(const struct glsl_type *type)
++{
++ return type->is_array();
++}
++
++bool
++glsl_type_is_struct(const struct glsl_type *type)
++{
++ return type->is_record() || type->is_interface();
++}
++
++bool
++glsl_type_is_sampler(const struct glsl_type *type)
++{
++ return type->is_sampler();
++}
++
++bool
++glsl_type_is_image(const struct glsl_type *type)
++{
++ return type->is_image();
++}
++
++bool
++glsl_sampler_type_is_shadow(const struct glsl_type *type)
++{
++ assert(glsl_type_is_sampler(type));
++ return type->sampler_shadow;
++}
++
++bool
++glsl_sampler_type_is_array(const struct glsl_type *type)
++{
++ assert(glsl_type_is_sampler(type) || glsl_type_is_image(type));
++ return type->sampler_array;
++}
++
+ const glsl_type *
+ glsl_void_type(void)
+ {
+ return glsl_type::void_type;
+ }
+
+ const glsl_type *
+ glsl_float_type(void)
+ {
+ return glsl_type::float_type;
+ }
+
+ const glsl_type *
+ glsl_vec_type(unsigned n)
+ {
+ return glsl_type::vec(n);
+ }
+
+ const glsl_type *
+ glsl_vec4_type(void)
+ {
+ return glsl_type::vec4_type;
+ }
+
++const glsl_type *
++glsl_int_type(void)
++{
++ return glsl_type::int_type;
++}
++
+ const glsl_type *
+ glsl_uint_type(void)
+ {
+ return glsl_type::uint_type;
+ }
+
++const glsl_type *
++glsl_bool_type(void)
++{
++ return glsl_type::bool_type;
++}
++
++const glsl_type *
++glsl_scalar_type(enum glsl_base_type base_type)
++{
++ return glsl_type::get_instance(base_type, 1, 1);
++}
++
++const glsl_type *
++glsl_vector_type(enum glsl_base_type base_type, unsigned components)
++{
++ assert(components > 1 && components <= 4);
++ return glsl_type::get_instance(base_type, components, 1);
++}
++
++const glsl_type *
++glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns)
++{
++ assert(rows > 1 && rows <= 4 && columns >= 1 && columns <= 4);
++ return glsl_type::get_instance(base_type, rows, columns);
++}
++
+ const glsl_type *
+ glsl_array_type(const glsl_type *base, unsigned elements)
+ {
+ return glsl_type::get_array_instance(base, elements);
+ }
++
++const glsl_type *
++glsl_struct_type(const glsl_struct_field *fields,
++ unsigned num_fields, const char *name)
++{
++ return glsl_type::get_record_instance(fields, num_fields, name);
++}
++
++const struct glsl_type *
++glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array,
++ enum glsl_base_type base_type)
++{
++ return glsl_type::get_sampler_instance(dim, is_shadow, is_array, base_type);
++}
++
++const struct glsl_type *
++glsl_image_type(enum glsl_sampler_dim dim, bool is_array,
++ enum glsl_base_type base_type)
++{
++ return glsl_type::get_image_instance(dim, is_array, base_type);
++}
++
++const glsl_type *
++glsl_function_type(const glsl_type *return_type,
++ const glsl_function_param *params, unsigned num_params)
++{
++ return glsl_type::get_function_instance(return_type, params, num_params);
++}
++
++const glsl_type *
++glsl_transposed_type(const struct glsl_type *type)
++{
++ return glsl_type::get_instance(type->base_type, type->matrix_columns,
++ type->vector_elements);
++}
--- /dev/null
+ /*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #pragma once
+
+ #include <stdio.h>
+ #include <stdbool.h>
+
+ /* C wrapper around compiler/glsl_types.h */
+
+ #include "glsl_types.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #else
+ struct glsl_type;
+ #endif
+
+ void glsl_print_type(const struct glsl_type *type, FILE *fp);
+ void glsl_print_struct(const struct glsl_type *type, FILE *fp);
+
+ const struct glsl_type *glsl_get_struct_field(const struct glsl_type *type,
+ unsigned index);
+
+ const struct glsl_type *glsl_get_array_element(const struct glsl_type *type);
+
+ const struct glsl_type *glsl_get_column_type(const struct glsl_type *type);
+
++const struct glsl_type *
++glsl_get_function_return_type(const struct glsl_type *type);
++
++const struct glsl_function_param *
++glsl_get_function_param(const struct glsl_type *type, unsigned index);
++
+ enum glsl_base_type glsl_get_base_type(const struct glsl_type *type);
+
+ unsigned glsl_get_vector_elements(const struct glsl_type *type);
+
+ unsigned glsl_get_components(const struct glsl_type *type);
+
+ unsigned glsl_get_matrix_columns(const struct glsl_type *type);
+
+ unsigned glsl_get_length(const struct glsl_type *type);
+
+ unsigned glsl_get_aoa_size(const struct glsl_type *type);
+
++unsigned glsl_count_attribute_slots(const struct glsl_type *type,
++ bool vertex_input_slots);
++
+ const char *glsl_get_struct_elem_name(const struct glsl_type *type,
+ unsigned index);
+
++enum glsl_sampler_dim glsl_get_sampler_dim(const struct glsl_type *type);
++enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type);
++
+ unsigned glsl_get_record_location_offset(const struct glsl_type *type,
+ unsigned length);
+
+ bool glsl_type_is_void(const struct glsl_type *type);
++bool glsl_type_is_error(const struct glsl_type *type);
+ bool glsl_type_is_vector(const struct glsl_type *type);
+ bool glsl_type_is_scalar(const struct glsl_type *type);
+ bool glsl_type_is_vector_or_scalar(const struct glsl_type *type);
+ bool glsl_type_is_matrix(const struct glsl_type *type);
++bool glsl_type_is_array(const struct glsl_type *type);
++bool glsl_type_is_struct(const struct glsl_type *type);
++bool glsl_type_is_sampler(const struct glsl_type *type);
++bool glsl_type_is_image(const struct glsl_type *type);
++bool glsl_sampler_type_is_shadow(const struct glsl_type *type);
++bool glsl_sampler_type_is_array(const struct glsl_type *type);
+
+ const struct glsl_type *glsl_void_type(void);
+ const struct glsl_type *glsl_float_type(void);
+ const struct glsl_type *glsl_vec_type(unsigned n);
+ const struct glsl_type *glsl_vec4_type(void);
++const struct glsl_type *glsl_int_type(void);
+ const struct glsl_type *glsl_uint_type(void);
++const struct glsl_type *glsl_bool_type(void);
++
++const struct glsl_type *glsl_scalar_type(enum glsl_base_type base_type);
++const struct glsl_type *glsl_vector_type(enum glsl_base_type base_type,
++ unsigned components);
++const struct glsl_type *glsl_matrix_type(enum glsl_base_type base_type,
++ unsigned rows, unsigned columns);
+ const struct glsl_type *glsl_array_type(const struct glsl_type *base,
+ unsigned elements);
++const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields,
++ unsigned num_fields, const char *name);
++const struct glsl_type *glsl_sampler_type(enum glsl_sampler_dim dim,
++ bool is_shadow, bool is_array,
++ enum glsl_base_type base_type);
++const struct glsl_type *glsl_image_type(enum glsl_sampler_dim dim,
++ bool is_array,
++ enum glsl_base_type base_type);
++const struct glsl_type * glsl_function_type(const struct glsl_type *return_type,
++ const struct glsl_function_param *params,
++ unsigned num_params);
++
++const struct glsl_type *glsl_transposed_type(const struct glsl_type *type);
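++
++/* Minimal usage sketch for the constructors above (caller code is
++ * hypothetical): building the type of "mat3 m[4]" from C looks like
++ *
++ *    const struct glsl_type *mat3 = glsl_matrix_type(GLSL_TYPE_FLOAT, 3, 3);
++ *    const struct glsl_type *arr  = glsl_array_type(mat3, 4);
++ */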
+
+ #ifdef __cplusplus
+ }
+ #endif
--- /dev/null
+ /*
+ * Mesa 3-D graphics library
+ *
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+ #include "shader_enums.h"
+ #include "util/macros.h"
+ #include "mesa/main/config.h"
+
+ #define ENUM(x) [x] = #x
+ #define NAME(val) ((((val) < ARRAY_SIZE(names)) && names[(val)]) ? names[(val)] : "UNKNOWN")
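+
+ /* e.g. NAME(MESA_SHADER_VERTEX) yields "MESA_SHADER_VERTEX" with the
+ * names table below; indices without an entry fall back to "UNKNOWN".
+ */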
+
+ const char *
+ gl_shader_stage_name(gl_shader_stage stage)
+ {
+ static const char *names[] = {
+ ENUM(MESA_SHADER_VERTEX),
+ ENUM(MESA_SHADER_TESS_CTRL),
+ ENUM(MESA_SHADER_TESS_EVAL),
+ ENUM(MESA_SHADER_GEOMETRY),
+ ENUM(MESA_SHADER_FRAGMENT),
+ ENUM(MESA_SHADER_COMPUTE),
+ };
+ STATIC_ASSERT(ARRAY_SIZE(names) == MESA_SHADER_STAGES);
+ return NAME(stage);
+ }
+
+ /**
+ * Translate a gl_shader_stage to a short shader stage name for debug
+ * printouts and error messages.
+ */
+ const char *
+ _mesa_shader_stage_to_string(unsigned stage)
+ {
+ switch (stage) {
+ case MESA_SHADER_VERTEX: return "vertex";
+ case MESA_SHADER_FRAGMENT: return "fragment";
+ case MESA_SHADER_GEOMETRY: return "geometry";
+ case MESA_SHADER_COMPUTE: return "compute";
+ case MESA_SHADER_TESS_CTRL: return "tessellation control";
+ case MESA_SHADER_TESS_EVAL: return "tessellation evaluation";
+ }
+
+ unreachable("Unknown shader stage.");
+ }
+
+ /**
+ * Translate a gl_shader_stage to a shader stage abbreviation (VS, GS, FS)
+ * for debug printouts and error messages.
+ */
+ const char *
+ _mesa_shader_stage_to_abbrev(unsigned stage)
+ {
+ switch (stage) {
+ case MESA_SHADER_VERTEX: return "VS";
+ case MESA_SHADER_FRAGMENT: return "FS";
+ case MESA_SHADER_GEOMETRY: return "GS";
+ case MESA_SHADER_COMPUTE: return "CS";
+ case MESA_SHADER_TESS_CTRL: return "TCS";
+ case MESA_SHADER_TESS_EVAL: return "TES";
+ }
+
+ unreachable("Unknown shader stage.");
+ }
+
+ const char *
+ gl_vert_attrib_name(gl_vert_attrib attrib)
+ {
+ static const char *names[] = {
+ ENUM(VERT_ATTRIB_POS),
+ ENUM(VERT_ATTRIB_WEIGHT),
+ ENUM(VERT_ATTRIB_NORMAL),
+ ENUM(VERT_ATTRIB_COLOR0),
+ ENUM(VERT_ATTRIB_COLOR1),
+ ENUM(VERT_ATTRIB_FOG),
+ ENUM(VERT_ATTRIB_COLOR_INDEX),
+ ENUM(VERT_ATTRIB_EDGEFLAG),
+ ENUM(VERT_ATTRIB_TEX0),
+ ENUM(VERT_ATTRIB_TEX1),
+ ENUM(VERT_ATTRIB_TEX2),
+ ENUM(VERT_ATTRIB_TEX3),
+ ENUM(VERT_ATTRIB_TEX4),
+ ENUM(VERT_ATTRIB_TEX5),
+ ENUM(VERT_ATTRIB_TEX6),
+ ENUM(VERT_ATTRIB_TEX7),
+ ENUM(VERT_ATTRIB_POINT_SIZE),
+ ENUM(VERT_ATTRIB_GENERIC0),
+ ENUM(VERT_ATTRIB_GENERIC1),
+ ENUM(VERT_ATTRIB_GENERIC2),
+ ENUM(VERT_ATTRIB_GENERIC3),
+ ENUM(VERT_ATTRIB_GENERIC4),
+ ENUM(VERT_ATTRIB_GENERIC5),
+ ENUM(VERT_ATTRIB_GENERIC6),
+ ENUM(VERT_ATTRIB_GENERIC7),
+ ENUM(VERT_ATTRIB_GENERIC8),
+ ENUM(VERT_ATTRIB_GENERIC9),
+ ENUM(VERT_ATTRIB_GENERIC10),
+ ENUM(VERT_ATTRIB_GENERIC11),
+ ENUM(VERT_ATTRIB_GENERIC12),
+ ENUM(VERT_ATTRIB_GENERIC13),
+ ENUM(VERT_ATTRIB_GENERIC14),
+ ENUM(VERT_ATTRIB_GENERIC15),
+ };
+ STATIC_ASSERT(ARRAY_SIZE(names) == VERT_ATTRIB_MAX);
+ return NAME(attrib);
+ }
+
+ const char *
+ gl_varying_slot_name(gl_varying_slot slot)
+ {
+ static const char *names[] = {
+ ENUM(VARYING_SLOT_POS),
+ ENUM(VARYING_SLOT_COL0),
+ ENUM(VARYING_SLOT_COL1),
+ ENUM(VARYING_SLOT_FOGC),
+ ENUM(VARYING_SLOT_TEX0),
+ ENUM(VARYING_SLOT_TEX1),
+ ENUM(VARYING_SLOT_TEX2),
+ ENUM(VARYING_SLOT_TEX3),
+ ENUM(VARYING_SLOT_TEX4),
+ ENUM(VARYING_SLOT_TEX5),
+ ENUM(VARYING_SLOT_TEX6),
+ ENUM(VARYING_SLOT_TEX7),
+ ENUM(VARYING_SLOT_PSIZ),
+ ENUM(VARYING_SLOT_BFC0),
+ ENUM(VARYING_SLOT_BFC1),
+ ENUM(VARYING_SLOT_EDGE),
+ ENUM(VARYING_SLOT_CLIP_VERTEX),
+ ENUM(VARYING_SLOT_CLIP_DIST0),
+ ENUM(VARYING_SLOT_CLIP_DIST1),
+ ENUM(VARYING_SLOT_PRIMITIVE_ID),
+ ENUM(VARYING_SLOT_LAYER),
+ ENUM(VARYING_SLOT_VIEWPORT),
+ ENUM(VARYING_SLOT_FACE),
+ ENUM(VARYING_SLOT_PNTC),
+ ENUM(VARYING_SLOT_TESS_LEVEL_OUTER),
+ ENUM(VARYING_SLOT_TESS_LEVEL_INNER),
+ ENUM(VARYING_SLOT_VAR0),
+ ENUM(VARYING_SLOT_VAR1),
+ ENUM(VARYING_SLOT_VAR2),
+ ENUM(VARYING_SLOT_VAR3),
+ ENUM(VARYING_SLOT_VAR4),
+ ENUM(VARYING_SLOT_VAR5),
+ ENUM(VARYING_SLOT_VAR6),
+ ENUM(VARYING_SLOT_VAR7),
+ ENUM(VARYING_SLOT_VAR8),
+ ENUM(VARYING_SLOT_VAR9),
+ ENUM(VARYING_SLOT_VAR10),
+ ENUM(VARYING_SLOT_VAR11),
+ ENUM(VARYING_SLOT_VAR12),
+ ENUM(VARYING_SLOT_VAR13),
+ ENUM(VARYING_SLOT_VAR14),
+ ENUM(VARYING_SLOT_VAR15),
+ ENUM(VARYING_SLOT_VAR16),
+ ENUM(VARYING_SLOT_VAR17),
+ ENUM(VARYING_SLOT_VAR18),
+ ENUM(VARYING_SLOT_VAR19),
+ ENUM(VARYING_SLOT_VAR20),
+ ENUM(VARYING_SLOT_VAR21),
+ ENUM(VARYING_SLOT_VAR22),
+ ENUM(VARYING_SLOT_VAR23),
+ ENUM(VARYING_SLOT_VAR24),
+ ENUM(VARYING_SLOT_VAR25),
+ ENUM(VARYING_SLOT_VAR26),
+ ENUM(VARYING_SLOT_VAR27),
+ ENUM(VARYING_SLOT_VAR28),
+ ENUM(VARYING_SLOT_VAR29),
+ ENUM(VARYING_SLOT_VAR30),
+ ENUM(VARYING_SLOT_VAR31),
+ };
+ STATIC_ASSERT(ARRAY_SIZE(names) == VARYING_SLOT_MAX);
+ return NAME(slot);
+ }
+
+ const char *
+ gl_system_value_name(gl_system_value sysval)
+ {
+ static const char *names[] = {
+ ENUM(SYSTEM_VALUE_VERTEX_ID),
+ ENUM(SYSTEM_VALUE_INSTANCE_ID),
++ ENUM(SYSTEM_VALUE_INSTANCE_INDEX),
+ ENUM(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE),
+ ENUM(SYSTEM_VALUE_BASE_VERTEX),
+ ENUM(SYSTEM_VALUE_INVOCATION_ID),
+ ENUM(SYSTEM_VALUE_FRONT_FACE),
+ ENUM(SYSTEM_VALUE_SAMPLE_ID),
+ ENUM(SYSTEM_VALUE_SAMPLE_POS),
+ ENUM(SYSTEM_VALUE_SAMPLE_MASK_IN),
+ ENUM(SYSTEM_VALUE_TESS_COORD),
+ ENUM(SYSTEM_VALUE_VERTICES_IN),
+ ENUM(SYSTEM_VALUE_PRIMITIVE_ID),
+ ENUM(SYSTEM_VALUE_TESS_LEVEL_OUTER),
+ ENUM(SYSTEM_VALUE_TESS_LEVEL_INNER),
+ ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID),
++ ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX),
++ ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_ID),
+ ENUM(SYSTEM_VALUE_WORK_GROUP_ID),
+ ENUM(SYSTEM_VALUE_NUM_WORK_GROUPS),
+ ENUM(SYSTEM_VALUE_VERTEX_CNT),
+ };
+ STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX);
+ return NAME(sysval);
+ }
+
+ const char *
+ glsl_interp_qualifier_name(enum glsl_interp_qualifier qual)
+ {
+ static const char *names[] = {
+ ENUM(INTERP_QUALIFIER_NONE),
+ ENUM(INTERP_QUALIFIER_SMOOTH),
+ ENUM(INTERP_QUALIFIER_FLAT),
+ ENUM(INTERP_QUALIFIER_NOPERSPECTIVE),
+ };
+ STATIC_ASSERT(ARRAY_SIZE(names) == INTERP_QUALIFIER_COUNT);
+ return NAME(qual);
+ }
+
+ const char *
+ gl_frag_result_name(gl_frag_result result)
+ {
+ static const char *names[] = {
+ ENUM(FRAG_RESULT_DEPTH),
+ ENUM(FRAG_RESULT_STENCIL),
+ ENUM(FRAG_RESULT_COLOR),
+ ENUM(FRAG_RESULT_SAMPLE_MASK),
+ ENUM(FRAG_RESULT_DATA0),
+ ENUM(FRAG_RESULT_DATA1),
+ ENUM(FRAG_RESULT_DATA2),
+ ENUM(FRAG_RESULT_DATA3),
+ ENUM(FRAG_RESULT_DATA4),
+ ENUM(FRAG_RESULT_DATA5),
+ ENUM(FRAG_RESULT_DATA6),
+ ENUM(FRAG_RESULT_DATA7),
+ };
+ STATIC_ASSERT(ARRAY_SIZE(names) == FRAG_RESULT_MAX);
+ return NAME(result);
+ }
--- /dev/null
+ /*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+ #ifndef SHADER_ENUMS_H
+ #define SHADER_ENUMS_H
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ /**
+ * Shader stages.
+ *
+ * The order must match how shaders are ordered in the pipeline.
+ * The GLSL linker assumes that if i<j, then the j-th shader is
+ * executed later than the i-th shader.
+ */
+ typedef enum
+ {
+ MESA_SHADER_VERTEX = 0,
+ MESA_SHADER_TESS_CTRL = 1,
+ MESA_SHADER_TESS_EVAL = 2,
+ MESA_SHADER_GEOMETRY = 3,
+ MESA_SHADER_FRAGMENT = 4,
+ MESA_SHADER_COMPUTE = 5,
+ } gl_shader_stage;
+
+ const char *gl_shader_stage_name(gl_shader_stage stage);
+
+ /**
+ * Translate a gl_shader_stage to a short shader stage name for debug
+ * printouts and error messages.
+ */
+ const char *_mesa_shader_stage_to_string(unsigned stage);
+
+ /**
+ * Translate a gl_shader_stage to a shader stage abbreviation (VS, GS, FS)
+ * for debug printouts and error messages.
+ */
+ const char *_mesa_shader_stage_to_abbrev(unsigned stage);
+
+ #define MESA_SHADER_STAGES (MESA_SHADER_COMPUTE + 1)
+
+
+ /**
+ * Indexes for vertex program attributes.
+ * GL_NV_vertex_program aliases generic attributes over the conventional
+ * attributes. In GL_ARB_vertex_program shader the aliasing is optional.
+ * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the
+ * generic attributes are distinct/separate).
+ */
+ typedef enum
+ {
+ VERT_ATTRIB_POS = 0,
+ VERT_ATTRIB_WEIGHT = 1,
+ VERT_ATTRIB_NORMAL = 2,
+ VERT_ATTRIB_COLOR0 = 3,
+ VERT_ATTRIB_COLOR1 = 4,
+ VERT_ATTRIB_FOG = 5,
+ VERT_ATTRIB_COLOR_INDEX = 6,
+ VERT_ATTRIB_EDGEFLAG = 7,
+ VERT_ATTRIB_TEX0 = 8,
+ VERT_ATTRIB_TEX1 = 9,
+ VERT_ATTRIB_TEX2 = 10,
+ VERT_ATTRIB_TEX3 = 11,
+ VERT_ATTRIB_TEX4 = 12,
+ VERT_ATTRIB_TEX5 = 13,
+ VERT_ATTRIB_TEX6 = 14,
+ VERT_ATTRIB_TEX7 = 15,
+ VERT_ATTRIB_POINT_SIZE = 16,
+ VERT_ATTRIB_GENERIC0 = 17,
+ VERT_ATTRIB_GENERIC1 = 18,
+ VERT_ATTRIB_GENERIC2 = 19,
+ VERT_ATTRIB_GENERIC3 = 20,
+ VERT_ATTRIB_GENERIC4 = 21,
+ VERT_ATTRIB_GENERIC5 = 22,
+ VERT_ATTRIB_GENERIC6 = 23,
+ VERT_ATTRIB_GENERIC7 = 24,
+ VERT_ATTRIB_GENERIC8 = 25,
+ VERT_ATTRIB_GENERIC9 = 26,
+ VERT_ATTRIB_GENERIC10 = 27,
+ VERT_ATTRIB_GENERIC11 = 28,
+ VERT_ATTRIB_GENERIC12 = 29,
+ VERT_ATTRIB_GENERIC13 = 30,
+ VERT_ATTRIB_GENERIC14 = 31,
+ VERT_ATTRIB_GENERIC15 = 32,
+ VERT_ATTRIB_MAX = 33
+ } gl_vert_attrib;
+
+ const char *gl_vert_attrib_name(gl_vert_attrib attrib);
+
+ /**
+ * Symbolic constants to help iterate over
+ * specific blocks of vertex attributes.
+ *
+ * VERT_ATTRIB_FF
+ * includes all fixed function attributes as well as
+ * the aliased GL_NV_vertex_program shader attributes.
+ * VERT_ATTRIB_TEX
+ * include the classic texture coordinate attributes.
+ * Is a subset of VERT_ATTRIB_FF.
+ * VERT_ATTRIB_GENERIC
+ * include the OpenGL 2.0+ GLSL generic shader attributes.
+ * These alias the generic GL_ARB_vertex_shader attributes.
+ */
+ #define VERT_ATTRIB_FF(i) (VERT_ATTRIB_POS + (i))
+ #define VERT_ATTRIB_FF_MAX VERT_ATTRIB_GENERIC0
+
+ #define VERT_ATTRIB_TEX(i) (VERT_ATTRIB_TEX0 + (i))
+ #define VERT_ATTRIB_TEX_MAX MAX_TEXTURE_COORD_UNITS
+
+ #define VERT_ATTRIB_GENERIC(i) (VERT_ATTRIB_GENERIC0 + (i))
+ #define VERT_ATTRIB_GENERIC_MAX MAX_VERTEX_GENERIC_ATTRIBS
+
+ /**
+ * Bitflags for vertex attributes.
+ * These are used in bitfields in many places.
+ */
+ /*@{*/
+ #define VERT_BIT_POS BITFIELD64_BIT(VERT_ATTRIB_POS)
+ #define VERT_BIT_WEIGHT BITFIELD64_BIT(VERT_ATTRIB_WEIGHT)
+ #define VERT_BIT_NORMAL BITFIELD64_BIT(VERT_ATTRIB_NORMAL)
+ #define VERT_BIT_COLOR0 BITFIELD64_BIT(VERT_ATTRIB_COLOR0)
+ #define VERT_BIT_COLOR1 BITFIELD64_BIT(VERT_ATTRIB_COLOR1)
+ #define VERT_BIT_FOG BITFIELD64_BIT(VERT_ATTRIB_FOG)
+ #define VERT_BIT_COLOR_INDEX BITFIELD64_BIT(VERT_ATTRIB_COLOR_INDEX)
+ #define VERT_BIT_EDGEFLAG BITFIELD64_BIT(VERT_ATTRIB_EDGEFLAG)
+ #define VERT_BIT_TEX0 BITFIELD64_BIT(VERT_ATTRIB_TEX0)
+ #define VERT_BIT_TEX1 BITFIELD64_BIT(VERT_ATTRIB_TEX1)
+ #define VERT_BIT_TEX2 BITFIELD64_BIT(VERT_ATTRIB_TEX2)
+ #define VERT_BIT_TEX3 BITFIELD64_BIT(VERT_ATTRIB_TEX3)
+ #define VERT_BIT_TEX4 BITFIELD64_BIT(VERT_ATTRIB_TEX4)
+ #define VERT_BIT_TEX5 BITFIELD64_BIT(VERT_ATTRIB_TEX5)
+ #define VERT_BIT_TEX6 BITFIELD64_BIT(VERT_ATTRIB_TEX6)
+ #define VERT_BIT_TEX7 BITFIELD64_BIT(VERT_ATTRIB_TEX7)
+ #define VERT_BIT_POINT_SIZE BITFIELD64_BIT(VERT_ATTRIB_POINT_SIZE)
+ #define VERT_BIT_GENERIC0 BITFIELD64_BIT(VERT_ATTRIB_GENERIC0)
+
+ #define VERT_BIT(i) BITFIELD64_BIT(i)
+ #define VERT_BIT_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_MAX)
+
+ #define VERT_BIT_FF(i) VERT_BIT(i)
+ #define VERT_BIT_FF_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_FF_MAX)
+ #define VERT_BIT_TEX(i) VERT_BIT(VERT_ATTRIB_TEX(i))
+ #define VERT_BIT_TEX_ALL \
+ BITFIELD64_RANGE(VERT_ATTRIB_TEX(0), VERT_ATTRIB_TEX_MAX)
+
+ #define VERT_BIT_GENERIC(i) VERT_BIT(VERT_ATTRIB_GENERIC(i))
+ #define VERT_BIT_GENERIC_ALL \
+ BITFIELD64_RANGE(VERT_ATTRIB_GENERIC(0), VERT_ATTRIB_GENERIC_MAX)
+ /*@}*/
+
+
+ /**
+ * Indexes for vertex shader outputs, geometry shader inputs/outputs, and
+ * fragment shader inputs.
+ *
+ * Note that some of these values are not available to all pipeline stages.
+ *
+ * When this enum is updated, the following code must be updated too:
+ * - vertResults (in prog_print.c's arb_output_attrib_string())
+ * - fragAttribs (in prog_print.c's arb_input_attrib_string())
+ * - _mesa_varying_slot_in_fs()
+ */
+ typedef enum
+ {
+ VARYING_SLOT_POS,
+ VARYING_SLOT_COL0, /* COL0 and COL1 must be contiguous */
+ VARYING_SLOT_COL1,
+ VARYING_SLOT_FOGC,
+ VARYING_SLOT_TEX0, /* TEX0-TEX7 must be contiguous */
+ VARYING_SLOT_TEX1,
+ VARYING_SLOT_TEX2,
+ VARYING_SLOT_TEX3,
+ VARYING_SLOT_TEX4,
+ VARYING_SLOT_TEX5,
+ VARYING_SLOT_TEX6,
+ VARYING_SLOT_TEX7,
+ VARYING_SLOT_PSIZ, /* Does not appear in FS */
+ VARYING_SLOT_BFC0, /* Does not appear in FS */
+ VARYING_SLOT_BFC1, /* Does not appear in FS */
+ VARYING_SLOT_EDGE, /* Does not appear in FS */
+ VARYING_SLOT_CLIP_VERTEX, /* Does not appear in FS */
+ VARYING_SLOT_CLIP_DIST0,
+ VARYING_SLOT_CLIP_DIST1,
+ VARYING_SLOT_PRIMITIVE_ID, /* Does not appear in VS */
+ VARYING_SLOT_LAYER, /* Appears as VS or GS output */
+ VARYING_SLOT_VIEWPORT, /* Appears as VS or GS output */
+ VARYING_SLOT_FACE, /* FS only */
+ VARYING_SLOT_PNTC, /* FS only */
+ VARYING_SLOT_TESS_LEVEL_OUTER, /* Only appears as TCS output. */
+ VARYING_SLOT_TESS_LEVEL_INNER, /* Only appears as TCS output. */
+ VARYING_SLOT_VAR0, /* First generic varying slot */
+ /* the remaining are simply for the benefit of gl_varying_slot_name()
+ * and not to be construed as an upper bound:
+ */
+ VARYING_SLOT_VAR1,
+ VARYING_SLOT_VAR2,
+ VARYING_SLOT_VAR3,
+ VARYING_SLOT_VAR4,
+ VARYING_SLOT_VAR5,
+ VARYING_SLOT_VAR6,
+ VARYING_SLOT_VAR7,
+ VARYING_SLOT_VAR8,
+ VARYING_SLOT_VAR9,
+ VARYING_SLOT_VAR10,
+ VARYING_SLOT_VAR11,
+ VARYING_SLOT_VAR12,
+ VARYING_SLOT_VAR13,
+ VARYING_SLOT_VAR14,
+ VARYING_SLOT_VAR15,
+ VARYING_SLOT_VAR16,
+ VARYING_SLOT_VAR17,
+ VARYING_SLOT_VAR18,
+ VARYING_SLOT_VAR19,
+ VARYING_SLOT_VAR20,
+ VARYING_SLOT_VAR21,
+ VARYING_SLOT_VAR22,
+ VARYING_SLOT_VAR23,
+ VARYING_SLOT_VAR24,
+ VARYING_SLOT_VAR25,
+ VARYING_SLOT_VAR26,
+ VARYING_SLOT_VAR27,
+ VARYING_SLOT_VAR28,
+ VARYING_SLOT_VAR29,
+ VARYING_SLOT_VAR30,
+ VARYING_SLOT_VAR31,
+ } gl_varying_slot;
+
+
+ #define VARYING_SLOT_MAX (VARYING_SLOT_VAR0 + MAX_VARYING)
+ #define VARYING_SLOT_PATCH0 (VARYING_SLOT_MAX)
+ #define VARYING_SLOT_TESS_MAX (VARYING_SLOT_PATCH0 + MAX_VARYING)
+
+ const char *gl_varying_slot_name(gl_varying_slot slot);
+
+ /**
+ * Bitflags for varying slots.
+ */
+ /*@{*/
+ #define VARYING_BIT_POS BITFIELD64_BIT(VARYING_SLOT_POS)
+ #define VARYING_BIT_COL0 BITFIELD64_BIT(VARYING_SLOT_COL0)
+ #define VARYING_BIT_COL1 BITFIELD64_BIT(VARYING_SLOT_COL1)
+ #define VARYING_BIT_FOGC BITFIELD64_BIT(VARYING_SLOT_FOGC)
+ #define VARYING_BIT_TEX0 BITFIELD64_BIT(VARYING_SLOT_TEX0)
+ #define VARYING_BIT_TEX1 BITFIELD64_BIT(VARYING_SLOT_TEX1)
+ #define VARYING_BIT_TEX2 BITFIELD64_BIT(VARYING_SLOT_TEX2)
+ #define VARYING_BIT_TEX3 BITFIELD64_BIT(VARYING_SLOT_TEX3)
+ #define VARYING_BIT_TEX4 BITFIELD64_BIT(VARYING_SLOT_TEX4)
+ #define VARYING_BIT_TEX5 BITFIELD64_BIT(VARYING_SLOT_TEX5)
+ #define VARYING_BIT_TEX6 BITFIELD64_BIT(VARYING_SLOT_TEX6)
+ #define VARYING_BIT_TEX7 BITFIELD64_BIT(VARYING_SLOT_TEX7)
+ #define VARYING_BIT_TEX(U) BITFIELD64_BIT(VARYING_SLOT_TEX0 + (U))
+ #define VARYING_BITS_TEX_ANY BITFIELD64_RANGE(VARYING_SLOT_TEX0, \
+ MAX_TEXTURE_COORD_UNITS)
+ #define VARYING_BIT_PSIZ BITFIELD64_BIT(VARYING_SLOT_PSIZ)
+ #define VARYING_BIT_BFC0 BITFIELD64_BIT(VARYING_SLOT_BFC0)
+ #define VARYING_BIT_BFC1 BITFIELD64_BIT(VARYING_SLOT_BFC1)
+ #define VARYING_BIT_EDGE BITFIELD64_BIT(VARYING_SLOT_EDGE)
+ #define VARYING_BIT_CLIP_VERTEX BITFIELD64_BIT(VARYING_SLOT_CLIP_VERTEX)
+ #define VARYING_BIT_CLIP_DIST0 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0)
+ #define VARYING_BIT_CLIP_DIST1 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1)
+ #define VARYING_BIT_PRIMITIVE_ID BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID)
+ #define VARYING_BIT_LAYER BITFIELD64_BIT(VARYING_SLOT_LAYER)
+ #define VARYING_BIT_VIEWPORT BITFIELD64_BIT(VARYING_SLOT_VIEWPORT)
+ #define VARYING_BIT_FACE BITFIELD64_BIT(VARYING_SLOT_FACE)
+ #define VARYING_BIT_PNTC BITFIELD64_BIT(VARYING_SLOT_PNTC)
+ #define VARYING_BIT_TESS_LEVEL_OUTER BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_OUTER)
+ #define VARYING_BIT_TESS_LEVEL_INNER BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_INNER)
+ #define VARYING_BIT_VAR(V) BITFIELD64_BIT(VARYING_SLOT_VAR0 + (V))
+ /*@}*/
+
+ /**
+ * Bitflags for system values.
+ */
+ #define SYSTEM_BIT_SAMPLE_ID ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_ID)
+ #define SYSTEM_BIT_SAMPLE_POS ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_POS)
+ #define SYSTEM_BIT_SAMPLE_MASK_IN ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_MASK_IN)
+ #define SYSTEM_BIT_LOCAL_INVOCATION_ID ((uint64_t)1 << SYSTEM_VALUE_LOCAL_INVOCATION_ID)
+
+ /**
+ * If the gl_register_file is PROGRAM_SYSTEM_VALUE, the register index will be
+ * one of these values. If a NIR variable's mode is nir_var_system_value, it
+ * will be one of these values.
+ */
+ typedef enum
+ {
+ /**
+ * \name Vertex shader system values
+ */
+ /*@{*/
+ /**
+ * OpenGL-style vertex ID.
+ *
+ * Section 2.11.7 (Shader Execution), subsection Shader Inputs, of the
+ * OpenGL 3.3 core profile spec says:
+ *
+ * "gl_VertexID holds the integer index i implicitly passed by
+ * DrawArrays or one of the other drawing commands defined in section
+ * 2.8.3."
+ *
+ * Section 2.8.3 (Drawing Commands) of the same spec says:
+ *
+ * "The commands....are equivalent to the commands with the same base
+ * name (without the BaseVertex suffix), except that the ith element
+ * transferred by the corresponding draw call will be taken from
+ * element indices[i] + basevertex of each enabled array."
+ *
+ * Additionally, the overview in the GL_ARB_shader_draw_parameters spec
+ * says:
+ *
+ * "In unextended GL, vertex shaders have inputs named gl_VertexID and
+ * gl_InstanceID, which contain, respectively the index of the vertex
+ * and instance. The value of gl_VertexID is the implicitly passed
+ * index of the vertex being processed, which includes the value of
+ * baseVertex, for those commands that accept it."
+ *
+ * gl_VertexID gets basevertex added in. This differs from DirectX where
+ * SV_VertexID does \b not get basevertex added in.
+ *
+ * \note
+ * If all system values are available, \c SYSTEM_VALUE_VERTEX_ID will be
+ * equal to \c SYSTEM_VALUE_VERTEX_ID_ZERO_BASE plus
+ * \c SYSTEM_VALUE_BASE_VERTEX.
+ *
+ * \sa SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, SYSTEM_VALUE_BASE_VERTEX
+ */
+ SYSTEM_VALUE_VERTEX_ID,
+
+ /**
+ * Instanced ID as supplied to gl_InstanceID
+ *
+ * Values assigned to gl_InstanceID always begin with zero, regardless of
+ * the value of baseinstance.
+ *
+ * Section 11.1.3.9 (Shader Inputs) of the OpenGL 4.4 core profile spec
+ * says:
+ *
+ * "gl_InstanceID holds the integer instance number of the current
+ * primitive in an instanced draw call (see section 10.5)."
+ *
+ * Through a big chain of pseudocode, section 10.5 describes that
+ * baseinstance is not counted by gl_InstanceID. In that section, notice
+ *
+ * "If an enabled vertex attribute array is instanced (it has a
+ * non-zero divisor as specified by VertexAttribDivisor), the element
+ * index that is transferred to the GL, for all vertices, is given by
+ *
+ * floor(instance/divisor) + baseinstance
+ *
+ * If an array corresponding to an attribute required by a vertex
+ * shader is not enabled, then the corresponding element is taken from
+ * the current attribute state (see section 10.2)."
+ *
+ * Note that baseinstance is \b not included in the value of instance.
+ */
+ SYSTEM_VALUE_INSTANCE_ID,
+
++ /**
++ * Vulkan InstanceIndex.
++ *
++ * InstanceIndex = gl_InstanceID + gl_BaseInstance
++ */
++ SYSTEM_VALUE_INSTANCE_INDEX,
++
+ /**
+ * DirectX-style vertex ID.
+ *
+ * Unlike \c SYSTEM_VALUE_VERTEX_ID, this system value does \b not include
+ * the value of basevertex.
+ *
+ * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_BASE_VERTEX
+ */
+ SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
+
+ /**
+ * Value of \c basevertex passed to \c glDrawElementsBaseVertex and similar
+ * functions.
+ *
+ * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
+ */
+ SYSTEM_VALUE_BASE_VERTEX,
+
+ /**
+ * Value of \c baseinstance passed to instanced draw entry points
+ *
+ * \sa SYSTEM_VALUE_INSTANCE_ID
+ */
+ SYSTEM_VALUE_BASE_INSTANCE,
+
+ /**
+ * From _ARB_shader_draw_parameters:
+ *
+ * "Additionally, this extension adds a further built-in variable,
+ * gl_DrawID to the shading language. This variable contains the index
+ * of the draw currently being processed by a Multi* variant of a
+ * drawing command (such as MultiDrawElements or
+ * MultiDrawArraysIndirect)."
+ *
+ * If GL_ARB_multi_draw_indirect is not supported, this is always 0.
+ */
+ SYSTEM_VALUE_DRAW_ID,
+ /*@}*/
+
+ /**
+ * \name Geometry shader system values
+ */
+ /*@{*/
+ SYSTEM_VALUE_INVOCATION_ID, /**< (Also in Tessellation Control shader) */
+ /*@}*/
+
+ /**
+ * \name Fragment shader system values
+ */
+ /*@{*/
+ SYSTEM_VALUE_FRAG_COORD,
+ SYSTEM_VALUE_FRONT_FACE,
+ SYSTEM_VALUE_SAMPLE_ID,
+ SYSTEM_VALUE_SAMPLE_POS,
+ SYSTEM_VALUE_SAMPLE_MASK_IN,
+ SYSTEM_VALUE_HELPER_INVOCATION,
+ /*@}*/
+
+ /**
+ * \name Tessellation Evaluation shader system values
+ */
+ /*@{*/
+ SYSTEM_VALUE_TESS_COORD,
+ SYSTEM_VALUE_VERTICES_IN, /**< Tessellation vertices in input patch */
+ SYSTEM_VALUE_PRIMITIVE_ID,
+ SYSTEM_VALUE_TESS_LEVEL_OUTER, /**< TES input */
+ SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */
+ /*@}*/
+
+ /**
+ * \name Compute shader system values
+ */
+ /*@{*/
+ SYSTEM_VALUE_LOCAL_INVOCATION_ID,
++ SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
++ SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
+ SYSTEM_VALUE_WORK_GROUP_ID,
+ SYSTEM_VALUE_NUM_WORK_GROUPS,
+ /*@}*/
+
+ /**
+ * Driver internal vertex-count, used (for example) for drivers to
+ * calculate stride for stream-out outputs. Not externally visible.
+ */
+ SYSTEM_VALUE_VERTEX_CNT,
+
+ SYSTEM_VALUE_MAX /**< Number of values */
+ } gl_system_value;
+
+ const char *gl_system_value_name(gl_system_value sysval);
+
+ /**
+ * The possible interpolation qualifiers that can be applied to a fragment
+ * shader input in GLSL.
+ *
+ * Note: INTERP_QUALIFIER_NONE must be 0 so that memsetting the
+ * gl_fragment_program data structure to 0 causes the default behavior.
+ */
+ enum glsl_interp_qualifier
+ {
+ INTERP_QUALIFIER_NONE = 0,
+ INTERP_QUALIFIER_SMOOTH,
+ INTERP_QUALIFIER_FLAT,
+ INTERP_QUALIFIER_NOPERSPECTIVE,
+ INTERP_QUALIFIER_COUNT /**< Number of interpolation qualifiers */
+ };
+
+ const char *glsl_interp_qualifier_name(enum glsl_interp_qualifier qual);
+
+ /**
+ * Fragment program results
+ */
+ typedef enum
+ {
+ FRAG_RESULT_DEPTH = 0,
+ FRAG_RESULT_STENCIL = 1,
+ /* If a single color should be written to all render targets, this
+ * register is written. No FRAG_RESULT_DATAn will be written.
+ */
+ FRAG_RESULT_COLOR = 2,
+ FRAG_RESULT_SAMPLE_MASK = 3,
+
+ /* FRAG_RESULT_DATAn are the per-render-target (GLSL gl_FragData[n]
+ * or ARB_fragment_program fragment.color[n]) color results. If
+ * any are written, FRAG_RESULT_COLOR will not be written.
+ * FRAG_RESULT_DATA1 and up are simply for the benefit of
+ * gl_frag_result_name() and not to be construed as an upper bound
+ */
+ FRAG_RESULT_DATA0 = 4,
+ FRAG_RESULT_DATA1,
+ FRAG_RESULT_DATA2,
+ FRAG_RESULT_DATA3,
+ FRAG_RESULT_DATA4,
+ FRAG_RESULT_DATA5,
+ FRAG_RESULT_DATA6,
+ FRAG_RESULT_DATA7,
+ } gl_frag_result;
+
+ const char *gl_frag_result_name(gl_frag_result result);
+
+ #define FRAG_RESULT_MAX (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS)
+
+ /**
+ * \brief Layout qualifiers for gl_FragDepth.
+ *
+ * Extension AMD_conservative_depth allows gl_FragDepth to be redeclared with
+ * a layout qualifier.
+ *
+ * \see enum ir_depth_layout
+ */
+ enum gl_frag_depth_layout
+ {
+ FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */
+ FRAG_DEPTH_LAYOUT_ANY,
+ FRAG_DEPTH_LAYOUT_GREATER,
+ FRAG_DEPTH_LAYOUT_LESS,
+ FRAG_DEPTH_LAYOUT_UNCHANGED
+ };
+
+ #ifdef __cplusplus
+ } /* extern "C" */
+ #endif
+
+ #endif /* SHADER_ENUMS_H */
--- /dev/null
- #include "glsl/nir/nir.h"
+/*
+ * Copyright © 2015-2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_compiler.h"
+#include "brw_context.h"
++#include "nir.h"
+#include "main/errors.h"
+#include "util/debug.h"
+
+static void
+shader_debug_log_mesa(void *data, const char *fmt, ...)
+{
+ struct brw_context *brw = (struct brw_context *)data;
+ va_list args;
+
+ va_start(args, fmt);
+ GLuint msg_id = 0;
+ _mesa_gl_vdebug(&brw->ctx, &msg_id,
+ MESA_DEBUG_SOURCE_SHADER_COMPILER,
+ MESA_DEBUG_TYPE_OTHER,
+ MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args);
+ va_end(args);
+}
+
+static void
+shader_perf_log_mesa(void *data, const char *fmt, ...)
+{
+ struct brw_context *brw = (struct brw_context *)data;
+
+ va_list args;
+ va_start(args, fmt);
+
+ if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+ va_list args_copy;
+ va_copy(args_copy, args);
+ vfprintf(stderr, fmt, args_copy);
+ va_end(args_copy);
+ }
+
+ if (brw->perf_debug) {
+ GLuint msg_id = 0;
+ _mesa_gl_vdebug(&brw->ctx, &msg_id,
+ MESA_DEBUG_SOURCE_SHADER_COMPILER,
+ MESA_DEBUG_TYPE_PERFORMANCE,
+ MESA_DEBUG_SEVERITY_MEDIUM, fmt, args);
+ }
+ va_end(args);
+}
+
+#define COMMON_OPTIONS \
+ /* In order to help allow for better CSE at the NIR level we tell NIR to \
+ * split all ffma instructions during opt_algebraic and we then re-combine \
+ * them as a later step. \
+ */ \
+ .lower_ffma = true, \
+ .lower_sub = true, \
+ .lower_fdiv = true, \
+ .lower_scmp = true, \
+ .lower_fmod = true, \
+ .lower_bitfield_extract = true, \
+ .lower_bitfield_insert = true, \
+ .lower_uadd_carry = true, \
+ .lower_usub_borrow = true, \
+ .native_integers = true, \
+ .vertex_id_zero_based = true
+
+static const struct nir_shader_compiler_options scalar_nir_options = {
+ COMMON_OPTIONS,
+ .lower_pack_half_2x16 = true,
+ .lower_pack_snorm_2x16 = true,
+ .lower_pack_snorm_4x8 = true,
+ .lower_pack_unorm_2x16 = true,
+ .lower_pack_unorm_4x8 = true,
+ .lower_unpack_half_2x16 = true,
+ .lower_unpack_snorm_2x16 = true,
+ .lower_unpack_snorm_4x8 = true,
+ .lower_unpack_unorm_2x16 = true,
+ .lower_unpack_unorm_4x8 = true,
+};
+
+static const struct nir_shader_compiler_options vector_nir_options = {
+ COMMON_OPTIONS,
+
+ /* In the vec4 backend, our dpN instruction replicates its result to all the
+ * components of a vec4. We would like NIR to give us replicated fdot
+ * instructions because it can optimize better for us.
+ */
+ .fdot_replicates = true,
+
+ .lower_pack_snorm_2x16 = true,
+ .lower_pack_unorm_2x16 = true,
+ .lower_unpack_snorm_2x16 = true,
+ .lower_unpack_unorm_2x16 = true,
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
+};
+
+struct brw_compiler *
+brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
+{
+ struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler);
+
+ compiler->devinfo = devinfo;
+ compiler->shader_debug_log = shader_debug_log_mesa;
+ compiler->shader_perf_log = shader_perf_log_mesa;
+
+ brw_fs_alloc_reg_sets(compiler);
+ brw_vec4_alloc_reg_set(compiler);
+
+ compiler->scalar_stage[MESA_SHADER_VERTEX] =
+ devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
+ compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false;
+ compiler->scalar_stage[MESA_SHADER_TESS_EVAL] =
+ devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true);
+ compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
+ devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", true);
+ compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
+ compiler->scalar_stage[MESA_SHADER_COMPUTE] = true;
+
+ /* We want the GLSL compiler to emit code that uses condition codes */
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
+ compiler->glsl_compiler_options[i].MaxIfDepth =
+ devinfo->gen < 6 ? 16 : UINT_MAX;
+
+ compiler->glsl_compiler_options[i].EmitCondCodes = true;
+ compiler->glsl_compiler_options[i].EmitNoNoise = true;
+ compiler->glsl_compiler_options[i].EmitNoMainReturn = true;
+ compiler->glsl_compiler_options[i].EmitNoIndirectInput = true;
+ compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
+ compiler->glsl_compiler_options[i].LowerClipDistance = true;
+
+ bool is_scalar = compiler->scalar_stage[i];
+
+ compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
+ compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
+ compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar;
+
+ /* !ARB_gpu_shader5 */
+ if (devinfo->gen < 7)
+ compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
+
+ compiler->glsl_compiler_options[i].NirOptions =
+ is_scalar ? &scalar_nir_options : &vector_nir_options;
+
+ compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
+ }
+
+ compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectInput = false;
+ compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false;
+
+ if (compiler->scalar_stage[MESA_SHADER_GEOMETRY])
+ compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false;
+
+ compiler->glsl_compiler_options[MESA_SHADER_COMPUTE]
+ .LowerShaderSharedVariables = true;
+
+ return compiler;
+}
--- /dev/null
- -I$(top_srcdir)/src/glsl/nir \
+# Copyright © 2015 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+SUBDIRS = . tests
+
+vulkan_includedir = $(includedir)/vulkan
+
+vulkan_include_HEADERS = \
+ $(top_srcdir)/include/vulkan/vk_platform.h \
+ $(top_srcdir)/include/vulkan/vulkan.h \
+ $(top_srcdir)/include/vulkan/vulkan_intel.h
+
+# Used when generating entrypoints to filter out unwanted extensions
+VULKAN_ENTRYPOINT_CPPFLAGS = \
+ -I$(top_srcdir)/include/vulkan \
+ -DVK_USE_PLATFORM_XCB_KHR \
+ -DVK_USE_PLATFORM_WAYLAND_KHR
+
+lib_LTLIBRARIES = libvulkan.la
+
+check_LTLIBRARIES = libvulkan-test.la
+
+PER_GEN_LIBS = \
+ libanv-gen7.la \
+ libanv-gen75.la \
+ libanv-gen8.la \
+ libanv-gen9.la
+
+noinst_LTLIBRARIES = $(PER_GEN_LIBS)
+
+# The gallium includes are for the util/u_math.h include from main/macros.h
+
+AM_CPPFLAGS = \
+ $(INTEL_CFLAGS) \
+ $(VALGRIND_CFLAGS) \
+ $(DEFINES) \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src \
- -I$(top_builddir)/src/glsl/nir \
++ -I$(top_srcdir)/src/compiler \
+ -I$(top_srcdir)/src/mapi \
+ -I$(top_srcdir)/src/mesa \
+ -I$(top_srcdir)/src/mesa/drivers/dri/common \
+ -I$(top_srcdir)/src/mesa/drivers/dri/i965 \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/isl/ \
+ -I$(top_builddir)/src \
++ -I$(top_builddir)/src/compiler \
+ -I$(top_builddir)/src/vulkan
+
+libvulkan_la_CFLAGS = $(CFLAGS) -Wno-override-init
+
+VULKAN_SOURCES = \
+ anv_allocator.c \
+ anv_cmd_buffer.c \
+ anv_batch_chain.c \
+ anv_descriptor_set.c \
+ anv_device.c \
+ anv_dump.c \
+ anv_entrypoints.c \
+ anv_entrypoints.h \
+ anv_formats.c \
+ anv_image.c \
+ anv_intel.c \
+ anv_meta.c \
+ anv_meta_clear.c \
+ anv_meta_resolve.c \
+ anv_nir_apply_dynamic_offsets.c \
+ anv_nir_apply_pipeline_layout.c \
+ anv_nir_lower_push_constants.c \
+ anv_pass.c \
+ anv_pipeline.c \
+ anv_private.h \
+ anv_query.c \
+ anv_util.c \
+ anv_wsi.c \
+ anv_wsi_x11.c
+
+BUILT_SOURCES = \
+ anv_entrypoints.h \
+ anv_entrypoints.c
+
+libanv_gen7_la_SOURCES = \
+ genX_cmd_buffer.c \
+ genX_pipeline.c \
+ gen7_cmd_buffer.c \
+ gen7_pipeline.c \
+ gen7_state.c
+libanv_gen7_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=70
+
+libanv_gen75_la_SOURCES = \
+ genX_cmd_buffer.c \
+ genX_pipeline.c \
+ gen7_cmd_buffer.c \
+ gen7_pipeline.c \
+ gen7_state.c
+libanv_gen75_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=75
+
+libanv_gen8_la_SOURCES = \
+ genX_cmd_buffer.c \
+ genX_pipeline.c \
+ gen8_cmd_buffer.c \
+ gen8_pipeline.c \
+ gen8_state.c
+libanv_gen8_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=80
+
+libanv_gen9_la_SOURCES = \
+ genX_cmd_buffer.c \
+ genX_pipeline.c \
+ gen8_cmd_buffer.c \
+ gen8_pipeline.c \
+ gen8_state.c
+libanv_gen9_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=90
+
+if HAVE_EGL_PLATFORM_WAYLAND
+BUILT_SOURCES += \
+ wayland-drm-protocol.c \
+ wayland-drm-client-protocol.h
+
+%-protocol.c : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml
+ $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@
+
+%-client-protocol.h : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml
+ $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@
+
+AM_CPPFLAGS += -I$(top_srcdir)/src/egl/wayland/wayland-drm
+VULKAN_SOURCES += \
+ wayland-drm-protocol.c \
+ anv_wsi_wayland.c
+libvulkan_la_CFLAGS += -DHAVE_WAYLAND_PLATFORM
+endif
+
+libvulkan_la_SOURCES = \
+ $(VULKAN_SOURCES) \
+ anv_gem.c
+
+anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS)
+ $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< header > $@
+
+anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS)
+ $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< code > $@
+
+CLEANFILES = $(BUILT_SOURCES)
+
+libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \
+ $(top_builddir)/src/isl/libisl.la \
+ $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \
+ ../mesa/libmesa.la \
+ ../mesa/drivers/dri/common/libdri_test_stubs.la \
+ -lpthread -ldl -lstdc++ \
+ $(PER_GEN_LIBS)
+
+# Libvulkan with dummy gem. Used for unit tests.
+
+libvulkan_test_la_SOURCES = \
+ $(VULKAN_SOURCES) \
+ anv_gem_stubs.c
+
+libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS)
+libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD)
+
+include $(top_srcdir)/install-lib-links.mk
--- /dev/null
- #include "glsl/nir/nir_builder.h"
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "anv_meta.h"
+#include "anv_private.h"
++#include "nir/nir_builder.h"
+
+struct anv_render_pass anv_meta_dummy_renderpass = {0};
+
+static nir_shader *
+build_nir_vertex_shader(bool attr_flat)
+{
+ nir_builder b;
+
+ const struct glsl_type *vertex_type = glsl_vec4_type();
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
+
+ nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ vertex_type, "a_pos");
+ pos_in->data.location = VERT_ATTRIB_GENERIC0;
+ nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
+ vertex_type, "gl_Position");
+ pos_out->data.location = VARYING_SLOT_POS;
+ nir_copy_var(&b, pos_out, pos_in);
+
+ /* Add one more pass-through attribute. For clear shaders, this is used
+ * to store the color and for blit shaders it's the texture coordinate.
+ */
+ const struct glsl_type *attr_type = glsl_vec4_type();
+ nir_variable *attr_in = nir_variable_create(b.shader, nir_var_shader_in,
+ attr_type, "a_attr");
+ attr_in->data.location = VERT_ATTRIB_GENERIC1;
+ nir_variable *attr_out = nir_variable_create(b.shader, nir_var_shader_out,
+ attr_type, "v_attr");
+ attr_out->data.location = VARYING_SLOT_VAR0;
+ attr_out->data.interpolation = attr_flat ? INTERP_QUALIFIER_FLAT :
+ INTERP_QUALIFIER_SMOOTH;
+ nir_copy_var(&b, attr_out, attr_in);
+
+ return b.shader;
+}
+
+static nir_shader *
+build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
+{
+ nir_builder b;
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs");
+
+ const struct glsl_type *color_type = glsl_vec4_type();
+
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ glsl_vec4_type(), "v_attr");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
+
+ /* Swizzle the array index which comes in as Z coordinate into the right
+ * position.
+ */
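+ /* For example, for a 1D array texture the layer index arrives in v_attr.z,
+ * so the swizzle below selects { x, z } and the two-component coordinate
+ * becomes (x, layer); for 2D and 3D sources the identity { x, y, z } is
+ * kept.
+ */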
+ unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
+ nir_ssa_def *const tex_pos =
+ nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
+ (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false);
+
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
+ glsl_get_base_type(color_type));
+ nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
+ sampler_type, "s_tex");
+ sampler->data.descriptor_set = 0;
+ sampler->data.binding = 0;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
+ tex->sampler_dim = tex_dim;
+ tex->op = nir_texop_tex;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(tex_pos);
+ tex->dest_type = nir_type_float; /* TODO */
+ tex->is_array = glsl_sampler_type_is_array(sampler_type);
+ tex->coord_components = tex_pos->num_components;
+ tex->sampler = nir_deref_var_create(tex, sampler);
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
+ color_type, "f_color");
+ color_out->data.location = FRAG_RESULT_DATA0;
+ nir_store_var(&b, color_out, &tex->dest.ssa, 4);
+
+ return b.shader;
+}
+
+void
+anv_meta_save(struct anv_meta_saved_state *state,
+ const struct anv_cmd_buffer *cmd_buffer,
+ uint32_t dynamic_mask)
+{
+ state->old_pipeline = cmd_buffer->state.pipeline;
+ state->old_descriptor_set0 = cmd_buffer->state.descriptors[0];
+ memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings,
+ sizeof(state->old_vertex_bindings));
+
+ state->dynamic_mask = dynamic_mask;
+ anv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic,
+ dynamic_mask);
+}
+
+void
+anv_meta_restore(const struct anv_meta_saved_state *state,
+ struct anv_cmd_buffer *cmd_buffer)
+{
+ cmd_buffer->state.pipeline = state->old_pipeline;
+ cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
+ memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings,
+ sizeof(state->old_vertex_bindings));
+
+ cmd_buffer->state.vb_dirty |= (1 << ANV_META_VERTEX_BINDING_COUNT) - 1;
+ cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE;
+ cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
+
+ anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic,
+ state->dynamic_mask);
+ cmd_buffer->state.dirty |= state->dynamic_mask;
+
+ /* Since we've used the pipeline with the VS disabled, set
+ * need_query_wa. See CmdBeginQuery.
+ */
+ cmd_buffer->state.need_query_wa = true;
+}
+
+VkImageViewType
+anv_meta_get_view_type(const struct anv_image *image)
+{
+ switch (image->type) {
+ case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D;
+ case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D;
+ case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D;
+ default:
+ unreachable("bad VkImageViewType");
+ }
+}
+
+/**
+ * When creating a destination VkImageView, this function provides the needed
+ * VkImageViewCreateInfo::subresourceRange::baseArrayLayer.
+ */
+uint32_t
+anv_meta_get_iview_layer(const struct anv_image *dest_image,
+ const VkImageSubresourceLayers *dest_subresource,
+ const VkOffset3D *dest_offset)
+{
+ switch (dest_image->type) {
+ case VK_IMAGE_TYPE_1D:
+ case VK_IMAGE_TYPE_2D:
+ return dest_subresource->baseArrayLayer;
+ case VK_IMAGE_TYPE_3D:
+ /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer,
+ * but meta does it anyway. When doing so, we translate the
+ * destination's z offset into an array offset.
+ */
+ return dest_offset->z;
+ default:
+ assert(!"bad VkImageType");
+ return 0;
+ }
+}
+
+static VkResult
+anv_device_init_meta_blit_state(struct anv_device *device)
+{
+ VkResult result;
+
+ result = anv_CreateRenderPass(anv_device_to_handle(device),
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 1,
+ .pPreserveAttachments = (uint32_t[]) { 0 },
+ },
+ .dependencyCount = 0,
+ }, &device->meta_state.alloc, &device->meta_state.blit.render_pass);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* We don't use a vertex shader for clearing, but instead build and pass
+ * the VUEs directly to the rasterization backend. However, we do need
+ * to provide GLSL source for the vertex shader so that the compiler
+ * does not dead-code our inputs.
+ */
+ struct anv_shader_module vs = {
+ .nir = build_nir_vertex_shader(false),
+ };
+
+ struct anv_shader_module fs_1d = {
+ .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D),
+ };
+
+ struct anv_shader_module fs_2d = {
+ .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D),
+ };
+
+ struct anv_shader_module fs_3d = {
+ .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D),
+ };
+
+ VkPipelineVertexInputStateCreateInfo vi_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 2,
+ .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
+ {
+ .binding = 0,
+ .stride = 0,
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ },
+ {
+ .binding = 1,
+ .stride = 5 * sizeof(float),
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ },
+ },
+ .vertexAttributeDescriptionCount = 3,
+ .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
+ {
+ /* VUE Header */
+ .location = 0,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32B32A32_UINT,
+ .offset = 0
+ },
+ {
+ /* Position */
+ .location = 1,
+ .binding = 1,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = 0
+ },
+ {
+ /* Texture Coordinate */
+ .location = 2,
+ .binding = 1,
+ .format = VK_FORMAT_R32G32B32_SFLOAT,
+ .offset = 8
+ }
+ }
+ };
+
+ VkDescriptorSetLayoutCreateInfo ds_layout_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]) {
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = NULL
+ },
+ }
+ };
+ result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
+ &ds_layout_info,
+ &device->meta_state.alloc,
+ &device->meta_state.blit.ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail_render_pass;
+
+ result = anv_CreatePipelineLayout(anv_device_to_handle(device),
+ &(VkPipelineLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.blit.ds_layout,
+ },
+ &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout);
+ if (result != VK_SUCCESS)
+ goto fail_descriptor_set_layout;
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = anv_shader_module_to_handle(&vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL
+ }, {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
+ .pName = "main",
+ .pSpecializationInfo = NULL
+ },
+ };
+
+ const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = &vi_create_info,
+ .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState = &(VkPipelineViewportStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
+ },
+ .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
+ },
+ .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkPipelineColorBlendAttachmentState []) {
+ { .colorWriteMask =
+ VK_COLOR_COMPONENT_A_BIT |
+ VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT },
+ }
+ },
+ .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 9,
+ .pDynamicStates = (VkDynamicState[]) {
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.blit.pipeline_layout,
+ .renderPass = device->meta_state.blit.render_pass,
+ .subpass = 0,
+ };
+
+ const struct anv_graphics_pipeline_create_info anv_pipeline_info = {
+ .color_attachment_count = -1,
+ .use_repclear = false,
+ .disable_viewport = true,
+ .disable_scissor = true,
+ .disable_vs = true,
+ .use_rectlist = true
+ };
+
+ pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d);
+ result = anv_graphics_pipeline_create(anv_device_to_handle(device),
+ VK_NULL_HANDLE,
+ &vk_pipeline_info, &anv_pipeline_info,
+ &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src);
+ if (result != VK_SUCCESS)
+ goto fail_pipeline_layout;
+
+ pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d);
+ result = anv_graphics_pipeline_create(anv_device_to_handle(device),
+ VK_NULL_HANDLE,
+ &vk_pipeline_info, &anv_pipeline_info,
+ &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src);
+ if (result != VK_SUCCESS)
+ goto fail_pipeline_1d;
+
+ pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d);
+ result = anv_graphics_pipeline_create(anv_device_to_handle(device),
+ VK_NULL_HANDLE,
+ &vk_pipeline_info, &anv_pipeline_info,
+ &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src);
+ if (result != VK_SUCCESS)
+ goto fail_pipeline_2d;
+
+ ralloc_free(vs.nir);
+ ralloc_free(fs_1d.nir);
+ ralloc_free(fs_2d.nir);
+ ralloc_free(fs_3d.nir);
+
+ return VK_SUCCESS;
+
+ fail_pipeline_2d:
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_2d_src,
+ &device->meta_state.alloc);
+
+ fail_pipeline_1d:
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_1d_src,
+ &device->meta_state.alloc);
+
+ fail_pipeline_layout:
+ anv_DestroyPipelineLayout(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_layout,
+ &device->meta_state.alloc);
+ fail_descriptor_set_layout:
+ anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
+ device->meta_state.blit.ds_layout,
+ &device->meta_state.alloc);
+ fail_render_pass:
+ anv_DestroyRenderPass(anv_device_to_handle(device),
+ device->meta_state.blit.render_pass,
+ &device->meta_state.alloc);
+
+ ralloc_free(vs.nir);
+ ralloc_free(fs_1d.nir);
+ ralloc_free(fs_2d.nir);
+ ralloc_free(fs_3d.nir);
+ fail:
+ return result;
+}
+
+static void
+meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_meta_saved_state *saved_state)
+{
+ anv_meta_save(saved_state, cmd_buffer,
+ (1 << VK_DYNAMIC_STATE_VIEWPORT));
+}
+
+struct blit_region {
+ VkOffset3D src_offset;
+ VkExtent3D src_extent;
+ VkOffset3D dest_offset;
+ VkExtent3D dest_extent;
+};
+
+/* Returns the user-provided VkBufferImageCopy::imageOffset in units of
+ * elements rather than texels. One element equals one texel if the image is
+ * uncompressed, or one block if it is compressed.
+ */
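+/* For example (hypothetical values): with a compressed format whose blocks
+ * are 4x4x1 texels (bw = 4, bh = 4, bd = 1), an imageOffset of (8, 12, 0)
+ * texels maps to (2, 3, 0) elements; for uncompressed formats bw = bh = bd = 1
+ * and the offset is unchanged.
+ */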
+static struct VkOffset3D
+meta_region_offset_el(const struct anv_image * image,
+ const struct VkOffset3D * offset)
+{
+ const struct isl_format_layout * isl_layout = image->format->isl_layout;
+ return (VkOffset3D) {
+ .x = offset->x / isl_layout->bw,
+ .y = offset->y / isl_layout->bh,
+ .z = offset->z / isl_layout->bd,
+ };
+}
+
+/* Returns the user-provided VkBufferImageCopy::imageExtent in units of
+ * elements rather than texels. One element equals one texel if the image is
+ * uncompressed, or one block if it is compressed.
+ */
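+/* For example (hypothetical values): an imageExtent of (10, 10, 1) texels in
+ * a format with 4x4x1 blocks rounds up to (3, 3, 1) elements.
+ */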
+static struct VkExtent3D
+meta_region_extent_el(const VkFormat format,
+ const struct VkExtent3D * extent)
+{
+ const struct isl_format_layout * isl_layout =
+ anv_format_for_vk_format(format)->isl_layout;
+ return (VkExtent3D) {
+ .width = DIV_ROUND_UP(extent->width , isl_layout->bw),
+ .height = DIV_ROUND_UP(extent->height, isl_layout->bh),
+ .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd),
+ };
+}
+
+static void
+meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_image *src_image,
+ struct anv_image_view *src_iview,
+ VkOffset3D src_offset,
+ VkExtent3D src_extent,
+ struct anv_image *dest_image,
+ struct anv_image_view *dest_iview,
+ VkOffset3D dest_offset,
+ VkExtent3D dest_extent,
+ VkFilter blit_filter)
+{
+ struct anv_device *device = cmd_buffer->device;
+ VkDescriptorPool dummy_desc_pool = (VkDescriptorPool)1;
+
+ struct blit_vb_data {
+ float pos[2];
+ float tex_coord[3];
+ } *vb_data;
+
+ assert(src_image->samples == dest_image->samples);
+
+ unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
+
+ struct anv_state vb_state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
+ memset(vb_state.map, 0, sizeof(struct anv_vue_header));
+ vb_data = vb_state.map + sizeof(struct anv_vue_header);
+
+ vb_data[0] = (struct blit_vb_data) {
+ .pos = {
+ dest_offset.x + dest_extent.width,
+ dest_offset.y + dest_extent.height,
+ },
+ .tex_coord = {
+ (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width,
+ (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height,
+ (float)src_offset.z / (float)src_iview->extent.depth,
+ },
+ };
+
+ vb_data[1] = (struct blit_vb_data) {
+ .pos = {
+ dest_offset.x,
+ dest_offset.y + dest_extent.height,
+ },
+ .tex_coord = {
+ (float)src_offset.x / (float)src_iview->extent.width,
+ (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height,
+ (float)src_offset.z / (float)src_iview->extent.depth,
+ },
+ };
+
+ vb_data[2] = (struct blit_vb_data) {
+ .pos = {
+ dest_offset.x,
+ dest_offset.y,
+ },
+ .tex_coord = {
+ (float)src_offset.x / (float)src_iview->extent.width,
+ (float)src_offset.y / (float)src_iview->extent.height,
+ (float)src_offset.z / (float)src_iview->extent.depth,
+ },
+ };
+
+ anv_state_clflush(vb_state);
+
+ struct anv_buffer vertex_buffer = {
+ .device = device,
+ .size = vb_size,
+ .bo = &device->dynamic_state_block_pool.bo,
+ .offset = vb_state.offset,
+ };
+
+ anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
+ (VkBuffer[]) {
+ anv_buffer_to_handle(&vertex_buffer),
+ anv_buffer_to_handle(&vertex_buffer)
+ },
+ (VkDeviceSize[]) {
+ 0,
+ sizeof(struct anv_vue_header),
+ });
+
+ VkSampler sampler;
+ ANV_CALL(CreateSampler)(anv_device_to_handle(device),
+ &(VkSamplerCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .magFilter = blit_filter,
+ .minFilter = blit_filter,
+ }, &cmd_buffer->pool->alloc, &sampler);
+
+ VkDescriptorSet set;
+ anv_AllocateDescriptorSets(anv_device_to_handle(device),
+ &(VkDescriptorSetAllocateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = dummy_desc_pool,
+ .descriptorSetCount = 1,
+ .pSetLayouts = &device->meta_state.blit.ds_layout
+ }, &set);
+ anv_UpdateDescriptorSets(anv_device_to_handle(device),
+ 1, /* writeCount */
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = set,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = (VkDescriptorImageInfo[]) {
+ {
+ .sampler = sampler,
+ .imageView = anv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }
+ }
+ }, 0, NULL);
+
+ VkFramebuffer fb;
+ anv_CreateFramebuffer(anv_device_to_handle(device),
+ &(VkFramebufferCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkImageView[]) {
+ anv_image_view_to_handle(dest_iview),
+ },
+ .width = dest_iview->extent.width,
+ .height = dest_iview->extent.height,
+ .layers = 1
+ }, &cmd_buffer->pool->alloc, &fb);
+
+ ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
+ &(VkRenderPassBeginInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit.render_pass,
+ .framebuffer = fb,
+ .renderArea = {
+ .offset = { dest_offset.x, dest_offset.y },
+ .extent = { dest_extent.width, dest_extent.height },
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ }, VK_SUBPASS_CONTENTS_INLINE);
+
+ VkPipeline pipeline;
+
+ switch (src_image->type) {
+ case VK_IMAGE_TYPE_1D:
+ pipeline = device->meta_state.blit.pipeline_1d_src;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ pipeline = device->meta_state.blit.pipeline_2d_src;
+ break;
+ case VK_IMAGE_TYPE_3D:
+ pipeline = device->meta_state.blit.pipeline_3d_src;
+ break;
+ default:
+ unreachable(!"bad VkImageType");
+ }
+
+ if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) {
+ anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ }
+
+ anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport) {
+ .x = 0.0f,
+ .y = 0.0f,
+ .width = dest_iview->extent.width,
+ .height = dest_iview->extent.height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f,
+ });
+
+ anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.blit.pipeline_layout, 0, 1,
+ &set, 0, NULL);
+
+ ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+
+ ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+
+ /* At the point where we emit the draw call, all data from the
+ * descriptor sets, etc. has been used. We are free to delete it.
+ */
+ anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set));
+ anv_DestroySampler(anv_device_to_handle(device), sampler,
+ &cmd_buffer->pool->alloc);
+ anv_DestroyFramebuffer(anv_device_to_handle(device), fb,
+ &cmd_buffer->pool->alloc);
+}
+
+static void
+meta_finish_blit(struct anv_cmd_buffer *cmd_buffer,
+ const struct anv_meta_saved_state *saved_state)
+{
+ anv_meta_restore(saved_state, cmd_buffer);
+}
+
+static VkFormat
+vk_format_for_size(int bs)
+{
+ /* Note: We intentionally use the 4-channel formats whenever we can.
+ * This is so that, when we do an RGB <-> RGBX copy, the two formats will
+ * line up even though one of them is 3/4 the size of the other.
+ */
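+ /* For example, an 8-byte block maps to VK_FORMAT_R16G16B16A16_UINT rather
+ * than a two-channel 32-bit format, so its per-channel layout lines up with
+ * the 6-byte VK_FORMAT_R16G16B16_UINT used for the RGB side of such a copy.
+ */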
+ switch (bs) {
+ case 1: return VK_FORMAT_R8_UINT;
+ case 2: return VK_FORMAT_R8G8_UINT;
+ case 3: return VK_FORMAT_R8G8B8_UINT;
+ case 4: return VK_FORMAT_R8G8B8A8_UINT;
+ case 6: return VK_FORMAT_R16G16B16_UINT;
+ case 8: return VK_FORMAT_R16G16B16A16_UINT;
+ case 12: return VK_FORMAT_R32G32B32_UINT;
+ case 16: return VK_FORMAT_R32G32B32A32_UINT;
+ default:
+ unreachable("Invalid format block size");
+ }
+}
+
+static void
+do_buffer_copy(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_bo *src, uint64_t src_offset,
+ struct anv_bo *dest, uint64_t dest_offset,
+ int width, int height, VkFormat copy_format)
+{
+ VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
+
+ VkImageCreateInfo image_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = copy_format,
+ .extent = {
+ .width = width,
+ .height = height,
+ .depth = 1,
+ },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = 1,
+ .tiling = VK_IMAGE_TILING_LINEAR,
+ .usage = 0,
+ .flags = 0,
+ };
+
+ VkImage src_image;
+ image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT;
+ anv_CreateImage(vk_device, &image_info,
+ &cmd_buffer->pool->alloc, &src_image);
+
+ VkImage dest_image;
+ image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ anv_CreateImage(vk_device, &image_info,
+ &cmd_buffer->pool->alloc, &dest_image);
+
+ /* We could use a vk call to bind memory, but that would require
+ * creating a dummy memory object etc. so there's really no point.
+ */
+ anv_image_from_handle(src_image)->bo = src;
+ anv_image_from_handle(src_image)->offset = src_offset;
+ anv_image_from_handle(dest_image)->bo = dest;
+ anv_image_from_handle(dest_image)->offset = dest_offset;
+
+ struct anv_image_view src_iview;
+ anv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = src_image,
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = copy_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, 0);
+
+ struct anv_image_view dest_iview;
+ anv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = dest_image,
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = copy_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ },
+ cmd_buffer, 0);
+
+ meta_emit_blit(cmd_buffer,
+ anv_image_from_handle(src_image),
+ &src_iview,
+ (VkOffset3D) { 0, 0, 0 },
+ (VkExtent3D) { width, height, 1 },
+ anv_image_from_handle(dest_image),
+ &dest_iview,
+ (VkOffset3D) { 0, 0, 0 },
+ (VkExtent3D) { width, height, 1 },
+ VK_FILTER_NEAREST);
+
+ anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc);
+ anv_DestroyImage(vk_device, dest_image, &cmd_buffer->pool->alloc);
+}
+
+void anv_CmdCopyBuffer(
+ VkCommandBuffer commandBuffer,
+ VkBuffer srcBuffer,
+ VkBuffer destBuffer,
+ uint32_t regionCount,
+ const VkBufferCopy* pRegions)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
+ ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer);
+
+ struct anv_meta_saved_state saved_state;
+
+ meta_prepare_blit(cmd_buffer, &saved_state);
+
+ for (unsigned r = 0; r < regionCount; r++) {
+ uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
+ uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset;
+ uint64_t copy_size = pRegions[r].size;
+
+ /* First, we compute the biggest format that can be used with the
+ * given offsets and size.
+ */
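+ /* For example (hypothetical values): src_offset = 48, dest_offset = 8 and
+ * size = 24 give per-term alignments of 16, 8 and 8 bytes, so bs ends up
+ * as 8 and vk_format_for_size() picks VK_FORMAT_R16G16B16A16_UINT.
+ */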
+ int bs = 16;
+
+ int fs = ffs(src_offset) - 1;
+ if (fs != -1)
+ bs = MIN2(bs, 1 << fs);
+ assert(src_offset % bs == 0);
+
+ fs = ffs(dest_offset) - 1;
+ if (fs != -1)
+ bs = MIN2(bs, 1 << fs);
+ assert(dest_offset % bs == 0);
+
+ fs = ffs(pRegions[r].size) - 1;
+ if (fs != -1)
+ bs = MIN2(bs, 1 << fs);
+ assert(pRegions[r].size % bs == 0);
+
+ VkFormat copy_format = vk_format_for_size(bs);
+
+ /* This is the maximum possible width/height our HW can handle */
+ uint64_t max_surface_dim = 1 << 14;
+
+ /* First, we make a bunch of max-sized copies */
+ uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs;
+ while (copy_size >= max_copy_size) {
+ do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
+ dest_buffer->bo, dest_offset,
+ max_surface_dim, max_surface_dim, copy_format);
+ copy_size -= max_copy_size;
+ src_offset += max_copy_size;
+ dest_offset += max_copy_size;
+ }
+
+ uint64_t height = copy_size / (max_surface_dim * bs);
+ assert(height < max_surface_dim);
+ if (height != 0) {
+ uint64_t rect_copy_size = height * max_surface_dim * bs;
+ do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
+ dest_buffer->bo, dest_offset,
+ max_surface_dim, height, copy_format);
+ copy_size -= rect_copy_size;
+ src_offset += rect_copy_size;
+ dest_offset += rect_copy_size;
+ }
+
+ if (copy_size != 0) {
+ do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
+ dest_buffer->bo, dest_offset,
+ copy_size / bs, 1, copy_format);
+ }
+ }
+
+ meta_finish_blit(cmd_buffer, &saved_state);
+}
+
+void anv_CmdUpdateBuffer(
+ VkCommandBuffer commandBuffer,
+ VkBuffer dstBuffer,
+ VkDeviceSize dstOffset,
+ VkDeviceSize dataSize,
+ const uint32_t* pData)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
+ struct anv_meta_saved_state saved_state;
+
+ meta_prepare_blit(cmd_buffer, &saved_state);
+
+ /* We can't quite grab a full block because the state stream needs a
+ * little data at the top to build its linked list.
+ */
+ const uint32_t max_update_size =
+ cmd_buffer->device->dynamic_state_block_pool.block_size - 64;
+
+ assert(max_update_size < (1 << 14) * 4);
+
+ while (dataSize) {
+ const uint32_t copy_size = MIN2(dataSize, max_update_size);
+
+ struct anv_state tmp_data =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);
+
+ memcpy(tmp_data.map, pData, copy_size);
+
+ VkFormat format;
+ int bs;
+ if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) {
+ format = VK_FORMAT_R32G32B32A32_UINT;
+ bs = 16;
+ } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) {
+ format = VK_FORMAT_R32G32_UINT;
+ bs = 8;
+ } else {
+ assert((copy_size & 3) == 0 && (dstOffset & 3) == 0);
+ format = VK_FORMAT_R32_UINT;
+ bs = 4;
+ }
+
+ do_buffer_copy(cmd_buffer,
+ &cmd_buffer->device->dynamic_state_block_pool.bo,
+ tmp_data.offset,
+ dst_buffer->bo, dst_buffer->offset + dstOffset,
+ copy_size / bs, 1, format);
+
+ dataSize -= copy_size;
+ dstOffset += copy_size;
+ pData = (void *)pData + copy_size;
+ }
+}
+
+static VkFormat
+choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect)
+{
+ assert(__builtin_popcount(aspect) == 1);
+
+ struct isl_surf *surf =
+ &anv_image_get_surface_for_aspect_mask(image, aspect)->isl;
+
+ /* vkCmdCopyImage behaves like memcpy. Therefore we choose identical UINT
+ * formats for the source and destination image views.
+ *
+ * From the Vulkan spec (2015-12-30):
+ *
+ * vkCmdCopyImage performs image copies in a similar manner to a host
+ * memcpy. It does not perform general-purpose conversions such as
+ * scaling, resizing, blending, color-space conversion, or format
+ * conversions. Rather, it simply copies raw image data. vkCmdCopyImage
+ * can copy between images with different formats, provided the formats
+ * are compatible as defined below.
+ *
+ * [The spec later defines compatibility as having the same number of
+ * bytes per block].
+ */
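+ /* For example, a VK_FORMAT_R8G8B8A8_SRGB image has a 4-byte block, so both
+ * the source and destination views use VK_FORMAT_R8G8B8A8_UINT and the copy
+ * moves raw texel bytes without any sRGB conversion.
+ */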
+ return vk_format_for_size(isl_format_layouts[surf->format].bs);
+}
+
+static VkFormat
+choose_buffer_format(VkFormat format, VkImageAspectFlagBits aspect)
+{
+ assert(__builtin_popcount(aspect) == 1);
+
+ /* vkCmdCopy* commands behave like memcpy. Therefore we choose
+ * compatible UINT formats for the source and destination image views.
+ *
+ * For the buffer, we go back to the original image format and get the
+ * format as if it were linear. This way, for RGB formats, we get
+ * an RGB format here even if the tiled image is RGBA. XXX: This doesn't
+ * work if the buffer is the destination.
+ */
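+ /* For example, a VK_FORMAT_R8G8B8_UNORM image whose tiled surface may have
+ * been promoted to an RGBA layout still yields a 3-byte linear format here,
+ * so the buffer side is viewed as VK_FORMAT_R8G8B8_UINT.
+ */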
+ enum isl_format linear_format = anv_get_isl_format(format, aspect,
+ VK_IMAGE_TILING_LINEAR,
+ NULL);
+
+ return vk_format_for_size(isl_format_layouts[linear_format].bs);
+}
+
+void anv_CmdCopyImage(
+ VkCommandBuffer commandBuffer,
+ VkImage srcImage,
+ VkImageLayout srcImageLayout,
+ VkImage destImage,
+ VkImageLayout destImageLayout,
+ uint32_t regionCount,
+ const VkImageCopy* pRegions)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_image, src_image, srcImage);
+ ANV_FROM_HANDLE(anv_image, dest_image, destImage);
+ struct anv_meta_saved_state saved_state;
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * vkCmdCopyImage can be used to copy image data between multisample
+ * images, but both images must have the same number of samples.
+ */
+ assert(src_image->samples == dest_image->samples);
+
+ meta_prepare_blit(cmd_buffer, &saved_state);
+
+ for (unsigned r = 0; r < regionCount; r++) {
+ assert(pRegions[r].srcSubresource.aspectMask ==
+ pRegions[r].dstSubresource.aspectMask);
+
+ VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask;
+
+ VkFormat src_format = choose_iview_format(src_image, aspect);
+ VkFormat dst_format = choose_iview_format(dest_image, aspect);
+
+ struct anv_image_view src_iview;
+ anv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = srcImage,
+ .viewType = anv_meta_get_view_type(src_image),
+ .format = src_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = pRegions[r].srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer,
+ .layerCount = pRegions[r].dstSubresource.layerCount,
+ },
+ },
+ cmd_buffer, 0);
+
+ const VkOffset3D dest_offset = {
+ .x = pRegions[r].dstOffset.x,
+ .y = pRegions[r].dstOffset.y,
+ .z = 0,
+ };
+
+ unsigned num_slices;
+ if (src_image->type == VK_IMAGE_TYPE_3D) {
+ assert(pRegions[r].srcSubresource.layerCount == 1 &&
+ pRegions[r].dstSubresource.layerCount == 1);
+ num_slices = pRegions[r].extent.depth;
+ } else {
+ assert(pRegions[r].srcSubresource.layerCount ==
+ pRegions[r].dstSubresource.layerCount);
+ assert(pRegions[r].extent.depth == 1);
+ num_slices = pRegions[r].dstSubresource.layerCount;
+ }
+
+ const uint32_t dest_base_array_slice =
+ anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource,
+ &pRegions[r].dstOffset);
+
+ for (unsigned slice = 0; slice < num_slices; slice++) {
+ VkOffset3D src_offset = pRegions[r].srcOffset;
+ src_offset.z += slice;
+
+ struct anv_image_view dest_iview;
+ anv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = destImage,
+ .viewType = anv_meta_get_view_type(dest_image),
+ .format = dst_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = pRegions[r].dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_base_array_slice + slice,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, 0);
+
+ meta_emit_blit(cmd_buffer,
+ src_image, &src_iview,
+ src_offset,
+ pRegions[r].extent,
+ dest_image, &dest_iview,
+ dest_offset,
+ pRegions[r].extent,
+ VK_FILTER_NEAREST);
+ }
+ }
+
+ meta_finish_blit(cmd_buffer, &saved_state);
+}
+
+void anv_CmdBlitImage(
+ VkCommandBuffer commandBuffer,
+ VkImage srcImage,
+ VkImageLayout srcImageLayout,
+ VkImage destImage,
+ VkImageLayout destImageLayout,
+ uint32_t regionCount,
+ const VkImageBlit* pRegions,
+ VkFilter filter)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_image, src_image, srcImage);
+ ANV_FROM_HANDLE(anv_image, dest_image, destImage);
+ struct anv_meta_saved_state saved_state;
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * vkCmdBlitImage must not be used for multisampled source or
+ * destination images. Use vkCmdResolveImage for this purpose.
+ */
+ assert(src_image->samples == 1);
+ assert(dest_image->samples == 1);
+
+ anv_finishme("respect VkFilter");
+
+ meta_prepare_blit(cmd_buffer, &saved_state);
+
+ for (unsigned r = 0; r < regionCount; r++) {
+ struct anv_image_view src_iview;
+ anv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = srcImage,
+ .viewType = anv_meta_get_view_type(src_image),
+ .format = src_image->vk_format,
+ .subresourceRange = {
+ .aspectMask = pRegions[r].srcSubresource.aspectMask,
+ .baseMipLevel = pRegions[r].srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, 0);
+
+ const VkOffset3D dest_offset = {
+ .x = pRegions[r].dstOffsets[0].x,
+ .y = pRegions[r].dstOffsets[0].y,
+ .z = 0,
+ };
+
+ if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x ||
+ pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y ||
+ pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x ||
+ pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y)
+ anv_finishme("FINISHME: Allow flipping in blits");
+
+ const VkExtent3D dest_extent = {
+ .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x,
+ .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y,
+ };
+
+ const VkExtent3D src_extent = {
+ .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x,
+ .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y,
+ };
+
+ const uint32_t dest_array_slice =
+ anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource,
+ &pRegions[r].dstOffsets[0]);
+
+ if (pRegions[r].srcSubresource.layerCount > 1)
+ anv_finishme("FINISHME: copy multiple array layers");
+
+ if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z ||
+ pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z)
+ anv_finishme("FINISHME: copy multiple depth layers");
+
+ struct anv_image_view dest_iview;
+ anv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = destImage,
+ .viewType = anv_meta_get_view_type(dest_image),
+ .format = dest_image->vk_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = pRegions[r].dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_array_slice,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, 0);
+
+ meta_emit_blit(cmd_buffer,
+ src_image, &src_iview,
+ pRegions[r].srcOffsets[0], src_extent,
+ dest_image, &dest_iview,
+ dest_offset, dest_extent,
+ filter);
+ }
+
+ meta_finish_blit(cmd_buffer, &saved_state);
+}
+
+static struct anv_image *
+make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format,
+ VkImageUsageFlags usage,
+ VkImageType image_type,
+ const VkAllocationCallbacks *alloc,
+ const VkBufferImageCopy *copy)
+{
+ ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer);
+
+ VkExtent3D extent = copy->imageExtent;
+ if (copy->bufferRowLength)
+ extent.width = copy->bufferRowLength;
+ if (copy->bufferImageHeight)
+ extent.height = copy->bufferImageHeight;
+ extent.depth = 1;
+ extent = meta_region_extent_el(format, &extent);
+
+ VkImageAspectFlags aspect = copy->imageSubresource.aspectMask;
+ VkFormat buffer_format = choose_buffer_format(format, aspect);
+
+ VkImage vk_image;
+ VkResult result = anv_CreateImage(vk_device,
+ &(VkImageCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = buffer_format,
+ .extent = extent,
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = 1,
+ .tiling = VK_IMAGE_TILING_LINEAR,
+ .usage = usage,
+ .flags = 0,
+ }, alloc, &vk_image);
+ assert(result == VK_SUCCESS);
+
+ ANV_FROM_HANDLE(anv_image, image, vk_image);
+
+ /* We could use a vk call to bind memory, but that would require
+ * creating a dummy memory object etc. so there's really no point.
+ */
+ image->bo = buffer->bo;
+ image->offset = buffer->offset + copy->bufferOffset;
+
+ return image;
+}
+
+void anv_CmdCopyBufferToImage(
+ VkCommandBuffer commandBuffer,
+ VkBuffer srcBuffer,
+ VkImage destImage,
+ VkImageLayout destImageLayout,
+ uint32_t regionCount,
+ const VkBufferImageCopy* pRegions)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_image, dest_image, destImage);
+ VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
+ struct anv_meta_saved_state saved_state;
+
+ /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
+ * VK_SAMPLE_COUNT_1_BIT."
+ */
+ assert(dest_image->samples == 1);
+
+ meta_prepare_blit(cmd_buffer, &saved_state);
+
+ for (unsigned r = 0; r < regionCount; r++) {
+ VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;
+
+ VkFormat image_format = choose_iview_format(dest_image, aspect);
+
+ struct anv_image *src_image =
+ make_image_for_buffer(vk_device, srcBuffer, dest_image->vk_format,
+ VK_IMAGE_USAGE_SAMPLED_BIT,
+ dest_image->type, &cmd_buffer->pool->alloc,
+ &pRegions[r]);
+
+ const uint32_t dest_base_array_slice =
+ anv_meta_get_iview_layer(dest_image, &pRegions[r].imageSubresource,
+ &pRegions[r].imageOffset);
+
+ unsigned num_slices_3d = pRegions[r].imageExtent.depth;
+ unsigned num_slices_array = pRegions[r].imageSubresource.layerCount;
+ unsigned slice_3d = 0;
+ unsigned slice_array = 0;
+ while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
+ struct anv_image_view src_iview;
+ anv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = anv_image_to_handle(src_image),
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = src_image->vk_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ },
+ cmd_buffer, 0);
+
+ uint32_t img_x = 0;
+ uint32_t img_y = 0;
+ uint32_t img_o = 0;
+ if (isl_format_is_compressed(dest_image->format->surface_format))
+ isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev,
+ &dest_image->color_surface.isl,
+ pRegions[r].imageSubresource.mipLevel,
+ pRegions[r].imageSubresource.baseArrayLayer + slice_array,
+ pRegions[r].imageOffset.z + slice_3d,
+ &img_o, &img_x, &img_y);
+
+ VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, &pRegions[r].imageOffset);
+ dest_offset_el.x += img_x;
+ dest_offset_el.y += img_y;
+ dest_offset_el.z = 0;
+
+ struct anv_image_view dest_iview;
+ anv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = anv_image_to_handle(dest_image),
+ .viewType = anv_meta_get_view_type(dest_image),
+ .format = image_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = pRegions[r].imageSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_base_array_slice +
+ slice_array + slice_3d,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, img_o);
+
+ const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format,
+ &pRegions[r].imageExtent);
+
+ meta_emit_blit(cmd_buffer,
+ src_image,
+ &src_iview,
+ (VkOffset3D){0, 0, 0},
+ img_extent_el,
+ dest_image,
+ &dest_iview,
+ dest_offset_el,
+ img_extent_el,
+ VK_FILTER_NEAREST);
+
+ /* Once we've done the blit, all of the actual information about
+ * the image is embedded in the command buffer so we can just
+ * increment the offset directly in the image effectively
+ * re-binding it to different backing memory.
+ */
+ src_image->offset += src_image->extent.width *
+ src_image->extent.height *
+ src_image->format->isl_layout->bs;
+
+ if (dest_image->type == VK_IMAGE_TYPE_3D)
+ slice_3d++;
+ else
+ slice_array++;
+ }
+
+ anv_DestroyImage(vk_device, anv_image_to_handle(src_image),
+ &cmd_buffer->pool->alloc);
+ }
+
+ meta_finish_blit(cmd_buffer, &saved_state);
+}
+
+void anv_CmdCopyImageToBuffer(
+ VkCommandBuffer commandBuffer,
+ VkImage srcImage,
+ VkImageLayout srcImageLayout,
+ VkBuffer destBuffer,
+ uint32_t regionCount,
+ const VkBufferImageCopy* pRegions)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_image, src_image, srcImage);
+ VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
+ struct anv_meta_saved_state saved_state;
+
+ /* The Vulkan 1.0 spec says "srcImage must have a sample count equal to
+ * VK_SAMPLE_COUNT_1_BIT."
+ */
+ assert(src_image->samples == 1);
+
+ meta_prepare_blit(cmd_buffer, &saved_state);
+
+ for (unsigned r = 0; r < regionCount; r++) {
+ VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;
+
+ VkFormat image_format = choose_iview_format(src_image, aspect);
+
+ struct anv_image_view src_iview;
+ anv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = srcImage,
+ .viewType = anv_meta_get_view_type(src_image),
+ .format = image_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = pRegions[r].imageSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer,
+ .layerCount = pRegions[r].imageSubresource.layerCount,
+ },
+ },
+ cmd_buffer, 0);
+
+ struct anv_image *dest_image =
+ make_image_for_buffer(vk_device, destBuffer, src_image->vk_format,
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ src_image->type, &cmd_buffer->pool->alloc,
+ &pRegions[r]);
+
+ unsigned num_slices;
+ if (src_image->type == VK_IMAGE_TYPE_3D) {
+ assert(pRegions[r].imageSubresource.layerCount == 1);
+ num_slices = pRegions[r].imageExtent.depth;
+ } else {
+ assert(pRegions[r].imageExtent.depth == 1);
+ num_slices = pRegions[r].imageSubresource.layerCount;
+ }
+
+ for (unsigned slice = 0; slice < num_slices; slice++) {
+ VkOffset3D src_offset = pRegions[r].imageOffset;
+ src_offset.z += slice;
+
+ struct anv_image_view dest_iview;
+ anv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = anv_image_to_handle(dest_image),
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = dest_image->vk_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, 0);
+
+ meta_emit_blit(cmd_buffer,
+ anv_image_from_handle(srcImage),
+ &src_iview,
+ src_offset,
+ pRegions[r].imageExtent,
+ dest_image,
+ &dest_iview,
+ (VkOffset3D) { 0, 0, 0 },
+ pRegions[r].imageExtent,
+ VK_FILTER_NEAREST);
+
+ /* Once we've done the blit, all of the actual information about
+ * the image is embedded in the command buffer so we can just
+ * increment the offset directly in the image effectively
+ * re-binding it to different backing memory.
+ */
+ dest_image->offset += dest_image->extent.width *
+ dest_image->extent.height *
+ src_image->format->isl_layout->bs;
+ }
+
+ anv_DestroyImage(vk_device, anv_image_to_handle(dest_image),
+ &cmd_buffer->pool->alloc);
+ }
+
+ meta_finish_blit(cmd_buffer, &saved_state);
+}
+
+static void *
+meta_alloc(void* _device, size_t size, size_t alignment,
+ VkSystemAllocationScope allocationScope)
+{
+ struct anv_device *device = _device;
+ return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+}
+
+static void *
+meta_realloc(void* _device, void *original, size_t size, size_t alignment,
+ VkSystemAllocationScope allocationScope)
+{
+ struct anv_device *device = _device;
+ return device->alloc.pfnReallocation(device->alloc.pUserData, original,
+ size, alignment,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+}
+
+static void
+meta_free(void* _device, void *data)
+{
+ struct anv_device *device = _device;
+ return device->alloc.pfnFree(device->alloc.pUserData, data);
+}
+
+VkResult
+anv_device_init_meta(struct anv_device *device)
+{
+ VkResult result;
+
+ device->meta_state.alloc = (VkAllocationCallbacks) {
+ .pUserData = device,
+ .pfnAllocation = meta_alloc,
+ .pfnReallocation = meta_realloc,
+ .pfnFree = meta_free,
+ };
+
+ result = anv_device_init_meta_clear_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_clear;
+
+ result = anv_device_init_meta_resolve_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_resolve;
+
+ result = anv_device_init_meta_blit_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_blit;
+
+ return VK_SUCCESS;
+
+fail_blit:
+ anv_device_finish_meta_resolve_state(device);
+fail_resolve:
+ anv_device_finish_meta_clear_state(device);
+fail_clear:
+ return result;
+}
+
+void
+anv_device_finish_meta(struct anv_device *device)
+{
+ anv_device_finish_meta_resolve_state(device);
+ anv_device_finish_meta_clear_state(device);
+
+ /* Blit */
+ anv_DestroyRenderPass(anv_device_to_handle(device),
+ device->meta_state.blit.render_pass,
+ &device->meta_state.alloc);
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_1d_src,
+ &device->meta_state.alloc);
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_2d_src,
+ &device->meta_state.alloc);
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_3d_src,
+ &device->meta_state.alloc);
+ anv_DestroyPipelineLayout(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_layout,
+ &device->meta_state.alloc);
+ anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
+ device->meta_state.blit.ds_layout,
+ &device->meta_state.alloc);
+}
--- /dev/null
- #include "glsl/nir/nir_builder.h"
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_meta.h"
+#include "anv_private.h"
++#include "nir/nir_builder.h"
+
+/** Vertex attributes for color clears. */
+struct color_clear_vattrs {
+ struct anv_vue_header vue_header;
+ float position[2]; /**< 3DPRIM_RECTLIST */
+ VkClearColorValue color;
+};
+
+/** Vertex attributes for depthstencil clears. */
+struct depthstencil_clear_vattrs {
+ struct anv_vue_header vue_header;
+ float position[2]; /**< 3DPRIM_RECTLIST */
+};
+
+static void
+meta_clear_begin(struct anv_meta_saved_state *saved_state,
+ struct anv_cmd_buffer *cmd_buffer)
+{
+ anv_meta_save(saved_state, cmd_buffer,
+ (1 << VK_DYNAMIC_STATE_VIEWPORT) |
+ (1 << VK_DYNAMIC_STATE_SCISSOR) |
+ (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE));
+
+ cmd_buffer->state.dynamic.viewport.count = 0;
+ cmd_buffer->state.dynamic.scissor.count = 0;
+}
+
+static void
+meta_clear_end(struct anv_meta_saved_state *saved_state,
+ struct anv_cmd_buffer *cmd_buffer)
+{
+ anv_meta_restore(saved_state, cmd_buffer);
+}
+
+static void
+build_color_shaders(struct nir_shader **out_vs,
+ struct nir_shader **out_fs,
+ uint32_t frag_output)
+{
+ nir_builder vs_b;
+ nir_builder fs_b;
+
+ nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
+ nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
+
+ vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
+ fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");
+
+ const struct glsl_type *position_type = glsl_vec4_type();
+ const struct glsl_type *color_type = glsl_vec4_type();
+
+ nir_variable *vs_in_pos =
+ nir_variable_create(vs_b.shader, nir_var_shader_in, position_type,
+ "a_position");
+ vs_in_pos->data.location = VERT_ATTRIB_GENERIC0;
+
+ nir_variable *vs_out_pos =
+ nir_variable_create(vs_b.shader, nir_var_shader_out, position_type,
+ "gl_Position");
+ vs_out_pos->data.location = VARYING_SLOT_POS;
+
+ nir_variable *vs_in_color =
+ nir_variable_create(vs_b.shader, nir_var_shader_in, color_type,
+ "a_color");
+ vs_in_color->data.location = VERT_ATTRIB_GENERIC1;
+
+ nir_variable *vs_out_color =
+ nir_variable_create(vs_b.shader, nir_var_shader_out, color_type,
+ "v_color");
+ vs_out_color->data.location = VARYING_SLOT_VAR0;
+ vs_out_color->data.interpolation = INTERP_QUALIFIER_FLAT;
+
+ nir_variable *fs_in_color =
+ nir_variable_create(fs_b.shader, nir_var_shader_in, color_type,
+ "v_color");
+ fs_in_color->data.location = vs_out_color->data.location;
+ fs_in_color->data.interpolation = vs_out_color->data.interpolation;
+
+ nir_variable *fs_out_color =
+ nir_variable_create(fs_b.shader, nir_var_shader_out, color_type,
+ "f_color");
+ fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
+
+ nir_copy_var(&vs_b, vs_out_pos, vs_in_pos);
+ nir_copy_var(&vs_b, vs_out_color, vs_in_color);
+ nir_copy_var(&fs_b, fs_out_color, fs_in_color);
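+
+ /* The generated pair is roughly equivalent to a pass-through GLSL
+ * program: the VS copies a_position to gl_Position and a_color to the
+ * flat varying v_color, and the FS writes v_color to the color output
+ * selected by frag_output.
+ */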
+
+ *out_vs = vs_b.shader;
+ *out_fs = fs_b.shader;
+}
+
+static VkResult
+create_pipeline(struct anv_device *device,
+ uint32_t samples,
+ struct nir_shader *vs_nir,
+ struct nir_shader *fs_nir,
+ const VkPipelineVertexInputStateCreateInfo *vi_state,
+ const VkPipelineDepthStencilStateCreateInfo *ds_state,
+ const VkPipelineColorBlendStateCreateInfo *cb_state,
+ const VkAllocationCallbacks *alloc,
+ bool use_repclear,
+ struct anv_pipeline **pipeline)
+{
+ VkDevice device_h = anv_device_to_handle(device);
+ VkResult result;
+
+ struct anv_shader_module vs_m = { .nir = vs_nir };
+ struct anv_shader_module fs_m = { .nir = fs_nir };
+
+ VkPipeline pipeline_h = VK_NULL_HANDLE;
+ result = anv_graphics_pipeline_create(device_h,
+ VK_NULL_HANDLE,
+ &(VkGraphicsPipelineCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = fs_nir ? 2 : 1,
+ .pStages = (VkPipelineShaderStageCreateInfo[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = anv_shader_module_to_handle(&vs_m),
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = anv_shader_module_to_handle(&fs_m),
+ .pName = "main",
+ },
+ },
+ .pVertexInputState = vi_state,
+ .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState = &(VkPipelineViewportStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .pViewports = NULL, /* dynamic */
+ .scissorCount = 1,
+ .pScissors = NULL, /* dynamic */
+ },
+ .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ .depthBiasEnable = false,
+ },
+ .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = samples,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]) { ~0 },
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pDepthStencilState = ds_state,
+ .pColorBlendState = cb_state,
+ .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+ /* The meta clear pipeline declares all state as dynamic.
+ * As a consequence, vkCmdBindPipeline writes no dynamic state
+ * to the cmd buffer. Therefore, at the end of the meta clear,
+ * we need only restore dynamic state that was set with vkCmdSet*.
+ */
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 9,
+ .pDynamicStates = (VkDynamicState[]) {
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .flags = 0,
+ .renderPass = anv_render_pass_to_handle(&anv_meta_dummy_renderpass),
+ .subpass = 0,
+ },
+ &(struct anv_graphics_pipeline_create_info) {
+ .color_attachment_count = MAX_RTS,
+ .use_repclear = use_repclear,
+ .disable_viewport = true,
+ .disable_vs = true,
+ .use_rectlist = true
+ },
+ alloc,
+ &pipeline_h);
+
+ ralloc_free(vs_nir);
+ ralloc_free(fs_nir);
+
+ *pipeline = anv_pipeline_from_handle(pipeline_h);
+
+ return result;
+}
+
+static VkResult
+create_color_pipeline(struct anv_device *device,
+ uint32_t samples,
+ uint32_t frag_output,
+ struct anv_pipeline **pipeline)
+{
+ struct nir_shader *vs_nir;
+ struct nir_shader *fs_nir;
+ build_color_shaders(&vs_nir, &fs_nir, frag_output);
+
+ const VkPipelineVertexInputStateCreateInfo vi_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 1,
+ .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
+ {
+ .binding = 0,
+ .stride = sizeof(struct color_clear_vattrs),
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ },
+ },
+ .vertexAttributeDescriptionCount = 3,
+ .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
+ {
+ /* VUE Header */
+ .location = 0,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32B32A32_UINT,
+ .offset = offsetof(struct color_clear_vattrs, vue_header),
+ },
+ {
+ /* Position */
+ .location = 1,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = offsetof(struct color_clear_vattrs, position),
+ },
+ {
+ /* Color */
+ .location = 2,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32B32A32_SFLOAT,
+ .offset = offsetof(struct color_clear_vattrs, color),
+ },
+ },
+ };
+
+ const VkPipelineDepthStencilStateCreateInfo ds_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = false,
+ .depthWriteEnable = false,
+ .depthBoundsTestEnable = false,
+ .stencilTestEnable = false,
+ };
+
+ const VkPipelineColorBlendStateCreateInfo cb_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = 1,
+ .pAttachments = (VkPipelineColorBlendAttachmentState []) {
+ {
+ .blendEnable = false,
+ .colorWriteMask = VK_COLOR_COMPONENT_A_BIT |
+ VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT,
+ },
+ },
+ };
+
+ /* Disable repclear because we do not want the compiler to replace the
+ * shader. We need the shader to write to the specified color attachment,
+ * but the repclear shader writes to all color attachments.
+ */
+ return
+ create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state,
+ &cb_state, &device->meta_state.alloc,
+ /*use_repclear*/ false, pipeline);
+}
+
+static void
+destroy_pipeline(struct anv_device *device, struct anv_pipeline *pipeline)
+{
+ if (!pipeline)
+ return;
+
+ ANV_CALL(DestroyPipeline)(anv_device_to_handle(device),
+ anv_pipeline_to_handle(pipeline),
+ &device->meta_state.alloc);
+}
+
+void
+anv_device_finish_meta_clear_state(struct anv_device *device)
+{
+ struct anv_meta_state *state = &device->meta_state;
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
+ for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) {
+ destroy_pipeline(device, state->clear[i].color_pipelines[j]);
+ }
+
+ destroy_pipeline(device, state->clear[i].depth_only_pipeline);
+ destroy_pipeline(device, state->clear[i].stencil_only_pipeline);
+ destroy_pipeline(device, state->clear[i].depthstencil_pipeline);
+ }
+}
+
+static void
+emit_color_clear(struct anv_cmd_buffer *cmd_buffer,
+ const VkClearAttachment *clear_att,
+ const VkClearRect *clear_rect)
+{
+ struct anv_device *device = cmd_buffer->device;
+ const struct anv_subpass *subpass = cmd_buffer->state.subpass;
+ const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const uint32_t subpass_att = clear_att->colorAttachment;
+ const uint32_t pass_att = subpass->color_attachments[subpass_att];
+ const struct anv_image_view *iview = fb->attachments[pass_att];
+ const uint32_t samples = iview->image->samples;
+ const uint32_t samples_log2 = ffs(samples) - 1;
+ struct anv_pipeline *pipeline =
+ device->meta_state.clear[samples_log2].color_pipelines[subpass_att];
+ VkClearColorValue clear_value = clear_att->clearValue.color;
+
+ VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer);
+ VkPipeline pipeline_h = anv_pipeline_to_handle(pipeline);
+
+ assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear));
+ assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(clear_att->colorAttachment < subpass->color_count);
+
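+ /* 3DPRIM_RECTLIST consumes three vertices per rectangle and the
+ * hardware derives the fourth corner, so a single clear rect needs
+ * exactly the three vertices emitted below.
+ */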
+ const struct color_clear_vattrs vertex_data[3] = {
+ {
+ .vue_header = { 0 },
+ .position = {
+ clear_rect->rect.offset.x,
+ clear_rect->rect.offset.y,
+ },
+ .color = clear_value,
+ },
+ {
+ .vue_header = { 0 },
+ .position = {
+ clear_rect->rect.offset.x + clear_rect->rect.extent.width,
+ clear_rect->rect.offset.y,
+ },
+ .color = clear_value,
+ },
+ {
+ .vue_header = { 0 },
+ .position = {
+ clear_rect->rect.offset.x + clear_rect->rect.extent.width,
+ clear_rect->rect.offset.y + clear_rect->rect.extent.height,
+ },
+ .color = clear_value,
+ },
+ };
+
+ struct anv_state state =
+ anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16);
+
+ struct anv_buffer vertex_buffer = {
+ .device = device,
+ .size = sizeof(vertex_data),
+ .bo = &device->dynamic_state_block_pool.bo,
+ .offset = state.offset,
+ };
+
+ ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1,
+ (VkViewport[]) {
+ {
+ .x = 0,
+ .y = 0,
+ .width = fb->width,
+ .height = fb->height,
+ .minDepth = 0.0,
+ .maxDepth = 1.0,
+ },
+ });
+
+ ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1,
+ (VkRect2D[]) {
+ {
+ .offset = { 0, 0 },
+ .extent = { fb->width, fb->height },
+ }
+ });
+
+ ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1,
+ (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) },
+ (VkDeviceSize[]) { 0 });
+
+ if (cmd_buffer->state.pipeline != pipeline) {
+ ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline_h);
+ }
+
+ ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0);
+}
+
+
+static void
+build_depthstencil_shader(struct nir_shader **out_vs)
+{
+ nir_builder vs_b;
+
+ nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
+
+ vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
+
+ const struct glsl_type *position_type = glsl_vec4_type();
+
+ nir_variable *vs_in_pos =
+ nir_variable_create(vs_b.shader, nir_var_shader_in, position_type,
+ "a_position");
+ vs_in_pos->data.location = VERT_ATTRIB_GENERIC0;
+
+ nir_variable *vs_out_pos =
+ nir_variable_create(vs_b.shader, nir_var_shader_out, position_type,
+ "gl_Position");
+ vs_out_pos->data.location = VARYING_SLOT_POS;
+
+ nir_copy_var(&vs_b, vs_out_pos, vs_in_pos);
+
+ *out_vs = vs_b.shader;
+}
+
+static VkResult
+create_depthstencil_pipeline(struct anv_device *device,
+ VkImageAspectFlags aspects,
+ uint32_t samples,
+ struct anv_pipeline **pipeline)
+{
+ struct nir_shader *vs_nir;
+
+ build_depthstencil_shader(&vs_nir);
+
+ const VkPipelineVertexInputStateCreateInfo vi_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 1,
+ .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
+ {
+ .binding = 0,
+ .stride = sizeof(struct depthstencil_clear_vattrs),
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ },
+ },
+ .vertexAttributeDescriptionCount = 2,
+ .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
+ {
+ /* VUE Header */
+ .location = 0,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32B32A32_UINT,
+ .offset = offsetof(struct depthstencil_clear_vattrs, vue_header),
+ },
+ {
+ /* Position */
+ .location = 1,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = offsetof(struct depthstencil_clear_vattrs, position),
+ },
+ },
+ };
+
+ const VkPipelineDepthStencilStateCreateInfo ds_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ .depthWriteEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
+ .depthBoundsTestEnable = false,
+ .stencilTestEnable = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
+ .front = {
+ .passOp = VK_STENCIL_OP_REPLACE,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ .writeMask = UINT32_MAX,
+ .reference = 0, /* dynamic */
+ },
+ .back = { 0 /* don't care */ },
+ };
+
+ const VkPipelineColorBlendStateCreateInfo cb_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = 0,
+ .pAttachments = NULL,
+ };
+
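+ /* Unlike the color clear pipeline above, there are no color attachments
+ * here, so using the replicated-data clear (repclear) shader is safe.
+ */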
+ return create_pipeline(device, samples, vs_nir, NULL, &vi_state, &ds_state,
+ &cb_state, &device->meta_state.alloc,
+ /*use_repclear*/ true, pipeline);
+}
+
+static void
+emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer,
+ const VkClearAttachment *clear_att,
+ const VkClearRect *clear_rect)
+{
+ struct anv_device *device = cmd_buffer->device;
+ struct anv_meta_state *meta_state = &device->meta_state;
+ const struct anv_subpass *subpass = cmd_buffer->state.subpass;
+ const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const uint32_t pass_att = subpass->depth_stencil_attachment;
+ const struct anv_image_view *iview = fb->attachments[pass_att];
+ const uint32_t samples = iview->image->samples;
+ const uint32_t samples_log2 = ffs(samples) - 1;
+ VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
+ VkImageAspectFlags aspects = clear_att->aspectMask;
+
+ VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer);
+
+ assert(samples_log2 < ARRAY_SIZE(meta_state->clear));
+ assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT ||
+ aspects == VK_IMAGE_ASPECT_STENCIL_BIT ||
+ aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
+ VK_IMAGE_ASPECT_STENCIL_BIT));
+ assert(pass_att != VK_ATTACHMENT_UNUSED);
+
+ const struct depthstencil_clear_vattrs vertex_data[3] = {
+ {
+ .vue_header = { 0 },
+ .position = {
+ clear_rect->rect.offset.x,
+ clear_rect->rect.offset.y,
+ },
+ },
+ {
+ .vue_header = { 0 },
+ .position = {
+ clear_rect->rect.offset.x + clear_rect->rect.extent.width,
+ clear_rect->rect.offset.y,
+ },
+ },
+ {
+ .vue_header = { 0 },
+ .position = {
+ clear_rect->rect.offset.x + clear_rect->rect.extent.width,
+ clear_rect->rect.offset.y + clear_rect->rect.extent.height,
+ },
+ },
+ };
+
+ struct anv_state state =
+ anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16);
+
+ struct anv_buffer vertex_buffer = {
+ .device = device,
+ .size = sizeof(vertex_data),
+ .bo = &device->dynamic_state_block_pool.bo,
+ .offset = state.offset,
+ };
+
+ ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1,
+ (VkViewport[]) {
+ {
+ .x = 0,
+ .y = 0,
+ .width = fb->width,
+ .height = fb->height,
+
+ /* Ignored when clearing only stencil. */
+ .minDepth = clear_value.depth,
+ .maxDepth = clear_value.depth,
+ },
+ });
+
+ ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1,
+ (VkRect2D[]) {
+ {
+ .offset = { 0, 0 },
+ .extent = { fb->width, fb->height },
+ }
+ });
+
+ if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
+ ANV_CALL(CmdSetStencilReference)(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
+ clear_value.stencil);
+ }
+
+ ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1,
+ (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) },
+ (VkDeviceSize[]) { 0 });
+
+ struct anv_pipeline *pipeline;
+ switch (aspects) {
+ case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
+ pipeline = meta_state->clear[samples_log2].depthstencil_pipeline;
+ break;
+ case VK_IMAGE_ASPECT_DEPTH_BIT:
+ pipeline = meta_state->clear[samples_log2].depth_only_pipeline;
+ break;
+ case VK_IMAGE_ASPECT_STENCIL_BIT:
+ pipeline = meta_state->clear[samples_log2].stencil_only_pipeline;
+ break;
+ default:
+ unreachable("expected depth or stencil aspect");
+ }
+
+ if (cmd_buffer->state.pipeline != pipeline) {
+ ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ anv_pipeline_to_handle(pipeline));
+ }
+
+ ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0);
+}
+
+VkResult
+anv_device_init_meta_clear_state(struct anv_device *device)
+{
+ VkResult res;
+ struct anv_meta_state *state = &device->meta_state;
+
+ zero(device->meta_state.clear);
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
+ uint32_t samples = 1 << i;
+
+ for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) {
+ res = create_color_pipeline(device, samples, /* frag_output */ j,
+ &state->clear[i].color_pipelines[j]);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ res = create_depthstencil_pipeline(device,
+ VK_IMAGE_ASPECT_DEPTH_BIT, samples,
+ &state->clear[i].depth_only_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depthstencil_pipeline(device,
+ VK_IMAGE_ASPECT_STENCIL_BIT, samples,
+ &state->clear[i].stencil_only_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depthstencil_pipeline(device,
+ VK_IMAGE_ASPECT_DEPTH_BIT |
+ VK_IMAGE_ASPECT_STENCIL_BIT, samples,
+ &state->clear[i].depthstencil_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ return VK_SUCCESS;
+
+fail:
+ anv_device_finish_meta_clear_state(device);
+ return res;
+}
+
+/**
+ * The parameters mean the same as those of vkCmdClearAttachments.
+ */
+static void
+emit_clear(struct anv_cmd_buffer *cmd_buffer,
+ const VkClearAttachment *clear_att,
+ const VkClearRect *clear_rect)
+{
+ if (clear_att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+ emit_color_clear(cmd_buffer, clear_att, clear_rect);
+ } else {
+ assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT |
+ VK_IMAGE_ASPECT_STENCIL_BIT));
+ emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect);
+ }
+}
+
+static bool
+subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer)
+{
+ const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
+ uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
+
+ for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
+ uint32_t a = cmd_state->subpass->color_attachments[i];
+ if (cmd_state->attachments[a].pending_clear_aspects) {
+ return true;
+ }
+ }
+
+ if (ds != VK_ATTACHMENT_UNUSED &&
+ cmd_state->attachments[ds].pending_clear_aspects) {
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * Emit any pending attachment clears for the current subpass.
+ *
+ * @see anv_attachment_state::pending_clear_aspects
+ */
+void
+anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
+{
+ struct anv_cmd_state *cmd_state = &cmd_buffer->state;
+ struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ struct anv_meta_saved_state saved_state;
+
+ if (!subpass_needs_clear(cmd_buffer))
+ return;
+
+ meta_clear_begin(&saved_state, cmd_buffer);
+
+ if (cmd_state->framebuffer->layers > 1)
+ anv_finishme("clearing multi-layer framebuffer");
+
+ VkClearRect clear_rect = {
+ .rect = {
+ .offset = { 0, 0 },
+ .extent = { fb->width, fb->height },
+ },
+ .baseArrayLayer = 0,
+ .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
+ };
+
+ for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
+ uint32_t a = cmd_state->subpass->color_attachments[i];
+
+ if (!cmd_state->attachments[a].pending_clear_aspects)
+ continue;
+
+ assert(cmd_state->attachments[a].pending_clear_aspects ==
+ VK_IMAGE_ASPECT_COLOR_BIT);
+
+ VkClearAttachment clear_att = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .colorAttachment = i, /* Use attachment index relative to subpass */
+ .clearValue = cmd_state->attachments[a].clear_value,
+ };
+
+ emit_clear(cmd_buffer, &clear_att, &clear_rect);
+ cmd_state->attachments[a].pending_clear_aspects = 0;
+ }
+
+ uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
+
+ if (ds != VK_ATTACHMENT_UNUSED &&
+ cmd_state->attachments[ds].pending_clear_aspects) {
+
+ VkClearAttachment clear_att = {
+ .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
+ .clearValue = cmd_state->attachments[ds].clear_value,
+ };
+
+ emit_clear(cmd_buffer, &clear_att, &clear_rect);
+ cmd_state->attachments[ds].pending_clear_aspects = 0;
+ }
+
+ meta_clear_end(&saved_state, cmd_buffer);
+}
+
+static void
+anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_image *image,
+ VkImageLayout image_layout,
+ const VkClearValue *clear_value,
+ uint32_t range_count,
+ const VkImageSubresourceRange *ranges)
+{
+ VkDevice device_h = anv_device_to_handle(cmd_buffer->device);
+
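+ /* Clear one (mip level, array layer) at a time: wrap each subresource in
+ * a temporary single-attachment view, framebuffer and render pass (with
+ * LOAD_OP_LOAD so the pass itself performs no implicit clear), then reuse
+ * the vkCmdClearAttachments path via emit_clear().
+ */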
+ for (uint32_t r = 0; r < range_count; r++) {
+ const VkImageSubresourceRange *range = &ranges[r];
+
+ for (uint32_t l = 0; l < range->levelCount; ++l) {
+ for (uint32_t s = 0; s < range->layerCount; ++s) {
+ struct anv_image_view iview;
+ anv_image_view_init(&iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = anv_image_to_handle(image),
+ .viewType = anv_meta_get_view_type(image),
+ .format = image->vk_format,
+ .subresourceRange = {
+ .aspectMask = range->aspectMask,
+ .baseMipLevel = range->baseMipLevel + l,
+ .levelCount = 1,
+ .baseArrayLayer = range->baseArrayLayer + s,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, 0);
+
+ VkFramebuffer fb;
+ anv_CreateFramebuffer(device_h,
+ &(VkFramebufferCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkImageView[]) {
+ anv_image_view_to_handle(&iview),
+ },
+ .width = iview.extent.width,
+ .height = iview.extent.height,
+ .layers = 1
+ },
+ &cmd_buffer->pool->alloc,
+ &fb);
+
+ VkAttachmentDescription att_desc = {
+ .format = iview.vk_format,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = image_layout,
+ .finalLayout = image_layout,
+ };
+
+ VkSubpassDescription subpass_desc = {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = NULL,
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ };
+
+ const VkAttachmentReference att_ref = {
+ .attachment = 0,
+ .layout = image_layout,
+ };
+
+ if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+ subpass_desc.colorAttachmentCount = 1;
+ subpass_desc.pColorAttachments = &att_ref;
+ } else {
+ subpass_desc.pDepthStencilAttachment = &att_ref;
+ }
+
+ VkRenderPass pass;
+ anv_CreateRenderPass(device_h,
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &att_desc,
+ .subpassCount = 1,
+ .pSubpasses = &subpass_desc,
+ },
+ &cmd_buffer->pool->alloc,
+ &pass);
+
+ ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
+ &(VkRenderPassBeginInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderArea = {
+ .offset = { 0, 0, },
+ .extent = {
+ .width = iview.extent.width,
+ .height = iview.extent.height,
+ },
+ },
+ .renderPass = pass,
+ .framebuffer = fb,
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ VK_SUBPASS_CONTENTS_INLINE);
+
+ VkClearAttachment clear_att = {
+ .aspectMask = range->aspectMask,
+ .colorAttachment = 0,
+ .clearValue = *clear_value,
+ };
+
+ VkClearRect clear_rect = {
+ .rect = {
+ .offset = { 0, 0 },
+ .extent = { iview.extent.width, iview.extent.height },
+ },
+ .baseArrayLayer = range->baseArrayLayer,
+ .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
+ };
+
+ emit_clear(cmd_buffer, &clear_att, &clear_rect);
+
+ ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+ ANV_CALL(DestroyRenderPass)(device_h, pass,
+ &cmd_buffer->pool->alloc);
+ ANV_CALL(DestroyFramebuffer)(device_h, fb,
+ &cmd_buffer->pool->alloc);
+ }
+ }
+ }
+}
+
+void anv_CmdClearColorImage(
+ VkCommandBuffer commandBuffer,
+ VkImage image_h,
+ VkImageLayout imageLayout,
+ const VkClearColorValue* pColor,
+ uint32_t rangeCount,
+ const VkImageSubresourceRange* pRanges)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_image, image, image_h);
+ struct anv_meta_saved_state saved_state;
+
+ meta_clear_begin(&saved_state, cmd_buffer);
+
+ anv_cmd_clear_image(cmd_buffer, image, imageLayout,
+ (const VkClearValue *) pColor,
+ rangeCount, pRanges);
+
+ meta_clear_end(&saved_state, cmd_buffer);
+}
+
+void anv_CmdClearDepthStencilImage(
+ VkCommandBuffer commandBuffer,
+ VkImage image_h,
+ VkImageLayout imageLayout,
+ const VkClearDepthStencilValue* pDepthStencil,
+ uint32_t rangeCount,
+ const VkImageSubresourceRange* pRanges)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_image, image, image_h);
+ struct anv_meta_saved_state saved_state;
+
+ meta_clear_begin(&saved_state, cmd_buffer);
+
+ anv_cmd_clear_image(cmd_buffer, image, imageLayout,
+ (const VkClearValue *) pDepthStencil,
+ rangeCount, pRanges);
+
+ meta_clear_end(&saved_state, cmd_buffer);
+}
+
+void anv_CmdClearAttachments(
+ VkCommandBuffer commandBuffer,
+ uint32_t attachmentCount,
+ const VkClearAttachment* pAttachments,
+ uint32_t rectCount,
+ const VkClearRect* pRects)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct anv_meta_saved_state saved_state;
+
+ meta_clear_begin(&saved_state, cmd_buffer);
+
+ /* FINISHME: We can do better than this dumb loop. It thrashes too much
+ * state.
+ */
+ for (uint32_t a = 0; a < attachmentCount; ++a) {
+ for (uint32_t r = 0; r < rectCount; ++r) {
+ emit_clear(cmd_buffer, &pAttachments[a], &pRects[r]);
+ }
+ }
+
+ meta_clear_end(&saved_state, cmd_buffer);
+}
+
+static void
+do_buffer_fill(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_bo *dest, uint64_t dest_offset,
+ int width, int height, VkFormat fill_format, uint32_t data)
+{
+ VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
+
+ VkImageCreateInfo image_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = fill_format,
+ .extent = {
+ .width = width,
+ .height = height,
+ .depth = 1,
+ },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = 1,
+ .tiling = VK_IMAGE_TILING_LINEAR,
+ .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ .flags = 0,
+ };
+
+ VkImage dest_image;
+ image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ anv_CreateImage(vk_device, &image_info,
+ &cmd_buffer->pool->alloc, &dest_image);
+
+ /* We could use a vk call to bind memory, but that would require
+ * creating a dummy memory object etc. so there's really no point.
+ */
+ anv_image_from_handle(dest_image)->bo = dest;
+ anv_image_from_handle(dest_image)->offset = dest_offset;
+
+ const VkClearValue clear_value = {
+ .color = {
+ .uint32 = { data, data, data, data }
+ }
+ };
+
+ const VkImageSubresourceRange range = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ };
+
+ anv_cmd_clear_image(cmd_buffer, anv_image_from_handle(dest_image),
+ VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ &clear_value, 1, &range);
+}
+
+void anv_CmdFillBuffer(
+ VkCommandBuffer commandBuffer,
+ VkBuffer dstBuffer,
+ VkDeviceSize dstOffset,
+ VkDeviceSize fillSize,
+ uint32_t data)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
+ struct anv_meta_saved_state saved_state;
+
+ meta_clear_begin(&saved_state, cmd_buffer);
+
+ VkFormat format;
+ int bs;
+ if ((fillSize & 15) == 0 && (dstOffset & 15) == 0) {
+ format = VK_FORMAT_R32G32B32A32_UINT;
+ bs = 16;
+ } else if ((fillSize & 7) == 0 && (dstOffset & 15) == 0) {
+ format = VK_FORMAT_R32G32_UINT;
+ bs = 8;
+ } else {
+ assert((fillSize & 3) == 0 && (dstOffset & 3) == 0);
+ format = VK_FORMAT_R32_UINT;
+ bs = 4;
+ }
+
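+ /* The fill is decomposed into color clears of linear "images" aliased
+ * over the buffer: as many max-sized square rectangles as fit, then one
+ * max-width rectangle covering the remaining full rows, and finally a
+ * single-row rectangle for the tail. For example, a 1 MiB fill at
+ * 16-byte alignment becomes a single 16384x4 R32G32B32A32_UINT clear.
+ */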
+ /* This is the maximum width/height our HW can handle. */
+ const uint64_t max_surface_dim = 1 << 14;
+
+ /* First, emit as many max-sized fills as fit */
+ const uint64_t max_fill_size = max_surface_dim * max_surface_dim * bs;
+ while (fillSize > max_fill_size) {
+ do_buffer_fill(cmd_buffer, dst_buffer->bo,
+ dst_buffer->offset + dstOffset,
+ max_surface_dim, max_surface_dim, format, data);
+ fillSize -= max_fill_size;
+ dstOffset += max_fill_size;
+ }
+
+ uint64_t height = fillSize / (max_surface_dim * bs);
+ assert(height < max_surface_dim);
+ if (height != 0) {
+ const uint64_t rect_fill_size = height * max_surface_dim * bs;
+ do_buffer_fill(cmd_buffer, dst_buffer->bo,
+ dst_buffer->offset + dstOffset,
+ max_surface_dim, height, format, data);
+ fillSize -= rect_fill_size;
+ dstOffset += rect_fill_size;
+ }
+
+ if (fillSize != 0) {
+ do_buffer_fill(cmd_buffer, dst_buffer->bo,
+ dst_buffer->offset + dstOffset,
+ fillSize / bs, 1, format, data);
+ }
+
+ meta_clear_end(&saved_state, cmd_buffer);
+}
--- /dev/null
- #include "glsl/nir/nir_builder.h"
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include "anv_meta.h"
+#include "anv_private.h"
++#include "nir/nir_builder.h"
+
+/**
+ * Vertex attributes used by all pipelines.
+ */
+struct vertex_attrs {
+ struct anv_vue_header vue_header;
+ float position[2]; /**< 3DPRIM_RECTLIST */
+ float tex_position[2];
+};
+
+static void
+meta_resolve_save(struct anv_meta_saved_state *saved_state,
+ struct anv_cmd_buffer *cmd_buffer)
+{
+ anv_meta_save(saved_state, cmd_buffer,
+ (1 << VK_DYNAMIC_STATE_VIEWPORT) |
+ (1 << VK_DYNAMIC_STATE_SCISSOR));
+
+ cmd_buffer->state.dynamic.viewport.count = 0;
+ cmd_buffer->state.dynamic.scissor.count = 0;
+}
+
+static void
+meta_resolve_restore(struct anv_meta_saved_state *saved_state,
+ struct anv_cmd_buffer *cmd_buffer)
+{
+ anv_meta_restore(saved_state, cmd_buffer);
+}
+
+static VkPipeline *
+get_pipeline_h(struct anv_device *device, uint32_t samples)
+{
+ uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */
+
+ assert(samples >= 2);
+ assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines));
+
+ return &device->meta_state.resolve.pipelines[i];
+}
+
+static nir_shader *
+build_nir_vs(void)
+{
+ const struct glsl_type *vec4 = glsl_vec4_type();
+
+ nir_builder b;
+ nir_variable *a_position;
+ nir_variable *v_position;
+ nir_variable *a_tex_position;
+ nir_variable *v_tex_position;
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs");
+
+ a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ "a_position");
+ a_position->data.location = VERT_ATTRIB_GENERIC0;
+
+ v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
+ "gl_Position");
+ v_position->data.location = VARYING_SLOT_POS;
+
+ a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ "a_tex_position");
+ a_tex_position->data.location = VERT_ATTRIB_GENERIC1;
+
+ v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
+ "v_tex_position");
+ v_tex_position->data.location = VARYING_SLOT_VAR0;
+
+ nir_copy_var(&b, v_position, a_position);
+ nir_copy_var(&b, v_tex_position, a_tex_position);
+
+ return b.shader;
+}
+
+static nir_shader *
+build_nir_fs(uint32_t num_samples)
+{
+ const struct glsl_type *vec4 = glsl_vec4_type();
+
+ const struct glsl_type *sampler2DMS =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
+ /*is_shadow*/ false,
+ /*is_array*/ false,
+ GLSL_TYPE_FLOAT);
+
+ nir_builder b;
+ nir_variable *u_tex; /* uniform sampler */
+ nir_variable *v_position; /* vec4, varying fragment position */
+ nir_variable *v_tex_position; /* vec4, varying texture coordinate */
+ nir_variable *f_color; /* vec4, fragment output color */
+ nir_ssa_def *accum; /* vec4, accumulation of sample values */
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ b.shader->info.name = ralloc_asprintf(b.shader,
+ "meta_resolve_fs_samples%02d",
+ num_samples);
+
+ u_tex = nir_variable_create(b.shader, nir_var_uniform, sampler2DMS,
+ "u_tex");
+ u_tex->data.descriptor_set = 0;
+ u_tex->data.binding = 0;
+
+ v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ "v_position");
+ v_position->data.location = VARYING_SLOT_POS;
+
+ v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ "v_tex_position");
+ v_tex_position->data.location = VARYING_SLOT_VAR0;
+
+ f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4,
+ "f_color");
+ f_color->data.location = FRAG_RESULT_DATA0;
+
+ accum = nir_imm_vec4(&b, 0, 0, 0, 0);
+
+ nir_ssa_def *tex_position_ivec =
+ nir_f2i(&b, nir_load_var(&b, v_tex_position));
+
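+ /* The loop below is roughly equivalent to the GLSL
+ * f_color = (sum_i texelFetch(u_tex, ivec2(v_tex_position), i)) / num_samples,
+ * i.e. a simple box average over all samples of the multisampled source.
+ */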
+ for (uint32_t i = 0; i < num_samples; ++i) {
+ nir_tex_instr *tex;
+
+ tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2);
+ tex->sampler = nir_deref_var_create(tex, u_tex);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex->op = nir_texop_txf_ms;
+ tex->src[0].src = nir_src_for_ssa(tex_position_ivec);
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
+ tex->src[1].src_type = nir_tex_src_ms_index;
+ tex->dest_type = nir_type_float;
+ tex->is_array = false;
+ tex->coord_components = 3;
+ nir_ssa_dest_init(&tex->instr, &tex->dest, /*num_components*/ 4, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ accum = nir_fadd(&b, accum, &tex->dest.ssa);
+ }
+
+ accum = nir_fdiv(&b, accum, nir_imm_float(&b, num_samples));
+ nir_store_var(&b, f_color, accum, /*writemask*/ 4);
+
+ return b.shader;
+}
+
+static VkResult
+create_pass(struct anv_device *device)
+{
+ VkResult result;
+ VkDevice device_h = anv_device_to_handle(device);
+ const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+
+ result = anv_CreateRenderPass(device_h,
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
+ .samples = 1,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = VK_ATTACHMENT_UNUSED,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 0,
+ },
+ alloc,
+ &device->meta_state.resolve.pass);
+
+ return result;
+}
+
+static VkResult
+create_pipeline(struct anv_device *device,
+ uint32_t num_samples,
+ VkShaderModule vs_module_h)
+{
+ VkResult result;
+ VkDevice device_h = anv_device_to_handle(device);
+
+ struct anv_shader_module fs_module = {
+ .nir = build_nir_fs(num_samples),
+ };
+
+ if (!fs_module.nir) {
+ /* XXX: Need more accurate error */
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto cleanup;
+ }
+
+ result = anv_graphics_pipeline_create(device_h,
+ VK_NULL_HANDLE,
+ &(VkGraphicsPipelineCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = 2,
+ .pStages = (VkPipelineShaderStageCreateInfo[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vs_module_h,
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = anv_shader_module_to_handle(&fs_module),
+ .pName = "main",
+ },
+ },
+ .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 1,
+ .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
+ {
+ .binding = 0,
+ .stride = sizeof(struct vertex_attrs),
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ },
+ },
+ .vertexAttributeDescriptionCount = 3,
+ .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
+ {
+ /* VUE Header */
+ .location = 0,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32B32A32_UINT,
+ .offset = offsetof(struct vertex_attrs, vue_header),
+ },
+ {
+ /* Position */
+ .location = 1,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = offsetof(struct vertex_attrs, position),
+ },
+ {
+ /* Texture Coordinate */
+ .location = 2,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = offsetof(struct vertex_attrs, tex_position),
+ },
+ },
+ },
+ .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState = &(VkPipelineViewportStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .depthClampEnable = false,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ },
+ .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]) { 0x1 },
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = 1,
+ .pAttachments = (VkPipelineColorBlendAttachmentState []) {
+ {
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT |
+ VK_COLOR_COMPONENT_A_BIT,
+ },
+ },
+ },
+ .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates = (VkDynamicState[]) {
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = device->meta_state.resolve.pipeline_layout,
+ .renderPass = device->meta_state.resolve.pass,
+ .subpass = 0,
+ },
+ &(struct anv_graphics_pipeline_create_info) {
+ .color_attachment_count = -1,
+ .use_repclear = false,
+ .disable_viewport = true,
+ .disable_scissor = true,
+ .disable_vs = true,
+ .use_rectlist = true
+ },
+ &device->meta_state.alloc,
+ get_pipeline_h(device, num_samples));
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ goto cleanup;
+
+cleanup:
+ ralloc_free(fs_module.nir);
+ return result;
+}
+
+void
+anv_device_finish_meta_resolve_state(struct anv_device *device)
+{
+ struct anv_meta_state *state = &device->meta_state;
+ VkDevice device_h = anv_device_to_handle(device);
+ VkRenderPass pass_h = device->meta_state.resolve.pass;
+ VkPipelineLayout pipeline_layout_h = device->meta_state.resolve.pipeline_layout;
+ VkDescriptorSetLayout ds_layout_h = device->meta_state.resolve.ds_layout;
+ const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+
+ if (pass_h)
+ ANV_CALL(DestroyRenderPass)(device_h, pass_h,
+ &device->meta_state.alloc);
+
+ if (pipeline_layout_h)
+ ANV_CALL(DestroyPipelineLayout)(device_h, pipeline_layout_h, alloc);
+
+ if (ds_layout_h)
+ ANV_CALL(DestroyDescriptorSetLayout)(device_h, ds_layout_h, alloc);
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) {
+ VkPipeline pipeline_h = state->resolve.pipelines[i];
+
+ if (pipeline_h) {
+ ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc);
+ }
+ }
+}
+
+VkResult
+anv_device_init_meta_resolve_state(struct anv_device *device)
+{
+ VkResult res = VK_SUCCESS;
+ VkDevice device_h = anv_device_to_handle(device);
+ const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+
+ const isl_sample_count_mask_t sample_count_mask =
+ isl_device_get_sample_counts(&device->isl_dev);
+
+ zero(device->meta_state.resolve);
+
+ struct anv_shader_module vs_module = { .nir = build_nir_vs() };
+ if (!vs_module.nir) {
+ /* XXX: Need more accurate error */
+ res = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail;
+ }
+
+ VkShaderModule vs_module_h = anv_shader_module_to_handle(&vs_module);
+
+ res = anv_CreateDescriptorSetLayout(device_h,
+ &(VkDescriptorSetLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]) {
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ },
+ },
+ },
+ alloc,
+ &device->meta_state.resolve.ds_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = anv_CreatePipelineLayout(device_h,
+ &(VkPipelineLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = (VkDescriptorSetLayout[]) {
+ device->meta_state.resolve.ds_layout,
+ },
+ },
+ alloc,
+ &device->meta_state.resolve.pipeline_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_pass(device);
+ if (res != VK_SUCCESS)
+ goto fail;
+
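+ /* Create one resolve pipeline per supported MSAA sample count. Array
+ * index i corresponds to 2^(i+1) samples (2, 4, 8, ...), matching the
+ * ffs(samples) - 2 lookup in get_pipeline_h(); counts not reported by
+ * isl_device_get_sample_counts() are skipped.
+ */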
+ for (uint32_t i = 0;
+ i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) {
+
+ uint32_t sample_count = 1 << (1 + i);
+ if (!(sample_count_mask & sample_count))
+ continue;
+
+ res = create_pipeline(device, sample_count, vs_module_h);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ goto cleanup;
+
+fail:
+ anv_device_finish_meta_resolve_state(device);
+
+cleanup:
+ ralloc_free(vs_module.nir);
+
+ return res;
+}
+
+static void
+emit_resolve(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_image_view *src_iview,
+ const VkOffset2D *src_offset,
+ struct anv_image_view *dest_iview,
+ const VkOffset2D *dest_offset,
+ const VkExtent2D *resolve_extent)
+{
+ struct anv_device *device = cmd_buffer->device;
+ VkDevice device_h = anv_device_to_handle(device);
+ VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer);
+ const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const struct anv_image *src_image = src_iview->image;
+ VkDescriptorPool dummy_desc_pool_h = (VkDescriptorPool) 1;
+
+ const struct vertex_attrs vertex_data[3] = {
+ {
+ .vue_header = {0},
+ .position = {
+ dest_offset->x + resolve_extent->width,
+ dest_offset->y + resolve_extent->height,
+ },
+ .tex_position = {
+ src_offset->x + resolve_extent->width,
+ src_offset->y + resolve_extent->height,
+ },
+ },
+ {
+ .vue_header = {0},
+ .position = {
+ dest_offset->x,
+ dest_offset->y + resolve_extent->height,
+ },
+ .tex_position = {
+ src_offset->x,
+ src_offset->y + resolve_extent->height,
+ },
+ },
+ {
+ .vue_header = {0},
+ .position = {
+ dest_offset->x,
+ dest_offset->y,
+ },
+ .tex_position = {
+ src_offset->x,
+ src_offset->y,
+ },
+ },
+ };
+
+ struct anv_state vertex_mem =
+ anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data,
+ sizeof(vertex_data), 16);
+
+ struct anv_buffer vertex_buffer = {
+ .device = device,
+ .size = sizeof(vertex_data),
+ .bo = &cmd_buffer->dynamic_state_stream.block_pool->bo,
+ .offset = vertex_mem.offset,
+ };
+
+ VkBuffer vertex_buffer_h = anv_buffer_to_handle(&vertex_buffer);
+
+ anv_CmdBindVertexBuffers(cmd_buffer_h,
+ /*firstBinding*/ 0,
+ /*bindingCount*/ 1,
+ (VkBuffer[]) { vertex_buffer_h },
+ (VkDeviceSize[]) { 0 });
+
+ VkSampler sampler_h;
+ ANV_CALL(CreateSampler)(device_h,
+ &(VkSamplerCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .magFilter = VK_FILTER_NEAREST,
+ .minFilter = VK_FILTER_NEAREST,
+ .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
+ .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .mipLodBias = 0.0,
+ .anisotropyEnable = false,
+ .compareEnable = false,
+ .minLod = 0.0,
+ .maxLod = 0.0,
+ .unnormalizedCoordinates = false,
+ },
+ &cmd_buffer->pool->alloc,
+ &sampler_h);
+
+ VkDescriptorSet desc_set_h;
+ anv_AllocateDescriptorSets(device_h,
+ &(VkDescriptorSetAllocateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = dummy_desc_pool_h,
+ .descriptorSetCount = 1,
+ .pSetLayouts = (VkDescriptorSetLayout[]) {
+ device->meta_state.blit.ds_layout,
+ },
+ },
+ &desc_set_h);
+
+ ANV_FROM_HANDLE(anv_descriptor_set, desc_set, desc_set_h);
+
+ anv_UpdateDescriptorSets(device_h,
+ /*writeCount*/ 1,
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = desc_set_h,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = (VkDescriptorImageInfo[]) {
+ {
+ .sampler = sampler_h,
+ .imageView = anv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ },
+ },
+ },
+ /*copyCount*/ 0,
+ /*copies */ NULL);
+
+ ANV_CALL(CmdSetViewport)(cmd_buffer_h,
+ /*firstViewport*/ 0,
+ /*viewportCount*/ 1,
+ (VkViewport[]) {
+ {
+ .x = 0,
+ .y = 0,
+ .width = fb->width,
+ .height = fb->height,
+ .minDepth = 0.0,
+ .maxDepth = 1.0,
+ },
+ });
+
+ ANV_CALL(CmdSetScissor)(cmd_buffer_h,
+ /*firstScissor*/ 0,
+ /*scissorCount*/ 1,
+ (VkRect2D[]) {
+ {
+ .offset = { 0, 0 },
+ .extent = (VkExtent2D) { fb->width, fb->height },
+ },
+ });
+
+ VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples);
+ ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h);
+
+ if (cmd_buffer->state.pipeline != pipeline) {
+ anv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline_h);
+ }
+
+ anv_CmdBindDescriptorSets(cmd_buffer_h,
+ VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.resolve.pipeline_layout,
+ /*firstSet*/ 0,
+ /* setCount */ 1,
+ (VkDescriptorSet[]) {
+ desc_set_h,
+ },
+ /*copyCount*/ 0,
+ /*copies */ NULL);
+
+ ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0);
+
+ /* The objects destroyed below have already been consumed by the draw
+ * call, so they can safely be freed now.
+ */
+ anv_descriptor_set_destroy(device, desc_set);
+ anv_DestroySampler(device_h, sampler_h,
+ &cmd_buffer->pool->alloc);
+}
+
+void anv_CmdResolveImage(
+ VkCommandBuffer cmd_buffer_h,
+ VkImage src_image_h,
+ VkImageLayout src_image_layout,
+ VkImage dest_image_h,
+ VkImageLayout dest_image_layout,
+ uint32_t region_count,
+ const VkImageResolve* regions)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h);
+ ANV_FROM_HANDLE(anv_image, src_image, src_image_h);
+ ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h);
+ struct anv_device *device = cmd_buffer->device;
+ struct anv_meta_saved_state state;
+ VkDevice device_h = anv_device_to_handle(device);
+
+ meta_resolve_save(&state, cmd_buffer);
+
+ assert(src_image->samples > 1);
+ assert(dest_image->samples == 1);
+
+ if (src_image->samples >= 16) {
+ /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the
+ * glBlitFramebuffer workaround for samples >= 16.
+ */
+ anv_finishme("vkCmdResolveImage: need interpolation workaround when "
+ "samples >= 16");
+ }
+
+ if (src_image->array_size > 1)
+ anv_finishme("vkCmdResolveImage: multisample array images");
+
+ for (uint32_t r = 0; r < region_count; ++r) {
+ const VkImageResolve *region = &regions[r];
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * - The aspectMask member of srcSubresource and dstSubresource must
+ * only contain VK_IMAGE_ASPECT_COLOR_BIT
+ *
+ * - The layerCount member of srcSubresource and dstSubresource must
+ * match
+ */
+ assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->srcSubresource.layerCount ==
+ region->dstSubresource.layerCount);
+
+ const uint32_t src_base_layer =
+ anv_meta_get_iview_layer(src_image, &region->srcSubresource,
+ &region->srcOffset);
+
+ const uint32_t dest_base_layer =
+ anv_meta_get_iview_layer(dest_image, &region->dstSubresource,
+ &region->dstOffset);
+
+ for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
+ ++layer) {
+
+ struct anv_image_view src_iview;
+ anv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = src_image_h,
+ .viewType = anv_meta_get_view_type(src_image),
+ .format = src_image->format->vk_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = src_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ cmd_buffer, 0);
+
+ struct anv_image_view dest_iview;
+ anv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = dest_image_h,
+ .viewType = anv_meta_get_view_type(dest_image),
+ .format = dest_image->format->vk_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ cmd_buffer, 0);
+
+ VkFramebuffer fb_h;
+ anv_CreateFramebuffer(device_h,
+ &(VkFramebufferCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkImageView[]) {
+ anv_image_view_to_handle(&dest_iview),
+ },
+ .width = anv_minify(dest_image->extent.width,
+ region->dstSubresource.mipLevel),
+ .height = anv_minify(dest_image->extent.height,
+ region->dstSubresource.mipLevel),
+ .layers = 1
+ },
+ &cmd_buffer->pool->alloc,
+ &fb_h);
+
+ ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h,
+ &(VkRenderPassBeginInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.resolve.pass,
+ .framebuffer = fb_h,
+ .renderArea = {
+ .offset = {
+ region->dstOffset.x,
+ region->dstOffset.y,
+ },
+ .extent = {
+ region->extent.width,
+ region->extent.height,
+ }
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ VK_SUBPASS_CONTENTS_INLINE);
+
+ emit_resolve(cmd_buffer,
+ &src_iview,
+ &(VkOffset2D) {
+ .x = region->srcOffset.x,
+ .y = region->srcOffset.y,
+ },
+ &dest_iview,
+ &(VkOffset2D) {
+ .x = region->dstOffset.x,
+ .y = region->dstOffset.y,
+ },
+ &(VkExtent2D) {
+ .width = region->extent.width,
+ .height = region->extent.height,
+ });
+
+ ANV_CALL(CmdEndRenderPass)(cmd_buffer_h);
+
+ anv_DestroyFramebuffer(device_h, fb_h,
+ &cmd_buffer->pool->alloc);
+ }
+ }
+
+ meta_resolve_restore(&state, cmd_buffer);
+}
+
+/**
+ * Emit any needed resolves for the current subpass.
+ */
+void
+anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
+{
+ struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ struct anv_subpass *subpass = cmd_buffer->state.subpass;
+ struct anv_meta_saved_state saved_state;
+
+ /* FINISHME(perf): Skip clears for resolve attachments.
+ *
+ * From the Vulkan 1.0 spec:
+ *
+ * If the first use of an attachment in a render pass is as a resolve
+ * attachment, then the loadOp is effectively ignored as the resolve is
+ * guaranteed to overwrite all pixels in the render area.
+ */
+
+ if (!subpass->has_resolve)
+ return;
+
+ meta_resolve_save(&saved_state, cmd_buffer);
+
+ for (uint32_t i = 0; i < subpass->color_count; ++i) {
+ uint32_t src_att = subpass->color_attachments[i];
+ uint32_t dest_att = subpass->resolve_attachments[i];
+
+ if (dest_att == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct anv_image_view *src_iview = fb->attachments[src_att];
+ struct anv_image_view *dest_iview = fb->attachments[dest_att];
+
+ struct anv_subpass resolve_subpass = {
+ .color_count = 1,
+ .color_attachments = (uint32_t[]) { dest_att },
+ .depth_stencil_attachment = VK_ATTACHMENT_UNUSED,
+ };
+
+ anv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
+
+ /* Subpass resolves must respect the render area. We can ignore the
+ * render area here because vkCmdBeginRenderPass set the render area
+ * with 3DSTATE_DRAWING_RECTANGLE.
+ *
+ * XXX(chadv): Does the hardware really respect
+ * 3DSTATE_DRAWING_RECTANGLE when drawing a 3DPRIM_RECTLIST?
+ */
+ emit_resolve(cmd_buffer,
+ src_iview,
+ &(VkOffset2D) { 0, 0 },
+ dest_iview,
+ &(VkOffset2D) { 0, 0 },
+ &(VkExtent2D) { fb->width, fb->height });
+ }
+
+ cmd_buffer->state.subpass = subpass;
+ meta_resolve_restore(&saved_state, cmd_buffer);
+}
--- /dev/null
- #include "glsl/nir/nir.h"
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
++#include "nir/nir.h"
+#include "anv_private.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar);
+
+void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
+ nir_shader *shader,
+ struct brw_stage_prog_data *prog_data);
+bool anv_nir_apply_pipeline_layout(nir_shader *shader,
+ struct brw_stage_prog_data *prog_data,
+ const struct anv_pipeline_layout *layout);
+
+#ifdef __cplusplus
+}
+#endif
--- /dev/null
- #include "glsl/nir/nir_builder.h"
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_nir.h"
++#include "nir/nir_builder.h"
+
+struct apply_dynamic_offsets_state {
+ nir_shader *shader;
+ nir_builder builder;
+
+ struct anv_pipeline_layout *layout;
+
+ uint32_t indices_start;
+};
+
+static bool
+apply_dynamic_offsets_block(nir_block *block, void *void_state)
+{
+ struct apply_dynamic_offsets_state *state = void_state;
+ struct anv_descriptor_set_layout *set_layout;
+
+ nir_builder *b = &state->builder;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ unsigned block_idx_src;
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_ubo:
+ case nir_intrinsic_load_ssbo:
+ block_idx_src = 0;
+ break;
+ case nir_intrinsic_store_ssbo:
+ block_idx_src = 1;
+ break;
+ default:
+ continue; /* the loop */
+ }
+
+ nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr;
+ assert(res_instr->type == nir_instr_type_intrinsic);
+ nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr);
+ assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
+
+ unsigned set = res_intrin->const_index[0];
+ unsigned binding = res_intrin->const_index[1];
+
+ set_layout = state->layout->set[set].layout;
+ if (set_layout->binding[binding].dynamic_offset_index < 0)
+ continue;
+
+ b->cursor = nir_before_instr(&intrin->instr);
+
+ /* First, we need to generate the uniform load for the buffer offset */
+ uint32_t index = state->layout->set[set].dynamic_offset_start +
+ set_layout->binding[binding].dynamic_offset_index;
+
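+ /* Each dynamic buffer occupies 8 bytes of uniform space: a 32-bit offset
+ * followed by a 32-bit range, so entry N lives at indices_start + N * 8
+ * and the load below fetches both components at once.
+ */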
+ nir_intrinsic_instr *offset_load =
+ nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform);
+ offset_load->num_components = 2;
+ offset_load->const_index[0] = state->indices_start + index * 8;
+ offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa,
+ nir_imm_int(b, 8)));
+
+ nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL);
+ nir_builder_instr_insert(b, &offset_load->instr);
+
+ nir_src *offset_src = nir_get_io_offset_src(intrin);
+ nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa,
+ &offset_load->dest.ssa);
+
+ /* In order to avoid out-of-bounds access, we predicate */
+ nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1),
+ offset_src->ssa);
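+ /* Channel 1 of the loaded pair is the buffer range: the access only
+ * executes when the incoming offset does not exceed that range, and
+ * out-of-bounds loads produce zero through the phi added below.
+ */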
+ nir_if *if_stmt = nir_if_create(b->shader);
+ if_stmt->condition = nir_src_for_ssa(pred);
+ nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
+
+ nir_instr_remove(&intrin->instr);
+ *offset_src = nir_src_for_ssa(new_offset);
+ nir_instr_insert_after_cf_list(&if_stmt->then_list, &intrin->instr);
+
+ if (intrin->intrinsic != nir_intrinsic_store_ssbo) {
+ /* It's a load, so we need a phi node */
+ nir_phi_instr *phi = nir_phi_instr_create(b->shader);
+ nir_ssa_dest_init(&phi->instr, &phi->dest,
+ intrin->num_components, NULL);
+
+ nir_phi_src *src1 = ralloc(phi, nir_phi_src);
+ struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list);
+ src1->pred = exec_node_data(nir_block, tnode, cf_node.node);
+ src1->src = nir_src_for_ssa(&intrin->dest.ssa);
+ exec_list_push_tail(&phi->srcs, &src1->node);
+
+ b->cursor = nir_after_cf_list(&if_stmt->else_list);
+ nir_ssa_def *zero = nir_build_imm(b, intrin->num_components,
+ (nir_const_value) { .u = { 0, 0, 0, 0 } });
+
+ nir_phi_src *src2 = ralloc(phi, nir_phi_src);
+ struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list);
+ src2->pred = exec_node_data(nir_block, enode, cf_node.node);
+ src2->src = nir_src_for_ssa(zero);
+ exec_list_push_tail(&phi->srcs, &src2->node);
+
+ assert(intrin->dest.is_ssa);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&phi->dest.ssa));
+
+ nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr);
+ }
+ }
+
+ return true;
+}
+
+void
+anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
+ nir_shader *shader,
+ struct brw_stage_prog_data *prog_data)
+{
+ struct apply_dynamic_offsets_state state = {
+ .shader = shader,
+ .layout = pipeline->layout,
+ .indices_start = shader->num_uniforms,
+ };
+
+ if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets)
+ return;
+
+ nir_foreach_function(shader, function) {
+ if (function->impl) {
+ nir_builder_init(&state.builder, function->impl);
+ nir_foreach_block(function->impl, apply_dynamic_offsets_block, &state);
+ nir_metadata_preserve(function->impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+ }
+
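+ /* The param entries below are never dereferenced by the compiler; they
+ * only need to be distinct pointers, so taking field addresses off a
+ * NULL anv_push_constants pointer gives us the structure offsets for
+ * free.
+ */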
+ struct anv_push_constants *null_data = NULL;
+ for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) {
+ prog_data->param[i * 2 + shader->num_uniforms] =
+ (const union gl_constant_value *)&null_data->dynamic[i].offset;
+ prog_data->param[i * 2 + 1 + shader->num_uniforms] =
+ (const union gl_constant_value *)&null_data->dynamic[i].range;
+ }
+
+ shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 8;
+}
--- /dev/null
- #include "glsl/nir/nir_builder.h"
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_nir.h"
+#include "program/prog_parameter.h"
++#include "nir/nir_builder.h"
+
+struct apply_pipeline_layout_state {
+ nir_shader *shader;
+ nir_builder builder;
+
+ const struct anv_pipeline_layout *layout;
+
+ bool progress;
+};
+
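+ /* Map a (set, binding) pair to a flat binding table index for this stage:
+ * the set's per-stage surface_start plus the binding's surface index
+ * within that set.
+ */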
+static uint32_t
+get_surface_index(unsigned set, unsigned binding,
+ struct apply_pipeline_layout_state *state)
+{
+ assert(set < state->layout->num_sets);
+ struct anv_descriptor_set_layout *set_layout =
+ state->layout->set[set].layout;
+
+ gl_shader_stage stage = state->shader->stage;
+
+ assert(binding < set_layout->binding_count);
+
+ assert(set_layout->binding[binding].stage[stage].surface_index >= 0);
+
+ uint32_t surface_index =
+ state->layout->set[set].stage[stage].surface_start +
+ set_layout->binding[binding].stage[stage].surface_index;
+
+ assert(surface_index < state->layout->stage[stage].surface_count);
+
+ return surface_index;
+}
+
+static uint32_t
+get_sampler_index(unsigned set, unsigned binding, nir_texop tex_op,
+ struct apply_pipeline_layout_state *state)
+{
+ assert(set < state->layout->num_sets);
+ struct anv_descriptor_set_layout *set_layout =
+ state->layout->set[set].layout;
+
+ assert(binding < set_layout->binding_count);
+
+ gl_shader_stage stage = state->shader->stage;
+
+ if (set_layout->binding[binding].stage[stage].sampler_index < 0) {
+ assert(tex_op == nir_texop_txf);
+ return 0;
+ }
+
+ uint32_t sampler_index =
+ state->layout->set[set].stage[stage].sampler_start +
+ set_layout->binding[binding].stage[stage].sampler_index;
+
+ assert(sampler_index < state->layout->stage[stage].sampler_count);
+
+ return sampler_index;
+}
+
+static uint32_t
+get_image_index(unsigned set, unsigned binding,
+ struct apply_pipeline_layout_state *state)
+{
+ assert(set < state->layout->num_sets);
+ struct anv_descriptor_set_layout *set_layout =
+ state->layout->set[set].layout;
+
+ assert(binding < set_layout->binding_count);
+
+ gl_shader_stage stage = state->shader->stage;
+
+ assert(set_layout->binding[binding].stage[stage].image_index >= 0);
+
+ uint32_t image_index =
+ state->layout->set[set].stage[stage].image_start +
+ set_layout->binding[binding].stage[stage].image_index;
+
+ assert(image_index < state->layout->stage[stage].image_count);
+
+ return image_index;
+}
+
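+ /* Lower nir_intrinsic_vulkan_resource_index to the flat surface index the
+ * backend expects. The set and binding come from the const_indices and the
+ * (possibly dynamic) array index in src[0] is added on top.
+ */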
+static void
+lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
+ struct apply_pipeline_layout_state *state)
+{
+ nir_builder *b = &state->builder;
+
+ b->cursor = nir_before_instr(&intrin->instr);
+
+ uint32_t set = intrin->const_index[0];
+ uint32_t binding = intrin->const_index[1];
+
+ uint32_t surface_index = get_surface_index(set, binding, state);
+
+ nir_const_value *const_block_idx =
+ nir_src_as_const_value(intrin->src[0]);
+
+ nir_ssa_def *block_index;
+ if (const_block_idx) {
+ block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]);
+ } else {
+ block_index = nir_iadd(b, nir_imm_int(b, surface_index),
+ nir_ssa_for_src(b, intrin->src[0], 1));
+ }
+
+ assert(intrin->dest.is_ssa);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index));
+ nir_instr_remove(&intrin->instr);
+}
+
+static void
+lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref,
+ unsigned *const_index, nir_tex_src_type src_type,
+ struct apply_pipeline_layout_state *state)
+{
+ if (deref->deref.child) {
+ assert(deref->deref.child->deref_type == nir_deref_type_array);
+ nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);
+
+ *const_index += deref_array->base_offset;
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
+ tex->num_srcs + 1);
+
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ new_srcs[i].src_type = tex->src[i].src_type;
+ nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src);
+ }
+
+ ralloc_free(tex->src);
+ tex->src = new_srcs;
+
+ /* Now we can go ahead and move the source over to being a
+ * first-class texture source.
+ */
+ tex->src[tex->num_srcs].src_type = src_type;
+ tex->num_srcs++;
+ assert(deref_array->indirect.is_ssa);
+ nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src,
+ deref_array->indirect);
+ }
+ }
+}
+
+static void
+cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref)
+{
+ if (deref->deref.child == NULL)
+ return;
+
+ nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);
+
+ if (deref_array->deref_array_type != nir_deref_array_type_indirect)
+ return;
+
+ nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT);
+}
+
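+ /* Resolve a texture/sampler deref (descriptor set, binding, and optional
+ * array index) into the flat texture_index/sampler_index pair, turning any
+ * indirect array index into a texture/sampler offset source.
+ */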
+static void
+lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
+{
+ /* No one should have come by and lowered it already */
+ assert(tex->sampler);
+
+ nir_deref_var *tex_deref = tex->texture ? tex->texture : tex->sampler;
+ tex->texture_index =
+ get_surface_index(tex_deref->var->data.descriptor_set,
+ tex_deref->var->data.binding, state);
+ lower_tex_deref(tex, tex_deref, &tex->texture_index,
+ nir_tex_src_texture_offset, state);
+
+ tex->sampler_index =
+ get_sampler_index(tex->sampler->var->data.descriptor_set,
+ tex->sampler->var->data.binding, tex->op, state);
+ lower_tex_deref(tex, tex->sampler, &tex->sampler_index,
+ nir_tex_src_sampler_offset, state);
+
+ /* The backend only ever uses this to mark used surfaces. We don't care
+ * about that little optimization so it just needs to be non-zero.
+ */
+ tex->texture_array_size = 1;
+
+ if (tex->texture)
+ cleanup_tex_deref(tex, tex->texture);
+ cleanup_tex_deref(tex, tex->sampler);
+ tex->texture = NULL;
+ tex->sampler = NULL;
+}
+
+static bool
+apply_pipeline_layout_block(nir_block *block, void *void_state)
+{
+ struct apply_pipeline_layout_state *state = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ switch (instr->type) {
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
+ lower_res_index_intrinsic(intrin, state);
+ state->progress = true;
+ }
+ break;
+ }
+ case nir_instr_type_tex:
+ lower_tex(nir_instr_as_tex(instr), state);
+ /* All texture instructions need lowering */
+ state->progress = true;
+ break;
+ default:
+ continue;
+ }
+ }
+
+ return true;
+}
+
+static void
+setup_vec4_uniform_value(const union gl_constant_value **params,
+ const union gl_constant_value *values,
+ unsigned n)
+{
+ static const gl_constant_value zero = { 0 };
+
+ for (unsigned i = 0; i < n; ++i)
+ params[i] = &values[i];
+
+ for (unsigned i = n; i < 4; ++i)
+ params[i] = &zero;
+}
+
+bool
+anv_nir_apply_pipeline_layout(nir_shader *shader,
+ struct brw_stage_prog_data *prog_data,
+ const struct anv_pipeline_layout *layout)
+{
+ struct apply_pipeline_layout_state state = {
+ .shader = shader,
+ .layout = layout,
+ };
+
+ nir_foreach_function(shader, function) {
+ if (function->impl) {
+ nir_builder_init(&state.builder, function->impl);
+ nir_foreach_block(function->impl, apply_pipeline_layout_block, &state);
+ nir_metadata_preserve(function->impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+ }
+
+ if (layout->stage[shader->stage].image_count > 0) {
+ nir_foreach_variable(var, &shader->uniforms) {
+ if (glsl_type_is_image(var->type) ||
+ (glsl_type_is_array(var->type) &&
+ glsl_type_is_image(glsl_get_array_element(var->type)))) {
+ /* Images are represented as uniform push constants and the actual
+ * information required for reading/writing to/from the image is
+ * stored in the uniform.
+ */
+ unsigned image_index = get_image_index(var->data.descriptor_set,
+ var->data.binding, &state);
+
+ var->data.driver_location = shader->num_uniforms +
+ image_index * BRW_IMAGE_PARAM_SIZE * 4;
+ }
+ }
+
+ struct anv_push_constants *null_data = NULL;
+ const gl_constant_value **param = prog_data->param + shader->num_uniforms;
+ const struct brw_image_param *image_param = null_data->images;
+ for (uint32_t i = 0; i < layout->stage[shader->stage].image_count; i++) {
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
+ (const union gl_constant_value *)&image_param->surface_idx, 1);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
+ (const union gl_constant_value *)image_param->offset, 2);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
+ (const union gl_constant_value *)image_param->size, 3);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
+ (const union gl_constant_value *)image_param->stride, 4);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
+ (const union gl_constant_value *)image_param->tiling, 3);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
+ (const union gl_constant_value *)image_param->swizzling, 2);
+
+ param += BRW_IMAGE_PARAM_SIZE;
+ image_param ++;
+ }
+
+ shader->num_uniforms += layout->stage[shader->stage].image_count *
+ BRW_IMAGE_PARAM_SIZE * 4;
+ }
+
+ return state.progress;
+}
--- /dev/null
- #include "glsl/nir/spirv/nir_spirv.h"
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "anv_private.h"
+#include "brw_nir.h"
+#include "anv_nir.h"
++#include "nir/spirv/nir_spirv.h"
+
+/* Needed for SWIZZLE macros */
+#include "program/prog_instruction.h"
+
+// Shader functions
+
+VkResult anv_CreateShaderModule(
+ VkDevice _device,
+ const VkShaderModuleCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkShaderModule* pShaderModule)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_shader_module *module;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
+ assert(pCreateInfo->flags == 0);
+
+ module = anv_alloc2(&device->alloc, pAllocator,
+ sizeof(*module) + pCreateInfo->codeSize, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (module == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ module->nir = NULL;
+ module->size = pCreateInfo->codeSize;
+ memcpy(module->data, pCreateInfo->pCode, module->size);
+
+ *pShaderModule = anv_shader_module_to_handle(module);
+
+ return VK_SUCCESS;
+}
+
+void anv_DestroyShaderModule(
+ VkDevice _device,
+ VkShaderModule _module,
+ const VkAllocationCallbacks* pAllocator)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_shader_module, module, _module);
+
+ anv_free2(&device->alloc, pAllocator, module);
+}
+
+#define SPIR_V_MAGIC_NUMBER 0x07230203
+
+/* Eventually, this will become part of anv_CreateShader. Unfortunately,
+ * we can't do that yet because we don't have the ability to copy nir.
+ */
+static nir_shader *
+anv_shader_compile_to_nir(struct anv_device *device,
+ struct anv_shader_module *module,
+ const char *entrypoint_name,
+ gl_shader_stage stage,
+ const VkSpecializationInfo *spec_info)
+{
+ if (strcmp(entrypoint_name, "main") != 0) {
+ anv_finishme("Multiple shaders per module not really supported");
+ }
+
+ const struct brw_compiler *compiler =
+ device->instance->physicalDevice.compiler;
+ const nir_shader_compiler_options *nir_options =
+ compiler->glsl_compiler_options[stage].NirOptions;
+
+ nir_shader *nir;
+ nir_function *entry_point;
+ if (module->nir) {
+ /* Some things such as our meta clear/blit code will give us a NIR
+ * shader directly. In that case, we ignore the SPIR-V entirely and
+ * use the NIR shader as-is.
+ */
+ nir = module->nir;
+ nir->options = nir_options;
+ nir_validate_shader(nir);
+
+ assert(exec_list_length(&nir->functions) == 1);
+ struct exec_node *node = exec_list_get_head(&nir->functions);
+ entry_point = exec_node_data(nir_function, node, node);
+ } else {
+ uint32_t *spirv = (uint32_t *) module->data;
+ assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
+ assert(module->size % 4 == 0);
+
+ uint32_t num_spec_entries = 0;
+ struct nir_spirv_specialization *spec_entries = NULL;
+ if (spec_info && spec_info->mapEntryCount > 0) {
+ num_spec_entries = spec_info->mapEntryCount;
+ spec_entries = malloc(num_spec_entries * sizeof(*spec_entries));
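+ /* Each VkSpecializationMapEntry names a constant ID and an offset into
+ * pData; only 32-bit specialization values are handled here.
+ */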
+ for (uint32_t i = 0; i < num_spec_entries; i++) {
+ const uint32_t *data =
+ spec_info->pData + spec_info->pMapEntries[i].offset;
+ assert((const void *)(data + 1) <=
+ spec_info->pData + spec_info->dataSize);
+
+ spec_entries[i].id = spec_info->pMapEntries[i].constantID;
+ spec_entries[i].data = *data;
+ }
+ }
+
+ entry_point = spirv_to_nir(spirv, module->size / 4,
+ spec_entries, num_spec_entries,
+ stage, entrypoint_name, nir_options);
+ nir = entry_point->shader;
+ assert(nir->stage == stage);
+ nir_validate_shader(nir);
+
+ free(spec_entries);
+
+ nir_lower_returns(nir);
+ nir_validate_shader(nir);
+
+ nir_inline_functions(nir);
+ nir_validate_shader(nir);
+
+ /* Pick off the single entrypoint that we want */
+ foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
+ if (func != entry_point)
+ exec_node_remove(&func->node);
+ }
+ assert(exec_list_length(&nir->functions) == 1);
+ entry_point->name = ralloc_strdup(entry_point, "main");
+
+ nir_remove_dead_variables(nir, nir_var_shader_in);
+ nir_remove_dead_variables(nir, nir_var_shader_out);
+ nir_remove_dead_variables(nir, nir_var_system_value);
+ nir_validate_shader(nir);
+
+ nir_lower_outputs_to_temporaries(entry_point->shader, entry_point);
+
+ nir_lower_system_values(nir);
+ nir_validate_shader(nir);
+ }
+
+ /* Vulkan uses the separate-shader linking model */
+ nir->info.separate_shader = true;
+
+ nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]);
+
+ nir_shader_gather_info(nir, entry_point->impl);
+
+ uint32_t indirect_mask = 0;
+ if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput)
+ indirect_mask |= (1 << nir_var_shader_in);
+ if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp)
+ indirect_mask |= 1 << nir_var_local;
+
+ nir_lower_indirect_derefs(nir, indirect_mask);
+
+ return nir;
+}
+
+void
+anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
+ struct anv_device *device)
+{
+ cache->device = device;
+ anv_state_stream_init(&cache->program_stream,
+ &device->instruction_block_pool);
+ pthread_mutex_init(&cache->mutex, NULL);
+}
+
+void
+anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
+{
+ anv_state_stream_finish(&cache->program_stream);
+ pthread_mutex_destroy(&cache->mutex);
+}
+
+static uint32_t
+anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
+ const void *data, size_t size)
+{
+ pthread_mutex_lock(&cache->mutex);
+
+ struct anv_state state =
+ anv_state_stream_alloc(&cache->program_stream, size, 64);
+
+ pthread_mutex_unlock(&cache->mutex);
+
+ assert(size < cache->program_stream.block_pool->block_size);
+
+ memcpy(state.map, data, size);
+
+ if (!cache->device->info.has_llc)
+ anv_state_clflush(state);
+
+ return state.offset;
+}
+
+VkResult anv_CreatePipelineCache(
+ VkDevice _device,
+ const VkPipelineCacheCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkPipelineCache* pPipelineCache)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_pipeline_cache *cache;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
+ assert(pCreateInfo->flags == 0);
+
+ cache = anv_alloc2(&device->alloc, pAllocator,
+ sizeof(*cache), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (cache == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ anv_pipeline_cache_init(cache, device);
+
+ *pPipelineCache = anv_pipeline_cache_to_handle(cache);
+
+ return VK_SUCCESS;
+}
+
+void anv_DestroyPipelineCache(
+ VkDevice _device,
+ VkPipelineCache _cache,
+ const VkAllocationCallbacks* pAllocator)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
+
+ anv_pipeline_cache_finish(cache);
+
+ anv_free2(&device->alloc, pAllocator, cache);
+}
+
+VkResult anv_GetPipelineCacheData(
+ VkDevice device,
+ VkPipelineCache pipelineCache,
+ size_t* pDataSize,
+ void* pData)
+{
+ *pDataSize = 0;
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_MergePipelineCaches(
+ VkDevice device,
+ VkPipelineCache destCache,
+ uint32_t srcCacheCount,
+ const VkPipelineCache* pSrcCaches)
+{
+ stub_return(VK_SUCCESS);
+}
+
+void anv_DestroyPipeline(
+ VkDevice _device,
+ VkPipeline _pipeline,
+ const VkAllocationCallbacks* pAllocator)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
+
+ anv_reloc_list_finish(&pipeline->batch_relocs,
+ pAllocator ? pAllocator : &device->alloc);
+ if (pipeline->blend_state.map)
+ anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
+ anv_free2(&device->alloc, pAllocator, pipeline);
+}
+
+static const uint32_t vk_to_gen_primitive_type[] = {
+ [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
+ [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
+ [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
+ [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
+ [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
+ [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
+ [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
+ [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
+ [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
+ [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
+/* [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST] = _3DPRIM_PATCHLIST_1 */
+};
+
+static void
+populate_sampler_prog_key(const struct brw_device_info *devinfo,
+ struct brw_sampler_prog_key_data *key)
+{
+ /* XXX: Handle texture swizzle on HSW- */
+ for (int i = 0; i < MAX_SAMPLERS; i++) {
+ /* Assume color sampler, no swizzling. (Works for BDW+) */
+ key->swizzles[i] = SWIZZLE_XYZW;
+ }
+}
+
+static void
+populate_vs_prog_key(const struct brw_device_info *devinfo,
+ struct brw_vs_prog_key *key)
+{
+ memset(key, 0, sizeof(*key));
+
+ populate_sampler_prog_key(devinfo, &key->tex);
+
+ /* XXX: Handle vertex input work-arounds */
+
+ /* XXX: Handle sampler_prog_key */
+}
+
+static void
+populate_gs_prog_key(const struct brw_device_info *devinfo,
+ struct brw_gs_prog_key *key)
+{
+ memset(key, 0, sizeof(*key));
+
+ populate_sampler_prog_key(devinfo, &key->tex);
+}
+
+static void
+populate_wm_prog_key(const struct brw_device_info *devinfo,
+ const VkGraphicsPipelineCreateInfo *info,
+ const struct anv_graphics_pipeline_create_info *extra,
+ struct brw_wm_prog_key *key)
+{
+ ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass);
+
+ memset(key, 0, sizeof(*key));
+
+ populate_sampler_prog_key(devinfo, &key->tex);
+
+ /* TODO: Fill out key->input_slots_valid */
+
+ /* Vulkan doesn't specify a default */
+ key->high_quality_derivatives = false;
+
+ /* XXX Vulkan doesn't appear to specify */
+ key->clamp_fragment_color = false;
+
+ /* Vulkan always specifies upper-left coordinates */
+ key->drawable_height = 0;
+ key->render_to_fbo = false;
+
+ if (extra && extra->color_attachment_count >= 0) {
+ key->nr_color_regions = extra->color_attachment_count;
+ } else {
+ key->nr_color_regions =
+ render_pass->subpasses[info->subpass].color_count;
+ }
+
+ key->replicate_alpha = key->nr_color_regions > 1 &&
+ info->pMultisampleState &&
+ info->pMultisampleState->alphaToCoverageEnable;
+
+ if (info->pMultisampleState && info->pMultisampleState->rasterizationSamples > 1) {
+ /* We should probably pull this out of the shader, but it's fairly
+ * harmless to compute it and then let dead-code take care of it.
+ */
+ key->persample_shading = info->pMultisampleState->sampleShadingEnable;
+ if (key->persample_shading)
+ key->persample_2x = info->pMultisampleState->rasterizationSamples == 2;
+
+ key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable;
+ key->compute_sample_id = info->pMultisampleState->sampleShadingEnable;
+ }
+}
+
+static void
+populate_cs_prog_key(const struct brw_device_info *devinfo,
+ struct brw_cs_prog_key *key)
+{
+ memset(key, 0, sizeof(*key));
+
+ populate_sampler_prog_key(devinfo, &key->tex);
+}
+
+static nir_shader *
+anv_pipeline_compile(struct anv_pipeline *pipeline,
+ struct anv_shader_module *module,
+ const char *entrypoint,
+ gl_shader_stage stage,
+ const VkSpecializationInfo *spec_info,
+ struct brw_stage_prog_data *prog_data)
+{
+ const struct brw_compiler *compiler =
+ pipeline->device->instance->physicalDevice.compiler;
+
+ nir_shader *nir = anv_shader_compile_to_nir(pipeline->device,
+ module, entrypoint, stage,
+ spec_info);
+ if (nir == NULL)
+ return NULL;
+
+ anv_nir_lower_push_constants(nir, compiler->scalar_stage[stage]);
+
+ /* Figure out the number of parameters */
+ prog_data->nr_params = 0;
+
+ if (nir->num_uniforms > 0) {
+ /* If the shader uses any push constants at all, we'll just give
+ * them the maximum possible number
+ */
+ prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
+ }
+
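+ /* Each dynamic buffer needs two params: one for the offset and one for
+ * the range (filled in by anv_nir_apply_dynamic_offsets).
+ */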
+ if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets)
+ prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2;
+
+ if (pipeline->layout && pipeline->layout->stage[stage].image_count > 0)
+ prog_data->nr_params += pipeline->layout->stage[stage].image_count *
+ BRW_IMAGE_PARAM_SIZE;
+
+ if (prog_data->nr_params > 0) {
+ /* XXX: I think we're leaking this */
+ prog_data->param = (const union gl_constant_value **)
+ malloc(prog_data->nr_params * sizeof(union gl_constant_value *));
+
+ /* We now set the param values to be offsets into an
+ * anv_push_constants structure. Since the compiler doesn't
+ * actually dereference any of the gl_constant_value pointers in the
+ * params array, it doesn't really matter what we put here.
+ */
+ struct anv_push_constants *null_data = NULL;
+ if (nir->num_uniforms > 0) {
+ /* Fill out the push constants section of the param array */
+ for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++)
+ prog_data->param[i] = (const union gl_constant_value *)
+ &null_data->client_data[i * sizeof(float)];
+ }
+ }
+
+ /* Set up dynamic offsets */
+ anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data);
+
+ /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
+ if (pipeline->layout)
+ anv_nir_apply_pipeline_layout(nir, prog_data, pipeline->layout);
+
+ /* All binding table offsets provided by apply_pipeline_layout() are
+ * relative to the start of the binding table. The bias accounts for
+ * the slots that come first: the render targets for fragment shaders
+ * (MAX_RTS) and the work-groups surface for compute shaders.
+ */
+ unsigned bias;
+ switch (stage) {
+ case MESA_SHADER_FRAGMENT:
+ bias = MAX_RTS;
+ break;
+ case MESA_SHADER_COMPUTE:
+ bias = 1;
+ break;
+ default:
+ bias = 0;
+ break;
+ }
+ prog_data->binding_table.size_bytes = 0;
+ prog_data->binding_table.texture_start = bias;
+ prog_data->binding_table.ubo_start = bias;
+ prog_data->binding_table.ssbo_start = bias;
+ prog_data->binding_table.image_start = bias;
+
+ /* Finish the optimization and compilation process */
+ nir = brw_nir_lower_io(nir, &pipeline->device->info,
+ compiler->scalar_stage[stage]);
+
+ /* nir_lower_io will only handle the push constants; we need to set this
+ * to the full number of possible uniforms.
+ */
+ nir->num_uniforms = prog_data->nr_params * 4;
+
+ return nir;
+}
+
+static void
+anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
+ gl_shader_stage stage,
+ struct brw_stage_prog_data *prog_data)
+{
+ struct brw_device_info *devinfo = &pipeline->device->info;
+ uint32_t max_threads[] = {
+ [MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
+ [MESA_SHADER_TESS_CTRL] = 0,
+ [MESA_SHADER_TESS_EVAL] = 0,
+ [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
+ [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
+ [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads,
+ };
+
+ pipeline->prog_data[stage] = prog_data;
+ pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
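+ /* Scratch is carved out of one linear allocation: each stage records its
+ * start offset and the total grows by the per-thread scratch size times
+ * the maximum thread count for that stage.
+ */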
+ pipeline->scratch_start[stage] = pipeline->total_scratch;
+ pipeline->total_scratch =
+ align_u32(pipeline->total_scratch, 1024) +
+ prog_data->total_scratch * max_threads[stage];
+}
+
+static VkResult
+anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
+ struct anv_pipeline_cache *cache,
+ const VkGraphicsPipelineCreateInfo *info,
+ struct anv_shader_module *module,
+ const char *entrypoint,
+ const VkSpecializationInfo *spec_info)
+{
+ const struct brw_compiler *compiler =
+ pipeline->device->instance->physicalDevice.compiler;
+ struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
+ struct brw_vs_prog_key key;
+
+ populate_vs_prog_key(&pipeline->device->info, &key);
+
+ /* TODO: Look up shader in cache */
+
+ memset(prog_data, 0, sizeof(*prog_data));
+
+ nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
+ MESA_SHADER_VERTEX, spec_info,
+ &prog_data->base.base);
+ if (nir == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ void *mem_ctx = ralloc_context(NULL);
+
+ if (module->nir == NULL)
+ ralloc_steal(mem_ctx, nir);
+
+ prog_data->inputs_read = nir->info.inputs_read;
+ if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ))
+ pipeline->writes_point_size = true;
+
+ brw_compute_vue_map(&pipeline->device->info,
+ &prog_data->base.vue_map,
+ nir->info.outputs_written,
+ nir->info.separate_shader);
+
+ unsigned code_size;
+ const unsigned *shader_code =
+ brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+ NULL, false, -1, &code_size, NULL);
+ if (shader_code == NULL) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ const uint32_t offset =
+ anv_pipeline_cache_upload_kernel(cache, shader_code, code_size);
+ if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
+ pipeline->vs_simd8 = offset;
+ pipeline->vs_vec4 = NO_KERNEL;
+ } else {
+ pipeline->vs_simd8 = NO_KERNEL;
+ pipeline->vs_vec4 = offset;
+ }
+
+ ralloc_free(mem_ctx);
+
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX,
+ &prog_data->base.base);
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
+ struct anv_pipeline_cache *cache,
+ const VkGraphicsPipelineCreateInfo *info,
+ struct anv_shader_module *module,
+ const char *entrypoint,
+ const VkSpecializationInfo *spec_info)
+{
+ const struct brw_compiler *compiler =
+ pipeline->device->instance->physicalDevice.compiler;
+ struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data;
+ struct brw_gs_prog_key key;
+
+ populate_gs_prog_key(&pipeline->device->info, &key);
+
+ /* TODO: Look up shader in cache */
+
+ memset(prog_data, 0, sizeof(*prog_data));
+
+ nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
+ MESA_SHADER_GEOMETRY, spec_info,
+ &prog_data->base.base);
+ if (nir == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ void *mem_ctx = ralloc_context(NULL);
+
+ if (module->nir == NULL)
+ ralloc_steal(mem_ctx, nir);
+
+ if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ))
+ pipeline->writes_point_size = true;
+
+ brw_compute_vue_map(&pipeline->device->info,
+ &prog_data->base.vue_map,
+ nir->info.outputs_written,
+ nir->info.separate_shader);
+
+ unsigned code_size;
+ const unsigned *shader_code =
+ brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+ NULL, -1, &code_size, NULL);
+ if (shader_code == NULL) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ /* TODO: SIMD8 GS */
+ pipeline->gs_kernel =
+ anv_pipeline_cache_upload_kernel(cache, shader_code, code_size);
+ pipeline->gs_vertex_count = nir->info.gs.vertices_in;
+
+ ralloc_free(mem_ctx);
+
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY,
+ &prog_data->base.base);
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
+ struct anv_pipeline_cache *cache,
+ const VkGraphicsPipelineCreateInfo *info,
+ const struct anv_graphics_pipeline_create_info *extra,
+ struct anv_shader_module *module,
+ const char *entrypoint,
+ const VkSpecializationInfo *spec_info)
+{
+ const struct brw_compiler *compiler =
+ pipeline->device->instance->physicalDevice.compiler;
+ struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
+ struct brw_wm_prog_key key;
+
+ populate_wm_prog_key(&pipeline->device->info, info, extra, &key);
+
+ if (pipeline->use_repclear)
+ key.nr_color_regions = 1;
+
+ /* TODO: Look up shader in cache */
+
+ memset(prog_data, 0, sizeof(*prog_data));
+
+ prog_data->binding_table.render_target_start = 0;
+
+ nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
+ MESA_SHADER_FRAGMENT, spec_info,
+ &prog_data->base);
+ if (nir == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ void *mem_ctx = ralloc_context(NULL);
+
+ if (module->nir == NULL)
+ ralloc_steal(mem_ctx, nir);
+
+ unsigned code_size;
+ const unsigned *shader_code =
+ brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+ NULL, -1, -1, pipeline->use_repclear, &code_size, NULL);
+ if (shader_code == NULL) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ uint32_t offset =
+ anv_pipeline_cache_upload_kernel(cache, shader_code, code_size);
+ if (prog_data->no_8)
+ pipeline->ps_simd8 = NO_KERNEL;
+ else
+ pipeline->ps_simd8 = offset;
+
+ if (prog_data->no_8 || prog_data->prog_offset_16) {
+ pipeline->ps_simd16 = offset + prog_data->prog_offset_16;
+ } else {
+ pipeline->ps_simd16 = NO_KERNEL;
+ }
+
+ pipeline->ps_ksp2 = 0;
+ pipeline->ps_grf_start2 = 0;
+ if (pipeline->ps_simd8 != NO_KERNEL) {
+ pipeline->ps_ksp0 = pipeline->ps_simd8;
+ pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg;
+ if (pipeline->ps_simd16 != NO_KERNEL) {
+ pipeline->ps_ksp2 = pipeline->ps_simd16;
+ pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16;
+ }
+ } else if (pipeline->ps_simd16 != NO_KERNEL) {
+ pipeline->ps_ksp0 = pipeline->ps_simd16;
+ pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16;
+ }
+
+ ralloc_free(mem_ctx);
+
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT,
+ &prog_data->base);
+
+ return VK_SUCCESS;
+}
+
+VkResult
+anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
+ struct anv_pipeline_cache *cache,
+ const VkComputePipelineCreateInfo *info,
+ struct anv_shader_module *module,
+ const char *entrypoint,
+ const VkSpecializationInfo *spec_info)
+{
+ const struct brw_compiler *compiler =
+ pipeline->device->instance->physicalDevice.compiler;
+ struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
+ struct brw_cs_prog_key key;
+
+ populate_cs_prog_key(&pipeline->device->info, &key);
+
+ /* TODO: Look up shader in cache */
+
+ memset(prog_data, 0, sizeof(*prog_data));
+
+ prog_data->binding_table.work_groups_start = 0;
+
+ nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
+ MESA_SHADER_COMPUTE, spec_info,
+ &prog_data->base);
+ if (nir == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ prog_data->base.total_shared = nir->num_shared;
+
+ void *mem_ctx = ralloc_context(NULL);
+
+ if (module->nir == NULL)
+ ralloc_steal(mem_ctx, nir);
+
+ unsigned code_size;
+ const unsigned *shader_code =
+ brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+ -1, &code_size, NULL);
+ if (shader_code == NULL) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ pipeline->cs_simd =
+ anv_pipeline_cache_upload_kernel(cache, shader_code, code_size);
+ ralloc_free(mem_ctx);
+
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE,
+ &prog_data->base);
+
+ return VK_SUCCESS;
+}
+
+static const int gen8_push_size = 32 * 1024;
+
+static void
+gen7_compute_urb_partition(struct anv_pipeline *pipeline)
+{
+ const struct brw_device_info *devinfo = &pipeline->device->info;
+ bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT;
+ unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1;
+ unsigned vs_entry_size_bytes = vs_size * 64;
+ bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT;
+ unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1;
+ unsigned gs_entry_size_bytes = gs_size * 64;
+
+ /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
+ *
+ * VS Number of URB Entries must be divisible by 8 if the VS URB Entry
+ * Allocation Size is less than 9 512-bit URB entries.
+ *
+ * Similar text exists for GS.
+ */
+ unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
+ unsigned gs_granularity = (gs_size < 9) ? 8 : 1;
+
+ /* URB allocations must be done in 8k chunks. */
+ unsigned chunk_size_bytes = 8192;
+
+ /* Determine the size of the URB in chunks. */
+ unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes;
+
+ /* Reserve space for push constants */
+ unsigned push_constant_bytes = gen8_push_size;
+ unsigned push_constant_chunks =
+ push_constant_bytes / chunk_size_bytes;
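+ /* With the 32k gen8 push constant size and 8k chunks this reserves
+ * 4 chunks of the URB.
+ */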
+
+ /* Initially, assign each stage the minimum amount of URB space it needs,
+ * and make a note of how much additional space it "wants" (the amount of
+ * additional space it could actually make use of).
+ */
+
+ /* VS has a lower limit on the number of URB entries */
+ unsigned vs_chunks =
+ ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes;
+ unsigned vs_wants =
+ ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes - vs_chunks;
+
+ unsigned gs_chunks = 0;
+ unsigned gs_wants = 0;
+ if (gs_present) {
+ /* There are two constraints on the minimum amount of URB space we can
+ * allocate:
+ *
+ * (1) We need room for at least 2 URB entries, since we always operate
+ * the GS in DUAL_OBJECT mode.
+ *
+ * (2) We can't allocate less than gs_granularity entries.
+ */
+ gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes;
+ gs_wants =
+ ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes - gs_chunks;
+ }
+
+ /* There should always be enough URB space to satisfy the minimum
+ * requirements of each stage.
+ */
+ unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
+ assert(total_needs <= urb_chunks);
+
+ /* Mete out remaining space (if any) in proportion to "wants". */
+ unsigned total_wants = vs_wants + gs_wants;
+ unsigned remaining_space = urb_chunks - total_needs;
+ if (remaining_space > total_wants)
+ remaining_space = total_wants;
+ if (remaining_space > 0) {
+ unsigned vs_additional = (unsigned)
+ round(vs_wants * (((double) remaining_space) / total_wants));
+ vs_chunks += vs_additional;
+ remaining_space -= vs_additional;
+ gs_chunks += remaining_space;
+ }
+
+ /* Sanity check that we haven't over-allocated. */
+ assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);
+
+ /* Finally, compute the number of entries that can fit in the space
+ * allocated to each stage.
+ */
+ unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
+ unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;
+
+ /* Since we rounded up when computing *_wants, this may be slightly more
+ * than the maximum allowed amount, so correct for that.
+ */
+ nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
+ nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);
+
+ /* Ensure that we program a multiple of the granularity. */
+ nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
+ nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);
+
+ /* Finally, sanity check to make sure we have at least the minimum number
+ * of entries needed for each stage.
+ */
+ assert(nr_vs_entries >= devinfo->urb.min_vs_entries);
+ if (gs_present)
+ assert(nr_gs_entries >= 2);
+
+ /* Lay out the URB in the following order:
+ * - push constants
+ * - VS
+ * - GS
+ */
+ pipeline->urb.vs_start = push_constant_chunks;
+ pipeline->urb.vs_size = vs_size;
+ pipeline->urb.nr_vs_entries = nr_vs_entries;
+
+ pipeline->urb.gs_start = push_constant_chunks + vs_chunks;
+ pipeline->urb.gs_size = gs_size;
+ pipeline->urb.nr_gs_entries = nr_gs_entries;
+}
+
+static void
+anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+ anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
+ ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
+ struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
+
+ pipeline->dynamic_state = default_dynamic_state;
+
+ if (pCreateInfo->pDynamicState) {
+ /* Remove all of the states that are marked as dynamic */
+ uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
+ for (uint32_t s = 0; s < count; s++)
+ states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]);
+ }
+
+ struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
+
+ dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
+ if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
+ typed_memcpy(dynamic->viewport.viewports,
+ pCreateInfo->pViewportState->pViewports,
+ pCreateInfo->pViewportState->viewportCount);
+ }
+
+ dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
+ if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
+ typed_memcpy(dynamic->scissor.scissors,
+ pCreateInfo->pViewportState->pScissors,
+ pCreateInfo->pViewportState->scissorCount);
+ }
+
+ if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) {
+ assert(pCreateInfo->pRasterizationState);
+ dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
+ }
+
+ if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
+ assert(pCreateInfo->pRasterizationState);
+ dynamic->depth_bias.bias =
+ pCreateInfo->pRasterizationState->depthBiasConstantFactor;
+ dynamic->depth_bias.clamp =
+ pCreateInfo->pRasterizationState->depthBiasClamp;
+ dynamic->depth_bias.slope =
+ pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
+ }
+
+ if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) {
+ assert(pCreateInfo->pColorBlendState);
+ typed_memcpy(dynamic->blend_constants,
+ pCreateInfo->pColorBlendState->blendConstants, 4);
+ }
+
+ /* If there is no depthstencil attachment, then don't read
+ * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
+ * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
+ * no need to override the depthstencil defaults in
+ * anv_pipeline::dynamic_state when there is no depthstencil attachment.
+ *
+ * From the Vulkan spec (20 Oct 2015, git-aa308cb):
+ *
+ * pDepthStencilState [...] may only be NULL if renderPass and subpass
+ * specify a subpass that has no depth/stencil attachment.
+ */
+ if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
+ if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) {
+ assert(pCreateInfo->pDepthStencilState);
+ dynamic->depth_bounds.min =
+ pCreateInfo->pDepthStencilState->minDepthBounds;
+ dynamic->depth_bounds.max =
+ pCreateInfo->pDepthStencilState->maxDepthBounds;
+ }
+
+ if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
+ assert(pCreateInfo->pDepthStencilState);
+ dynamic->stencil_compare_mask.front =
+ pCreateInfo->pDepthStencilState->front.compareMask;
+ dynamic->stencil_compare_mask.back =
+ pCreateInfo->pDepthStencilState->back.compareMask;
+ }
+
+ if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
+ assert(pCreateInfo->pDepthStencilState);
+ dynamic->stencil_write_mask.front =
+ pCreateInfo->pDepthStencilState->front.writeMask;
+ dynamic->stencil_write_mask.back =
+ pCreateInfo->pDepthStencilState->back.writeMask;
+ }
+
+ if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
+ assert(pCreateInfo->pDepthStencilState);
+ dynamic->stencil_reference.front =
+ pCreateInfo->pDepthStencilState->front.reference;
+ dynamic->stencil_reference.back =
+ pCreateInfo->pDepthStencilState->back.reference;
+ }
+ }
+
+ pipeline->dynamic_state_mask = states;
+}
+
+static void
+anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
+{
+ struct anv_render_pass *renderpass = NULL;
+ struct anv_subpass *subpass = NULL;
+
+ /* Assert that all required members of VkGraphicsPipelineCreateInfo are
+ * present, as explained by the Vulkan spec (20 Oct 2015, git-aa308cb),
+ * Section 4.2 Graphics Pipeline.
+ */
+ assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
+
+ renderpass = anv_render_pass_from_handle(info->renderPass);
+ assert(renderpass);
+
+ if (renderpass != &anv_meta_dummy_renderpass) {
+ assert(info->subpass < renderpass->subpass_count);
+ subpass = &renderpass->subpasses[info->subpass];
+ }
+
+ assert(info->stageCount >= 1);
+ assert(info->pVertexInputState);
+ assert(info->pInputAssemblyState);
+ assert(info->pViewportState);
+ assert(info->pRasterizationState);
+
+ if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
+ assert(info->pDepthStencilState);
+
+ if (subpass && subpass->color_count > 0)
+ assert(info->pColorBlendState);
+
+ for (uint32_t i = 0; i < info->stageCount; ++i) {
+ switch (info->pStages[i].stage) {
+ case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+ case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+ assert(info->pTessellationState);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+VkResult
+anv_pipeline_init(struct anv_pipeline *pipeline,
+ struct anv_device *device,
+ struct anv_pipeline_cache *cache,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct anv_graphics_pipeline_create_info *extra,
+ const VkAllocationCallbacks *alloc)
+{
+ VkResult result;
+
+ anv_validate {
+ anv_pipeline_validate_create_info(pCreateInfo);
+ }
+
+ if (alloc == NULL)
+ alloc = &device->alloc;
+
+ pipeline->device = device;
+ pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);
+
+ result = anv_reloc_list_init(&pipeline->batch_relocs, alloc);
+ if (result != VK_SUCCESS)
+ return result;
+
+ pipeline->batch.alloc = alloc;
+ pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
+ pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
+ pipeline->batch.relocs = &pipeline->batch_relocs;
+
+ anv_pipeline_init_dynamic_state(pipeline, pCreateInfo);
+
+ if (pCreateInfo->pTessellationState)
+ anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO");
+
+ pipeline->use_repclear = extra && extra->use_repclear;
+ pipeline->writes_point_size = false;
+
+ /* When we free the pipeline, we detect stages based on the NULL status
+ * of various prog_data pointers. Make them NULL by default.
+ */
+ memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
+ memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
+
+ pipeline->vs_simd8 = NO_KERNEL;
+ pipeline->vs_vec4 = NO_KERNEL;
+ pipeline->gs_kernel = NO_KERNEL;
+ pipeline->ps_ksp0 = NO_KERNEL;
+
+ pipeline->active_stages = 0;
+ pipeline->total_scratch = 0;
+
+ for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
+ ANV_FROM_HANDLE(anv_shader_module, module,
+ pCreateInfo->pStages[i].module);
+
+ switch (pCreateInfo->pStages[i].stage) {
+ case VK_SHADER_STAGE_VERTEX_BIT:
+ anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, module,
+ pCreateInfo->pStages[i].pName,
+ pCreateInfo->pStages[i].pSpecializationInfo);
+ break;
+ case VK_SHADER_STAGE_GEOMETRY_BIT:
+ anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, module,
+ pCreateInfo->pStages[i].pName,
+ pCreateInfo->pStages[i].pSpecializationInfo);
+ break;
+ case VK_SHADER_STAGE_FRAGMENT_BIT:
+ anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra, module,
+ pCreateInfo->pStages[i].pName,
+ pCreateInfo->pStages[i].pSpecializationInfo);
+ break;
+ default:
+ anv_finishme("Unsupported shader stage");
+ }
+ }
+
+ if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
+ /* Vertex is only optional if disable_vs is set */
+ assert(extra->disable_vs);
+ memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
+ }
+
+ gen7_compute_urb_partition(pipeline);
+
+ const VkPipelineVertexInputStateCreateInfo *vi_info =
+ pCreateInfo->pVertexInputState;
+
+ uint64_t inputs_read;
+ if (extra && extra->disable_vs) {
+ /* If the VS is disabled, just assume the user knows what they're
+ * doing and apply the layout blindly. This can only come from
+ * meta, so this *should* be safe.
+ */
+ inputs_read = ~0ull;
+ } else {
+ inputs_read = pipeline->vs_prog_data.inputs_read;
+ }
+
+ pipeline->vb_used = 0;
+ for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
+ const VkVertexInputAttributeDescription *desc =
+ &vi_info->pVertexAttributeDescriptions[i];
+
+ if (inputs_read & (1 << (VERT_ATTRIB_GENERIC0 + desc->location)))
+ pipeline->vb_used |= 1 << desc->binding;
+ }
+
+ for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
+ const VkVertexInputBindingDescription *desc =
+ &vi_info->pVertexBindingDescriptions[i];
+
+ pipeline->binding_stride[desc->binding] = desc->stride;
+
+ /* Step rate is programmed per vertex element (attribute), not
+ * binding. Set up a map of which bindings step per instance, for
+ * reference by vertex element setup. */
+ switch (desc->inputRate) {
+ default:
+ case VK_VERTEX_INPUT_RATE_VERTEX:
+ pipeline->instancing_enable[desc->binding] = false;
+ break;
+ case VK_VERTEX_INPUT_RATE_INSTANCE:
+ pipeline->instancing_enable[desc->binding] = true;
+ break;
+ }
+ }
+
+ const VkPipelineInputAssemblyStateCreateInfo *ia_info =
+ pCreateInfo->pInputAssemblyState;
+ pipeline->primitive_restart = ia_info->primitiveRestartEnable;
+ pipeline->topology = vk_to_gen_primitive_type[ia_info->topology];
+
+ if (extra && extra->use_rectlist)
+ pipeline->topology = _3DPRIM_RECTLIST;
+
+ while (anv_block_pool_size(&device->scratch_block_pool) <
+ pipeline->total_scratch)
+ anv_block_pool_alloc(&device->scratch_block_pool);
+
+ return VK_SUCCESS;
+}
+
+VkResult
+anv_graphics_pipeline_create(
+ VkDevice _device,
+ VkPipelineCache _cache,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct anv_graphics_pipeline_create_info *extra,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipeline)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
+
+ if (cache == NULL)
+ cache = &device->default_pipeline_cache;
+
+ switch (device->info.gen) {
+ case 7:
+ if (device->info.is_haswell)
+ return gen75_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline);
+ else
+ return gen7_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline);
+ case 8:
+ return gen8_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline);
+ case 9:
+ return gen9_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline);
+ default:
+ unreachable("unsupported gen\n");
+ }
+}
+
+VkResult anv_CreateGraphicsPipelines(
+ VkDevice _device,
+ VkPipelineCache pipelineCache,
+ uint32_t count,
+ const VkGraphicsPipelineCreateInfo* pCreateInfos,
+ const VkAllocationCallbacks* pAllocator,
+ VkPipeline* pPipelines)
+{
+ VkResult result = VK_SUCCESS;
+
+ unsigned i = 0;
+ for (; i < count; i++) {
+ result = anv_graphics_pipeline_create(_device,
+ pipelineCache,
+ &pCreateInfos[i],
+ NULL, pAllocator, &pPipelines[i]);
+ if (result != VK_SUCCESS) {
+ for (unsigned j = 0; j < i; j++) {
+ anv_DestroyPipeline(_device, pPipelines[j], pAllocator);
+ }
+
+ return result;
+ }
+ }
+
+ return VK_SUCCESS;
+}
+
+static VkResult anv_compute_pipeline_create(
+ VkDevice _device,
+ VkPipelineCache _cache,
+ const VkComputePipelineCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkPipeline* pPipeline)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
+
+ if (cache == NULL)
+ cache = &device->default_pipeline_cache;
+
+ switch (device->info.gen) {
+ case 7:
+ if (device->info.is_haswell)
+ return gen75_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline);
+ else
+ return gen7_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline);
+ case 8:
+ return gen8_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline);
+ case 9:
+ return gen9_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline);
+ default:
+ unreachable("unsupported gen\n");
+ }
+}
+
+VkResult anv_CreateComputePipelines(
+ VkDevice _device,
+ VkPipelineCache pipelineCache,
+ uint32_t count,
+ const VkComputePipelineCreateInfo* pCreateInfos,
+ const VkAllocationCallbacks* pAllocator,
+ VkPipeline* pPipelines)
+{
+ VkResult result = VK_SUCCESS;
+
+ unsigned i = 0;
+ for (; i < count; i++) {
+ result = anv_compute_pipeline_create(_device, pipelineCache,
+ &pCreateInfos[i],
+ pAllocator, &pPipelines[i]);
+ if (result != VK_SUCCESS) {
+ for (unsigned j = 0; j < i; j++) {
+ anv_DestroyPipeline(_device, pPipelines[j], pAllocator);
+ }
+
+ return result;
+ }
+ }
+
+ return VK_SUCCESS;
+}